catmsg.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package catmsg contains support types for package x/text/message/catalog.
  5. //
  6. // This package contains the low-level implementations of Message used by the
  7. // catalog package and provides primitives for other packages to implement their
  8. // own. For instance, the plural package provides functionality for selecting
  9. // translation strings based on the plural category of substitution arguments.
  10. //
  11. // # Encoding and Decoding
  12. //
  13. // Catalogs store Messages encoded as a single string. Compiling a message into
  14. // a string both results in a more compact representation and speeds up evaluation.
  15. //
  16. // A Message must implement a Compile method to convert its arbitrary
  17. // representation to a string. The Compile method takes an Encoder which
  18. // facilitates serializing the message. Encoders also provide more context of
  19. // the message's creation (such as for which language the message is intended),
  20. // which may not be known at the time of the creation of the message.
  21. //
  22. // Each message type must also have an accompanying decoder registered to decode
  23. // the message. This decoder takes a Decoder argument which provides the
  24. // counterparts for the decoding.
  25. //
  26. // # Renderers
  27. //
  28. // A Decoder must be initialized with a Renderer implementation. These
  29. // implementations must be provided by packages that use Catalogs, typically
  30. // formatting packages such as x/text/message. A typical user will not need to
  31. // worry about this type; it is only relevant to packages that do string
  32. // formatting and want to use the catalog package to handle localized strings.
  33. //
  34. // A package that uses catalogs for selecting strings receives selection results
  35. // as a sequence of substrings passed to the Renderer. The following snippet shows
  36. // how to express the above example using the message package.
  37. //
  38. // message.Set(language.English, "You are %d minute(s) late.",
  39. // catalog.Var("minutes", plural.Select(1, "one", "minute")),
  40. // catalog.String("You are %[1]d ${minutes} late."))
  41. //
  42. // p := message.NewPrinter(language.English)
  43. // p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
  44. //
  45. // To evaluate the Printf, package message wraps the arguments in a Renderer
  46. // that is passed to the catalog for message decoding. The call sequence that
  47. // results from evaluating the above message, assuming the person is rather
  48. // tardy, is:
  49. //
  50. // Render("You are %[1]d ")
  51. // Arg(1)
  52. // Render("minutes")
  53. // Render(" late.")
  54. //
  55. // The call to Arg is caused by the plural.Select execution, which evaluates
  56. // the argument to determine whether the singular or plural message form should
  57. // be selected. The calls to Render report the partial results to the message
  58. // package for further evaluation.
  59. package catmsg
  60. import (
  61. "errors"
  62. "fmt"
  63. "strconv"
  64. "strings"
  65. "sync"
  66. "golang.org/x/text/language"
  67. )
  68. // A Handle refers to a registered message type. Handles returned by Register are indexes into the package's handlers slice.
  69. type Handle int
  70. // A Handler decodes and evaluates data compiled by a Message and sends the
  71. // result to the Decoder. The output may depend on the value of the substitution
  72. // arguments, accessible by the Decoder's Arg method. The Handler returns false
  73. // if there is no translation for the given substitution arguments.
  74. type Handler func(d *Decoder) bool
  75. // Register records the existence of a message type and returns a Handle that
  76. // can be used in the Encoder's EncodeMessageType method to create such
  77. // messages. The prefix of the name should be the package path followed by
  78. // an optional disambiguating string.
  79. // Register will panic if a handle for the same name was already registered.
  80. func Register(name string, handler Handler) Handle {
  81. mutex.Lock()
  82. defer mutex.Unlock()
  83. if _, ok := names[name]; ok {
  84. panic(fmt.Errorf("catmsg: handler for %q already exists", name))
  85. }
  86. h := Handle(len(handlers))
  87. names[name] = h
  88. handlers = append(handlers, handler)
  89. return h
  90. }
  91. // These handlers require fixed positions in the handlers slice.
  92. const (
  93. msgVars Handle = iota // message wrapper that starts with a variable block
  94. msgFirst // first message in a sequence that yields a match
  95. msgRaw // remaining data is rendered as is
  96. msgString // alternating string constants and variable substitutions
  97. msgAffix // prefix and suffix rendered around a nested message
  98. // Leave some arbitrary room for future expansion: 20 should suffice.
  99. numInternal = 20 // initial length of the handlers slice, so Register hands out handles from this value onward
  100. )
  101. const prefix = "golang.org/x/text/internal/catmsg." // name prefix of the built-in message types registered below
  102. var (
  103. // TODO: find a more stable way to link handles to message types.
  104. mutex sync.Mutex // held by Register while it updates names and handlers
  105. names = map[string]Handle{ // registered message type name -> Handle; pre-populated with the built-in types
  106. prefix + "Vars": msgVars,
  107. prefix + "First": msgFirst,
  108. prefix + "Raw": msgRaw,
  109. prefix + "String": msgString,
  110. prefix + "Affix": msgAffix,
  111. }
  112. handlers = make([]Handler, numInternal) // indexed by Handle; the built-in slots are filled in by init
  113. )
  114. func init() {
  115. // This handler is a message type wrapper that initializes a decoder
  116. // with a variable block. This message type, if present, is always at the
  117. // start of an encoded message.
  118. handlers[msgVars] = func(d *Decoder) bool {
  119. blockSize := int(d.DecodeUint())
  120. d.vars = d.data[:blockSize]
  121. d.data = d.data[blockSize:]
  122. return d.executeMessage()
  123. }
  124. // First takes the first message in a sequence that results in a match for
  125. // the given substitution arguments.
  126. handlers[msgFirst] = func(d *Decoder) bool {
  127. for !d.Done() {
  128. if d.ExecuteMessage() {
  129. return true
  130. }
  131. }
  132. return false
  133. }
  134. handlers[msgRaw] = func(d *Decoder) bool {
  135. d.Render(d.data)
  136. return true
  137. }
  138. // A String message alternates between a string constant and a variable
  139. // substitution.
  140. handlers[msgString] = func(d *Decoder) bool {
  141. for !d.Done() {
  142. if str := d.DecodeString(); str != "" {
  143. d.Render(str)
  144. }
  145. if d.Done() {
  146. break
  147. }
  148. d.ExecuteSubstitution()
  149. }
  150. return true
  151. }
  152. handlers[msgAffix] = func(d *Decoder) bool {
  153. // TODO: use an alternative method for common cases.
  154. prefix := d.DecodeString()
  155. suffix := d.DecodeString()
  156. if prefix != "" {
  157. d.Render(prefix)
  158. }
  159. ret := d.ExecuteMessage()
  160. if suffix != "" {
  161. d.Render(suffix)
  162. }
  163. return ret
  164. }
  165. }
  166. var (
  167. // ErrIncomplete indicates a compiled message does not define translations
  168. // for all possible argument values. If this error is returned, evaluating
  169. // a message may result in the ErrNoMatch error.
  170. ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")
  171. // ErrNoMatch indicates no translation message matched the given input
  172. // parameters when evaluating a message.
  173. ErrNoMatch = errors.New("catmsg: no translation for inputs")
  174. )
  175. // A Message holds a collection of translations for the same phrase that may
  176. // vary based on the values of substitution arguments.
  177. type Message interface {
  178. // Compile encodes the format string(s) of the message as a string for later
  179. // evaluation.
  180. //
  181. // The first call Compile makes on the encoder must be EncodeMessageType.
  182. // The handle passed to this call may either be a handle returned by
  183. // Register to encode a single custom message, or HandleFirst followed by
  184. // a sequence of calls to EncodeMessage.
  185. //
  186. // Compile must return ErrIncomplete if it is possible for evaluation to
  187. // not match any translation for a given set of formatting parameters.
  188. // For example, selecting a translation based on plural form may not yield
  189. // a match if the form "Other" is not one of the selectors.
  190. //
  191. // Compile may return any other application-specific error. For backwards
  192. // compatibility with packages like fmt, which often do not do sanity
  193. // checking of format strings ahead of time, Compile should still make an
  194. // effort to have some sensible fallback in case of an error.
  195. Compile(e *Encoder) error
  196. }
  197. // Compile converts a Message to a data string that can be stored in a Catalog.
  198. // The resulting string can subsequently be decoded by passing to the Execute
  199. // method of a Decoder.
  200. func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
  201. // TODO: pass macros so they can be used for validation.
  202. v := &Encoder{inBody: true} // encoder for variables
  203. v.root = v
  204. e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
  205. err = m.Compile(e)
  206. // This package serves te message package, which in turn is meant to be a
  207. // drop-in replacement for fmt. With the fmt package, format strings are
  208. // evaluated lazily and errors are handled by substituting strings in the
  209. // result, rather then returning an error. Dealing with multiple languages
  210. // makes it more important to check errors ahead of time. We chose to be
  211. // consistent and compatible and allow graceful degradation in case of
  212. // errors.
  213. buf := e.buf[stripPrefix(e.buf):]
  214. if len(v.buf) > 0 {
  215. // Prepend variable block.
  216. b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
  217. b[0] = byte(msgVars)
  218. b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
  219. b = append(b, v.buf...)
  220. b = append(b, buf...)
  221. buf = b
  222. }
  223. if err == nil {
  224. err = v.err
  225. }
  226. return string(buf), err
  227. }
  228. // FirstOf is a message type that prints the first message in the sequence that
  229. // resolves to a match for the given substitution arguments.
  230. type FirstOf []Message
  231. // Compile implements Message.
  232. func (s FirstOf) Compile(e *Encoder) error {
  233. e.EncodeMessageType(msgFirst)
  234. err := ErrIncomplete
  235. for i, m := range s {
  236. if err == nil {
  237. return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1)
  238. }
  239. err = e.EncodeMessage(m)
  240. }
  241. return err
  242. }
  243. // Var defines a message that can be substituted for a placeholder of the same
  244. // name. If an expression does not result in a string after evaluation, Name is
  245. // used as the substitution. For example:
  246. //
  247. // Var{
  248. // Name: "minutes",
  249. // Message: plural.Select(1, "one", "minute"),
  250. // }
  251. //
  252. // will resolve to minute for singular and minutes for plural forms.
  253. type Var struct {
  254. Name string
  255. Message Message
  256. }
  257. var errIsVar = errors.New("catmsg: variable used as message")
  258. // Compile implements Message.
  259. //
  260. // Note that this method merely registers a variable; it does not create an
  261. // encoded message.
  262. func (v *Var) Compile(e *Encoder) error {
  263. if err := e.addVar(v.Name, v.Message); err != nil {
  264. return err
  265. }
  266. // Using a Var by itself is an error. If it is in a sequence followed by
  267. // other messages referring to it, this error will be ignored.
  268. return errIsVar
  269. }
  270. // Raw is a message consisting of a single format string that is passed as is
  271. // to the Renderer.
  272. //
  273. // Note that a Renderer may still do its own variable substitution.
  274. type Raw string
  275. // Compile implements Message.
  276. func (r Raw) Compile(e *Encoder) (err error) {
  277. e.EncodeMessageType(msgRaw)
  278. // Special case: raw strings don't have a size encoding and so don't use
  279. // EncodeString.
  280. e.buf = append(e.buf, r...)
  281. return nil
  282. }
  283. // String is a message consisting of a single format string which contains
  284. // placeholders that may be substituted with variables.
  285. //
  286. // Variable substitutions are marked with placeholders and a variable name of
  287. // the form ${name}. Any other substitutions such as Go templates or
  288. // printf-style substitutions are left to be done by the Renderer.
  289. //
  290. // When evaluation a string interpolation, a Renderer will receive separate
  291. // calls for each placeholder and interstitial string. For example, for the
  292. // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
  293. // is:
  294. //
  295. // d.Render("%[1]v ")
  296. // d.Arg(1)
  297. // d.Render(resultOfInvites)
  298. // d.Render(" %[2]v to ")
  299. // d.Arg(2)
  300. // d.Render(resultOfTheir)
  301. // d.Render(" party.")
  302. //
  303. // where the messages for "invites" and "their" both use a plural.Select
  304. // referring to the first argument.
  305. //
  306. // Strings may also invoke macros. Macros are essentially variables that can be
  307. // reused. Macros may, for instance, be used to make selections between
  308. // different conjugations of a verb. See the catalog package description for an
  309. // overview of macros.
  310. type String string
  311. // Compile implements Message. It parses the placeholder formats and returns
  312. // any error.
  313. func (s String) Compile(e *Encoder) (err error) {
  314. msg := string(s)
  315. const subStart = "${"
  316. hasHeader := false
  317. p := 0
  318. b := []byte{}
  319. for {
  320. i := strings.Index(msg[p:], subStart)
  321. if i == -1 {
  322. break
  323. }
  324. b = append(b, msg[p:p+i]...)
  325. p += i + len(subStart)
  326. if i = strings.IndexByte(msg[p:], '}'); i == -1 {
  327. b = append(b, "$!(MISSINGBRACE)"...)
  328. err = fmt.Errorf("catmsg: missing '}'")
  329. p = len(msg)
  330. break
  331. }
  332. name := strings.TrimSpace(msg[p : p+i])
  333. if q := strings.IndexByte(name, '('); q == -1 {
  334. if !hasHeader {
  335. hasHeader = true
  336. e.EncodeMessageType(msgString)
  337. }
  338. e.EncodeString(string(b))
  339. e.EncodeSubstitution(name)
  340. b = b[:0]
  341. } else if j := strings.IndexByte(name[q:], ')'); j == -1 {
  342. // TODO: what should the error be?
  343. b = append(b, "$!(MISSINGPAREN)"...)
  344. err = fmt.Errorf("catmsg: missing ')'")
  345. } else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
  346. // TODO: handle more than one argument
  347. b = append(b, "$!(BADNUM)"...)
  348. err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
  349. } else {
  350. if !hasHeader {
  351. hasHeader = true
  352. e.EncodeMessageType(msgString)
  353. }
  354. e.EncodeString(string(b))
  355. e.EncodeSubstitution(name[:q], int(x))
  356. b = b[:0]
  357. }
  358. p += i + 1
  359. }
  360. b = append(b, msg[p:]...)
  361. if !hasHeader {
  362. // Simplify string to a raw string.
  363. Raw(string(b)).Compile(e)
  364. } else if len(b) > 0 {
  365. e.EncodeString(string(b))
  366. }
  367. return err
  368. }
  369. // Affix is a message that adds a prefix and suffix to another message.
  370. // This is mostly used add back whitespace to a translation that was stripped
  371. // before sending it out.
  372. type Affix struct {
  373. Message Message
  374. Prefix string
  375. Suffix string
  376. }
  377. // Compile implements Message.
  378. func (a Affix) Compile(e *Encoder) (err error) {
  379. // TODO: consider adding a special message type that just adds a single
  380. // return. This is probably common enough to handle the majority of cases.
  381. // Get some stats first, though.
  382. e.EncodeMessageType(msgAffix)
  383. e.EncodeString(a.Prefix)
  384. e.EncodeString(a.Suffix)
  385. e.EncodeMessage(a.Message)
  386. return nil
  387. }