plural.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:generate go run gen.go gen_common.go
  5. // Package plural provides utilities for handling linguistic plurals in text.
  6. //
  7. // The definitions in this package are based on the plural rule handling defined
  8. // in CLDR. See
  9. // https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules for
  10. // details.
  11. package plural
  12. import (
  13. "golang.org/x/text/internal/language/compact"
  14. "golang.org/x/text/internal/number"
  15. "golang.org/x/text/language"
  16. )
// Rules defines the plural rules for all languages for a certain plural type.
//
// This package is UNDER CONSTRUCTION and its API may change.
type Rules struct {
	// rules holds the rule entries of all languages back to back; the entries
	// of one language form a contiguous range delimited through index.
	rules []pluralCheck
	// index holds range boundaries into rules: for an offset o obtained from
	// langToIndex, the language's entries are rules[index[o]:index[o+1]].
	index []byte
	// langToIndex maps a compact language ID to an offset into index.
	langToIndex []byte
	// inclusionMasks is indexed by an operand value reduced modulo maxMod and
	// yields a bit set that is tested against the bit 1<<setID of a rule.
	inclusionMasks []uint64
}
  26. var (
  27. // Cardinal defines the plural rules for numbers indicating quantities.
  28. Cardinal *Rules = cardinal
  29. // Ordinal defines the plural rules for numbers indicating position
  30. // (first, second, etc.).
  31. Ordinal *Rules = ordinal
  32. ordinal = &Rules{
  33. ordinalRules,
  34. ordinalIndex,
  35. ordinalLangToIndex,
  36. ordinalInclusionMasks[:],
  37. }
  38. cardinal = &Rules{
  39. cardinalRules,
  40. cardinalIndex,
  41. cardinalLangToIndex,
  42. cardinalInclusionMasks[:],
  43. }
  44. )
  45. // getIntApprox converts the digits in slice digits[start:end] to an integer
  46. // according to the following rules:
  47. // - Let i be asInt(digits[start:end]), where out-of-range digits are assumed
  48. // to be zero.
  49. // - Result n is big if i / 10^nMod > 1.
  50. // - Otherwise the result is i % 10^nMod.
  51. //
  52. // For example, if digits is {1, 2, 3} and start:end is 0:5, then the result
  53. // for various values of nMod is:
  54. // - when nMod == 2, n == big
  55. // - when nMod == 3, n == big
  56. // - when nMod == 4, n == big
  57. // - when nMod == 5, n == 12300
  58. // - when nMod == 6, n == 12300
  59. // - when nMod == 7, n == 12300
  60. func getIntApprox(digits []byte, start, end, nMod, big int) (n int) {
  61. // Leading 0 digits just result in 0.
  62. p := start
  63. if p < 0 {
  64. p = 0
  65. }
  66. // Range only over the part for which we have digits.
  67. mid := end
  68. if mid >= len(digits) {
  69. mid = len(digits)
  70. }
  71. // Check digits more significant that nMod.
  72. if q := end - nMod; q > 0 {
  73. if q > mid {
  74. q = mid
  75. }
  76. for ; p < q; p++ {
  77. if digits[p] != 0 {
  78. return big
  79. }
  80. }
  81. }
  82. for ; p < mid; p++ {
  83. n = 10*n + int(digits[p])
  84. }
  85. // Multiply for trailing zeros.
  86. for ; p < end; p++ {
  87. n *= 10
  88. }
  89. return n
  90. }
  91. // MatchDigits computes the plural form for the given language and the given
  92. // decimal floating point digits. The digits are stored in big-endian order and
  93. // are of value byte(0) - byte(9). The floating point position is indicated by
  94. // exp and the number of visible decimals is scale. All leading and trailing
  95. // zeros may be omitted from digits.
  96. //
  97. // The following table contains examples of possible arguments to represent
  98. // the given numbers.
  99. //
  100. // decimal digits exp scale
  101. // 123 []byte{1, 2, 3} 3 0
  102. // 123.4 []byte{1, 2, 3, 4} 3 1
  103. // 123.40 []byte{1, 2, 3, 4} 3 2
  104. // 100000 []byte{1} 6 0
  105. // 100000.00 []byte{1} 6 3
  106. func (p *Rules) MatchDigits(t language.Tag, digits []byte, exp, scale int) Form {
  107. index := tagToID(t)
  108. // Differentiate up to including mod 1000000 for the integer part.
  109. n := getIntApprox(digits, 0, exp, 6, 1000000)
  110. // Differentiate up to including mod 100 for the fractional part.
  111. f := getIntApprox(digits, exp, exp+scale, 2, 100)
  112. return matchPlural(p, index, n, f, scale)
  113. }
  114. func (p *Rules) matchDisplayDigits(t language.Tag, d *number.Digits) (Form, int) {
  115. n := getIntApprox(d.Digits, 0, int(d.Exp), 6, 1000000)
  116. return p.MatchDigits(t, d.Digits, int(d.Exp), d.NumFracDigits()), n
  117. }
  118. func validForms(p *Rules, t language.Tag) (forms []Form) {
  119. offset := p.langToIndex[tagToID(t)]
  120. rules := p.rules[p.index[offset]:p.index[offset+1]]
  121. forms = append(forms, Other)
  122. last := Other
  123. for _, r := range rules {
  124. if cat := Form(r.cat & formMask); cat != andNext && last != cat {
  125. forms = append(forms, cat)
  126. last = cat
  127. }
  128. }
  129. return forms
  130. }
  131. func (p *Rules) matchComponents(t language.Tag, n, f, scale int) Form {
  132. return matchPlural(p, tagToID(t), n, f, scale)
  133. }
  134. // MatchPlural returns the plural form for the given language and plural
  135. // operands (as defined in
  136. // https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules):
  137. //
  138. // where
  139. // n absolute value of the source number (integer and decimals)
  140. // input
  141. // i integer digits of n.
  142. // v number of visible fraction digits in n, with trailing zeros.
  143. // w number of visible fraction digits in n, without trailing zeros.
  144. // f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w))
  145. // t visible fractional digits in n, without trailing zeros.
  146. //
  147. // If any of the operand values is too large to fit in an int, it is okay to
  148. // pass the value modulo 10,000,000.
  149. func (p *Rules) MatchPlural(lang language.Tag, i, v, w, f, t int) Form {
  150. return matchPlural(p, tagToID(lang), i, f, v)
  151. }
  152. func matchPlural(p *Rules, index compact.ID, n, f, v int) Form {
  153. nMask := p.inclusionMasks[n%maxMod]
  154. // Compute the fMask inline in the rules below, as it is relatively rare.
  155. // fMask := p.inclusionMasks[f%maxMod]
  156. vMask := p.inclusionMasks[v%maxMod]
  157. // Do the matching
  158. offset := p.langToIndex[index]
  159. rules := p.rules[p.index[offset]:p.index[offset+1]]
  160. for i := 0; i < len(rules); i++ {
  161. rule := rules[i]
  162. setBit := uint64(1 << rule.setID)
  163. var skip bool
  164. switch op := opID(rule.cat >> opShift); op {
  165. case opI: // i = x
  166. skip = n >= numN || nMask&setBit == 0
  167. case opI | opNotEqual: // i != x
  168. skip = n < numN && nMask&setBit != 0
  169. case opI | opMod: // i % m = x
  170. skip = nMask&setBit == 0
  171. case opI | opMod | opNotEqual: // i % m != x
  172. skip = nMask&setBit != 0
  173. case opN: // n = x
  174. skip = f != 0 || n >= numN || nMask&setBit == 0
  175. case opN | opNotEqual: // n != x
  176. skip = f == 0 && n < numN && nMask&setBit != 0
  177. case opN | opMod: // n % m = x
  178. skip = f != 0 || nMask&setBit == 0
  179. case opN | opMod | opNotEqual: // n % m != x
  180. skip = f == 0 && nMask&setBit != 0
  181. case opF: // f = x
  182. skip = f >= numN || p.inclusionMasks[f%maxMod]&setBit == 0
  183. case opF | opNotEqual: // f != x
  184. skip = f < numN && p.inclusionMasks[f%maxMod]&setBit != 0
  185. case opF | opMod: // f % m = x
  186. skip = p.inclusionMasks[f%maxMod]&setBit == 0
  187. case opF | opMod | opNotEqual: // f % m != x
  188. skip = p.inclusionMasks[f%maxMod]&setBit != 0
  189. case opV: // v = x
  190. skip = v < numN && vMask&setBit == 0
  191. case opV | opNotEqual: // v != x
  192. skip = v < numN && vMask&setBit != 0
  193. case opW: // w == 0
  194. skip = f != 0
  195. case opW | opNotEqual: // w != 0
  196. skip = f == 0
  197. // Hard-wired rules that cannot be handled by our algorithm.
  198. case opBretonM:
  199. skip = f != 0 || n == 0 || n%1000000 != 0
  200. case opAzerbaijan00s:
  201. // 100,200,300,400,500,600,700,800,900
  202. skip = n == 0 || n >= 1000 || n%100 != 0
  203. case opItalian800:
  204. skip = (f != 0 || n >= numN || nMask&setBit == 0) && n != 800
  205. }
  206. if skip {
  207. // advance over AND entries.
  208. for ; i < len(rules) && rules[i].cat&formMask == andNext; i++ {
  209. }
  210. continue
  211. }
  212. // return if we have a final entry.
  213. if cat := rule.cat & formMask; cat != andNext {
  214. return Form(cat)
  215. }
  216. }
  217. return Other
  218. }
  219. func tagToID(t language.Tag) compact.ID {
  220. id, _ := compact.RegionalID(compact.Tag(t))
  221. return id
  222. }