common.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. // Package parse contains a collection of parsers for various formats in its subpackages.
  2. package parse
  3. import (
  4. "bytes"
  5. "encoding/base64"
  6. "errors"
  7. )
  8. var (
  9. dataSchemeBytes = []byte("data:")
  10. base64Bytes = []byte("base64")
  11. textMimeBytes = []byte("text/plain")
  12. )
  13. // ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:' or is too short.
  14. var ErrBadDataURI = errors.New("not a data URI")
  15. // Number returns the number of bytes that parse as a number of the regex format (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
  16. func Number(b []byte) int {
  17. if len(b) == 0 {
  18. return 0
  19. }
  20. i := 0
  21. if b[i] == '+' || b[i] == '-' {
  22. i++
  23. if i >= len(b) {
  24. return 0
  25. }
  26. }
  27. firstDigit := (b[i] >= '0' && b[i] <= '9')
  28. if firstDigit {
  29. i++
  30. for i < len(b) && b[i] >= '0' && b[i] <= '9' {
  31. i++
  32. }
  33. }
  34. if i < len(b) && b[i] == '.' {
  35. i++
  36. if i < len(b) && b[i] >= '0' && b[i] <= '9' {
  37. i++
  38. for i < len(b) && b[i] >= '0' && b[i] <= '9' {
  39. i++
  40. }
  41. } else if firstDigit {
  42. // . could belong to the next token
  43. i--
  44. return i
  45. } else {
  46. return 0
  47. }
  48. } else if !firstDigit {
  49. return 0
  50. }
  51. iOld := i
  52. if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
  53. i++
  54. if i < len(b) && (b[i] == '+' || b[i] == '-') {
  55. i++
  56. }
  57. if i >= len(b) || b[i] < '0' || b[i] > '9' {
  58. // e could belong to next token
  59. return iOld
  60. }
  61. for i < len(b) && b[i] >= '0' && b[i] <= '9' {
  62. i++
  63. }
  64. }
  65. return i
  66. }
  67. // Dimension parses a byte-slice and returns the length of the number and its unit.
  68. func Dimension(b []byte) (int, int) {
  69. num := Number(b)
  70. if num == 0 || num == len(b) {
  71. return num, 0
  72. } else if b[num] == '%' {
  73. return num, 1
  74. } else if b[num] >= 'a' && b[num] <= 'z' || b[num] >= 'A' && b[num] <= 'Z' {
  75. i := num + 1
  76. for i < len(b) && (b[i] >= 'a' && b[i] <= 'z' || b[i] >= 'A' && b[i] <= 'Z') {
  77. i++
  78. }
  79. return num, i - num
  80. }
  81. return num, 0
  82. }
  83. // Mediatype parses a given mediatype and splits the mimetype from the parameters.
  84. // It works similar to mime.ParseMediaType but is faster.
  85. func Mediatype(b []byte) ([]byte, map[string]string) {
  86. i := 0
  87. for i < len(b) && b[i] == ' ' {
  88. i++
  89. }
  90. b = b[i:]
  91. n := len(b)
  92. mimetype := b
  93. var params map[string]string
  94. for i := 3; i < n; i++ { // mimetype is at least three characters long
  95. if b[i] == ';' || b[i] == ' ' {
  96. mimetype = b[:i]
  97. if b[i] == ' ' {
  98. i++ // space
  99. for i < n && b[i] == ' ' {
  100. i++
  101. }
  102. if n <= i || b[i] != ';' {
  103. break
  104. }
  105. }
  106. params = map[string]string{}
  107. s := string(b)
  108. PARAM:
  109. i++ // semicolon
  110. for i < n && s[i] == ' ' {
  111. i++
  112. }
  113. start := i
  114. for i < n && s[i] != '=' && s[i] != ';' && s[i] != ' ' {
  115. i++
  116. }
  117. key := s[start:i]
  118. for i < n && s[i] == ' ' {
  119. i++
  120. }
  121. if i < n && s[i] == '=' {
  122. i++
  123. for i < n && s[i] == ' ' {
  124. i++
  125. }
  126. start = i
  127. for i < n && s[i] != ';' && s[i] != ' ' {
  128. i++
  129. }
  130. } else {
  131. start = i
  132. }
  133. params[key] = s[start:i]
  134. for i < n && s[i] == ' ' {
  135. i++
  136. }
  137. if i < n && s[i] == ';' {
  138. goto PARAM
  139. }
  140. break
  141. }
  142. }
  143. return mimetype, params
  144. }
  145. // DataURI parses the given data URI and returns the mediatype, data and ok.
  146. func DataURI(dataURI []byte) ([]byte, []byte, error) {
  147. if len(dataURI) > 5 && bytes.Equal(dataURI[:5], dataSchemeBytes) {
  148. dataURI = dataURI[5:]
  149. inBase64 := false
  150. var mediatype []byte
  151. i := 0
  152. for j := 0; j < len(dataURI); j++ {
  153. c := dataURI[j]
  154. if c == '=' || c == ';' || c == ',' {
  155. if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), base64Bytes) {
  156. if len(mediatype) > 0 {
  157. mediatype = mediatype[:len(mediatype)-1]
  158. }
  159. inBase64 = true
  160. i = j
  161. } else if c != ',' {
  162. mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
  163. i = j + 1
  164. } else {
  165. mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
  166. }
  167. if c == ',' {
  168. if len(mediatype) == 0 || mediatype[0] == ';' {
  169. mediatype = textMimeBytes
  170. }
  171. data := dataURI[j+1:]
  172. if inBase64 {
  173. decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
  174. n, err := base64.StdEncoding.Decode(decoded, data)
  175. if err != nil {
  176. return nil, nil, err
  177. }
  178. data = decoded[:n]
  179. } else {
  180. data = DecodeURL(data)
  181. }
  182. return mediatype, data, nil
  183. }
  184. }
  185. }
  186. }
  187. return nil, nil, ErrBadDataURI
  188. }
  189. // QuoteEntity parses the given byte slice and returns the quote that got matched (' or ") and its entity length.
  190. // TODO: deprecated
  191. func QuoteEntity(b []byte) (quote byte, n int) {
  192. if len(b) < 5 || b[0] != '&' {
  193. return 0, 0
  194. }
  195. if b[1] == '#' {
  196. if b[2] == 'x' {
  197. i := 3
  198. for i < len(b) && b[i] == '0' {
  199. i++
  200. }
  201. if i+2 < len(b) && b[i] == '2' && b[i+2] == ';' {
  202. if b[i+1] == '2' {
  203. return '"', i + 3 // &#x22;
  204. } else if b[i+1] == '7' {
  205. return '\'', i + 3 // &#x27;
  206. }
  207. }
  208. } else {
  209. i := 2
  210. for i < len(b) && b[i] == '0' {
  211. i++
  212. }
  213. if i+2 < len(b) && b[i] == '3' && b[i+2] == ';' {
  214. if b[i+1] == '4' {
  215. return '"', i + 3 // &#34;
  216. } else if b[i+1] == '9' {
  217. return '\'', i + 3 // &#39;
  218. }
  219. }
  220. }
  221. } else if len(b) >= 6 && b[5] == ';' {
  222. if bytes.Equal(b[1:5], []byte{'q', 'u', 'o', 't'}) {
  223. return '"', 6 // &quot;
  224. } else if bytes.Equal(b[1:5], []byte{'a', 'p', 'o', 's'}) {
  225. return '\'', 6 // &apos;
  226. }
  227. }
  228. return 0, 0
  229. }