123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237 |
- // Package parse contains a collection of parsers for various formats in its subpackages.
- package parse
- import (
- "bytes"
- "encoding/base64"
- "errors"
- )
// Package-level values shared by DataURI.
var (
	// dataSchemeBytes is the scheme prefix DataURI requires at the start of its input.
	dataSchemeBytes = []byte("data:")

	// base64Bytes is the header token that makes DataURI base64-decode the payload.
	base64Bytes = []byte("base64")

	// textMimeBytes is the mediatype DataURI reports when the URI does not specify one.
	textMimeBytes = []byte("text/plain")

	// ErrBadDataURI is returned by DataURI when the input does not start with 'data:',
	// has nothing after the scheme, or contains no ',' separating the header from the data.
	ErrBadDataURI = errors.New("not a data URI")
)
// Number reports how many leading bytes of b form a number matching
// (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
// It returns 0 when b does not start with such a number. A '.' or an exponent
// marker that is not followed by the digits it requires is not counted, since
// it may belong to the next token.
func Number(b []byte) int {
	n := len(b)
	if n == 0 {
		return 0
	}

	// Optional sign; a sign on its own is not a number.
	pos := 0
	if c := b[0]; c == '+' || c == '-' {
		if n == 1 {
			return 0
		}
		pos = 1
	}

	// Integer part (may be empty when the number starts with '.').
	intStart := pos
	for pos < n && '0' <= b[pos] && b[pos] <= '9' {
		pos++
	}
	hasInt := pos > intStart

	// Fractional part.
	if pos < n && b[pos] == '.' {
		fracStart := pos + 1
		fracEnd := fracStart
		for fracEnd < n && '0' <= b[fracEnd] && b[fracEnd] <= '9' {
			fracEnd++
		}
		switch {
		case fracEnd > fracStart:
			pos = fracEnd
		case hasInt:
			// The '.' could belong to the next token; stop right before it.
			return pos
		default:
			// A lone '.' without digits on either side.
			return 0
		}
	} else if !hasInt {
		return 0
	}

	// Exponent part, only accepted when at least one digit follows.
	if pos < n && (b[pos] == 'e' || b[pos] == 'E') {
		expPos := pos + 1
		if expPos < n && (b[expPos] == '+' || b[expPos] == '-') {
			expPos++
		}
		digitsStart := expPos
		for expPos < n && '0' <= b[expPos] && b[expPos] <= '9' {
			expPos++
		}
		if expPos == digitsStart {
			// The 'e' could belong to the next token; stop right before it.
			return pos
		}
		pos = expPos
	}
	return pos
}
- // Dimension parses a byte-slice and returns the length of the number and its unit.
- func Dimension(b []byte) (int, int) {
- num := Number(b)
- if num == 0 || num == len(b) {
- return num, 0
- } else if b[num] == '%' {
- return num, 1
- } else if b[num] >= 'a' && b[num] <= 'z' || b[num] >= 'A' && b[num] <= 'Z' {
- i := num + 1
- for i < len(b) && (b[i] >= 'a' && b[i] <= 'z' || b[i] >= 'A' && b[i] <= 'Z') {
- i++
- }
- return num, i - num
- }
- return num, 0
- }
// Mediatype splits a mediatype such as "text/plain; charset=utf-8" into the
// mimetype and its parameters. It works similar to mime.ParseMediaType but is
// faster.
//
// Leading spaces are skipped. The mimetype runs up to the first ';' or ' '
// found at offset 3 or later (a mimetype is at least three characters long).
// The returned map is nil when no ';' follows the mimetype. A parameter
// without '=' is stored with an empty value.
func Mediatype(b []byte) ([]byte, map[string]string) {
	// Drop leading spaces.
	lead := 0
	for lead < len(b) && b[lead] == ' ' {
		lead++
	}
	b = b[lead:]
	n := len(b)

	// Locate the end of the mimetype.
	pos := 3
	for pos < n && b[pos] != ';' && b[pos] != ' ' {
		pos++
	}
	if pos >= n {
		return b, nil
	}
	mimetype := b[:pos]

	// Spaces may separate the mimetype from the first ';'; anything else ends parsing.
	for pos < n && b[pos] == ' ' {
		pos++
	}
	if pos >= n || b[pos] != ';' {
		return mimetype, nil
	}

	params := map[string]string{}
	s := string(b) // one conversion so keys and values can be sliced as strings
	for pos < n && s[pos] == ';' {
		pos++ // semicolon
		for pos < n && s[pos] == ' ' {
			pos++
		}

		// Key runs until '=', ';' or a space.
		keyStart := pos
		for pos < n && s[pos] != '=' && s[pos] != ';' && s[pos] != ' ' {
			pos++
		}
		key := s[keyStart:pos]
		for pos < n && s[pos] == ' ' {
			pos++
		}

		// Optional value runs until ';' or a space.
		value := ""
		if pos < n && s[pos] == '=' {
			pos++
			for pos < n && s[pos] == ' ' {
				pos++
			}
			valueStart := pos
			for pos < n && s[pos] != ';' && s[pos] != ' ' {
				pos++
			}
			value = s[valueStart:pos]
		}
		params[key] = value

		for pos < n && s[pos] == ' ' {
			pos++
		}
	}
	return mimetype, params
}
- // DataURI parses the given data URI and returns the mediatype, data and ok.
- func DataURI(dataURI []byte) ([]byte, []byte, error) {
- if len(dataURI) > 5 && bytes.Equal(dataURI[:5], dataSchemeBytes) {
- dataURI = dataURI[5:]
- inBase64 := false
- var mediatype []byte
- i := 0
- for j := 0; j < len(dataURI); j++ {
- c := dataURI[j]
- if c == '=' || c == ';' || c == ',' {
- if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), base64Bytes) {
- if len(mediatype) > 0 {
- mediatype = mediatype[:len(mediatype)-1]
- }
- inBase64 = true
- i = j
- } else if c != ',' {
- mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
- i = j + 1
- } else {
- mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
- }
- if c == ',' {
- if len(mediatype) == 0 || mediatype[0] == ';' {
- mediatype = textMimeBytes
- }
- data := dataURI[j+1:]
- if inBase64 {
- decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
- n, err := base64.StdEncoding.Decode(decoded, data)
- if err != nil {
- return nil, nil, err
- }
- data = decoded[:n]
- } else {
- data = DecodeURL(data)
- }
- return mediatype, data, nil
- }
- }
- }
- }
- return nil, nil, ErrBadDataURI
- }
// QuoteEntity checks whether b starts with a character entity for a quote and
// returns the quote it stands for (' or ") together with the entity's length
// in bytes. It returns 0, 0 when b does not start with such an entity.
//
// Recognized forms are the named entities &quot; and &apos;, the decimal
// references &#34; and &#39;, and the hexadecimal references &#x22; and
// &#x27; (lowercase 'x' only). Numeric references may carry leading zeros.
// TODO: deprecated
func QuoteEntity(b []byte) (quote byte, n int) {
	if len(b) < 5 || b[0] != '&' {
		return 0, 0
	}

	// Named entities: exactly "&quot;" or "&apos;".
	if b[1] != '#' {
		if len(b) >= 6 && b[5] == ';' {
			if bytes.Equal(b[1:5], []byte("quot")) {
				return '"', 6
			}
			if bytes.Equal(b[1:5], []byte("apos")) {
				return '\'', 6
			}
		}
		return 0, 0
	}

	// Numeric references: pick the two digits to match per notation.
	// Decimal: 34 is the double quote, 39 the single quote.
	pos := 2
	first, doubleDigit, singleDigit := byte('3'), byte('4'), byte('9')
	if b[2] == 'x' {
		// Hexadecimal: 22 is the double quote, 27 the single quote.
		pos = 3
		first, doubleDigit, singleDigit = '2', '2', '7'
	}

	// Skip leading zeros.
	for pos < len(b) && b[pos] == '0' {
		pos++
	}
	if pos+2 < len(b) && b[pos] == first && b[pos+2] == ';' {
		if b[pos+1] == doubleDigit {
			return '"', pos + 3
		}
		if b[pos+1] == singleDigit {
			return '\'', pos + 3
		}
	}
	return 0, 0
}
|