parse.go 13 KB


  1. package css
  2. import (
  3. "bytes"
  4. "fmt"
  5. "strconv"
  6. "github.com/tdewolff/parse/v2"
  7. "github.com/tdewolff/parse/v2/buffer"
  8. )
  // Shared byte slices used as the data of tokens the parser synthesizes itself.
  var (
  	wsBytes    = []byte(" ") // data of the single whitespace token pushed into the value buffer
  	endBytes   = []byte("}") // data of the right brace that Next replays when prevEnd is set
  	emptyBytes = []byte("")  // data stored on the parser while a qualified rule is being collected
  )
  // GrammarType identifies the kind of grammar construct returned by Parser.Next.
  type GrammarType uint32

  // GrammarType values.
  const (
  	ErrorGrammar          GrammarType = iota // extra token when errors occur
  	CommentGrammar                           // a comment at the top level of a stylesheet
  	AtRuleGrammar                            // an at-rule without a block
  	BeginAtRuleGrammar                       // the prelude of an at-rule up to its opening brace
  	EndAtRuleGrammar                         // the end of an at-rule block
  	QualifiedRuleGrammar                     // one selector of a selector list, ended by a comma
  	BeginRulesetGrammar                      // the last selector of a ruleset up to its opening brace
  	EndRulesetGrammar                        // the end of a ruleset block
  	DeclarationGrammar                       // a property declaration
  	TokenGrammar                             // a single token passed through as is
  	CustomPropertyGrammar                    // a custom property declaration
  )

  // grammarTypeNames maps every defined GrammarType to its printable name.
  var grammarTypeNames = [...]string{
  	ErrorGrammar:          "Error",
  	CommentGrammar:        "Comment",
  	AtRuleGrammar:         "AtRule",
  	BeginAtRuleGrammar:    "BeginAtRule",
  	EndAtRuleGrammar:      "EndAtRule",
  	QualifiedRuleGrammar:  "QualifiedRule",
  	BeginRulesetGrammar:   "BeginRuleset",
  	EndRulesetGrammar:     "EndRuleset",
  	DeclarationGrammar:    "Declaration",
  	TokenGrammar:          "Token",
  	CustomPropertyGrammar: "CustomProperty",
  }

  // String returns the string representation of a GrammarType. Values outside the defined
  // range are rendered as "Invalid(n)".
  func (tt GrammarType) String() string {
  	if tt < GrammarType(len(grammarTypeNames)) {
  		return grammarTypeNames[tt]
  	}
  	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
  }
  56. ////////////////////////////////////////////////////////////////
  57. // State is the state function the parser currently is in.
  58. type State func(*Parser) GrammarType
  59. // Token is a single TokenType and its associated data.
  60. type Token struct {
  61. TokenType
  62. Data []byte
  63. }
  64. func (t Token) String() string {
  65. return t.TokenType.String() + "('" + string(t.Data) + "')"
  66. }
  67. // Parser is the state for the parser.
  68. type Parser struct {
  69. l *Lexer
  70. state []State
  71. err string
  72. errPos int
  73. buf []Token
  74. level int
  75. data []byte
  76. tt TokenType
  77. keepWS bool
  78. prevWS bool
  79. prevEnd bool
  80. prevComment bool
  81. }
  82. // NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute.
  83. func NewParser(r *parse.Input, isInline bool) *Parser {
  84. l := NewLexer(r)
  85. p := &Parser{
  86. l: l,
  87. state: make([]State, 0, 4),
  88. }
  89. if isInline {
  90. p.state = append(p.state, (*Parser).parseDeclarationList)
  91. } else {
  92. p.state = append(p.state, (*Parser).parseStylesheet)
  93. }
  94. return p
  95. }
  96. // HasParseError returns true if there is a parse error (and not a read error).
  97. func (p *Parser) HasParseError() bool {
  98. return p.err != ""
  99. }
  100. // Err returns the error encountered during parsing, this is often io.EOF but also other errors can be returned.
  101. func (p *Parser) Err() error {
  102. if p.err != "" {
  103. r := buffer.NewReader(p.l.r.Bytes())
  104. return parse.NewError(r, p.errPos, p.err)
  105. }
  106. return p.l.Err()
  107. }
  108. // Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message.
  109. func (p *Parser) Next() (GrammarType, TokenType, []byte) {
  110. p.err = ""
  111. if p.prevEnd {
  112. p.tt, p.data = RightBraceToken, endBytes
  113. p.prevEnd = false
  114. } else {
  115. p.tt, p.data = p.popToken(true)
  116. }
  117. gt := p.state[len(p.state)-1](p)
  118. return gt, p.tt, p.data
  119. }
  120. // Offset return offset for current Grammar
  121. func (p *Parser) Offset() int {
  122. return p.l.r.Offset()
  123. }
  124. // Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and Declaration will return the at-rule components, ruleset selector and declaration values respectively.
  125. func (p *Parser) Values() []Token {
  126. return p.buf
  127. }
  128. func (p *Parser) popToken(allowComment bool) (TokenType, []byte) {
  129. p.prevWS = false
  130. p.prevComment = false
  131. tt, data := p.l.Next()
  132. for !p.keepWS && tt == WhitespaceToken || tt == CommentToken {
  133. if tt == WhitespaceToken {
  134. p.prevWS = true
  135. } else {
  136. p.prevComment = true
  137. if allowComment && len(p.state) == 1 {
  138. break
  139. }
  140. }
  141. tt, data = p.l.Next()
  142. }
  143. return tt, data
  144. }
  145. func (p *Parser) initBuf() {
  146. p.buf = p.buf[:0]
  147. }
  148. func (p *Parser) pushBuf(tt TokenType, data []byte) {
  149. p.buf = append(p.buf, Token{tt, data})
  150. }
  151. ////////////////////////////////////////////////////////////////
  152. func (p *Parser) parseStylesheet() GrammarType {
  153. if p.tt == CDOToken || p.tt == CDCToken {
  154. return TokenGrammar
  155. } else if p.tt == AtKeywordToken {
  156. return p.parseAtRule()
  157. } else if p.tt == CommentToken {
  158. return CommentGrammar
  159. } else if p.tt == ErrorToken {
  160. return ErrorGrammar
  161. }
  162. return p.parseQualifiedRule()
  163. }
  164. func (p *Parser) parseDeclarationList() GrammarType {
  165. if p.tt == CommentToken {
  166. p.tt, p.data = p.popToken(false)
  167. }
  168. for p.tt == SemicolonToken {
  169. p.tt, p.data = p.popToken(false)
  170. }
  171. // IE hack: *color:red;
  172. if p.tt == DelimToken && p.data[0] == '*' {
  173. tt, data := p.popToken(false)
  174. p.tt = tt
  175. p.data = append(p.data, data...)
  176. }
  177. if p.tt == ErrorToken {
  178. return ErrorGrammar
  179. } else if p.tt == AtKeywordToken {
  180. return p.parseAtRule()
  181. } else if p.tt == IdentToken || p.tt == DelimToken {
  182. return p.parseDeclaration()
  183. } else if p.tt == CustomPropertyNameToken {
  184. return p.parseCustomProperty()
  185. }
  186. // parse error
  187. p.initBuf()
  188. p.l.r.Move(-len(p.data))
  189. p.err, p.errPos = fmt.Sprintf("unexpected token '%s' in declaration", string(p.data)), p.l.r.Offset()
  190. p.l.r.Move(len(p.data))
  191. if p.tt == RightBraceToken {
  192. // right brace token will occur when we've had a decl error that ended in a right brace token
  193. // as these are not handled by decl error, we handle it here explicitly. Normally its used to end eg. the qual rule.
  194. p.pushBuf(p.tt, p.data)
  195. return ErrorGrammar
  196. }
  197. return p.parseDeclarationError(p.tt, p.data)
  198. }
  199. ////////////////////////////////////////////////////////////////
  200. func (p *Parser) parseAtRule() GrammarType {
  201. p.initBuf()
  202. p.data = parse.ToLower(parse.Copy(p.data))
  203. atRuleName := p.data
  204. if len(atRuleName) > 0 && atRuleName[1] == '-' {
  205. if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 {
  206. atRuleName = atRuleName[i+2:] // skip vendor specific prefix
  207. }
  208. }
  209. atRule := ToHash(atRuleName[1:])
  210. first := true
  211. skipWS := false
  212. for {
  213. tt, data := p.popToken(false)
  214. if tt == LeftBraceToken && p.level == 0 {
  215. if atRule == Font_Face || atRule == Page {
  216. p.state = append(p.state, (*Parser).parseAtRuleDeclarationList)
  217. } else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports {
  218. p.state = append(p.state, (*Parser).parseAtRuleRuleList)
  219. } else {
  220. p.state = append(p.state, (*Parser).parseAtRuleUnknown)
  221. }
  222. return BeginAtRuleGrammar
  223. } else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  224. p.prevEnd = (tt == RightBraceToken)
  225. return AtRuleGrammar
  226. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  227. p.level++
  228. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  229. if p.level == 0 {
  230. // TODO: buggy
  231. p.pushBuf(tt, data)
  232. if 1 < len(p.state) {
  233. p.state = p.state[:len(p.state)-1]
  234. }
  235. p.err, p.errPos = "unexpected ending in at rule", p.l.r.Offset()
  236. return ErrorGrammar
  237. }
  238. p.level--
  239. }
  240. if first {
  241. if tt == LeftParenthesisToken || tt == LeftBracketToken {
  242. p.prevWS = false
  243. }
  244. first = false
  245. }
  246. if len(data) == 1 && (data[0] == ',' || data[0] == ':') {
  247. skipWS = true
  248. } else if p.prevWS && !skipWS && tt != RightParenthesisToken {
  249. p.pushBuf(WhitespaceToken, wsBytes)
  250. } else {
  251. skipWS = false
  252. }
  253. if tt == LeftParenthesisToken {
  254. skipWS = true
  255. }
  256. p.pushBuf(tt, data)
  257. }
  258. }
  259. func (p *Parser) parseAtRuleRuleList() GrammarType {
  260. if p.tt == RightBraceToken || p.tt == ErrorToken {
  261. p.state = p.state[:len(p.state)-1]
  262. return EndAtRuleGrammar
  263. } else if p.tt == AtKeywordToken {
  264. return p.parseAtRule()
  265. } else {
  266. return p.parseQualifiedRule()
  267. }
  268. }
  269. func (p *Parser) parseAtRuleDeclarationList() GrammarType {
  270. for p.tt == SemicolonToken {
  271. p.tt, p.data = p.popToken(false)
  272. }
  273. if p.tt == RightBraceToken || p.tt == ErrorToken {
  274. p.state = p.state[:len(p.state)-1]
  275. return EndAtRuleGrammar
  276. }
  277. return p.parseDeclarationList()
  278. }
  279. func (p *Parser) parseAtRuleUnknown() GrammarType {
  280. p.keepWS = true
  281. if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken {
  282. p.state = p.state[:len(p.state)-1]
  283. p.keepWS = false
  284. return EndAtRuleGrammar
  285. }
  286. if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken {
  287. p.level++
  288. } else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken {
  289. p.level--
  290. }
  291. return TokenGrammar
  292. }
  293. func (p *Parser) parseQualifiedRule() GrammarType {
  294. p.initBuf()
  295. first := true
  296. inAttrSel := false
  297. skipWS := true
  298. var tt TokenType
  299. var data []byte
  300. for {
  301. if first {
  302. tt, data = p.tt, p.data
  303. p.tt = WhitespaceToken
  304. p.data = emptyBytes
  305. first = false
  306. } else {
  307. tt, data = p.popToken(false)
  308. }
  309. if tt == LeftBraceToken && p.level == 0 {
  310. p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList)
  311. return BeginRulesetGrammar
  312. } else if tt == ErrorToken {
  313. p.err, p.errPos = "unexpected ending in qualified rule", p.l.r.Offset()
  314. return ErrorGrammar
  315. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  316. p.level++
  317. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  318. if p.level == 0 {
  319. // TODO: buggy
  320. p.pushBuf(tt, data)
  321. if 1 < len(p.state) {
  322. p.state = p.state[:len(p.state)-1]
  323. }
  324. p.err, p.errPos = "unexpected ending in qualified rule", p.l.r.Offset()
  325. return ErrorGrammar
  326. }
  327. p.level--
  328. }
  329. if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') {
  330. if data[0] == ',' {
  331. return QualifiedRuleGrammar
  332. }
  333. skipWS = true
  334. } else if p.prevWS && !skipWS && !inAttrSel {
  335. p.pushBuf(WhitespaceToken, wsBytes)
  336. } else {
  337. skipWS = false
  338. }
  339. if tt == LeftBracketToken {
  340. inAttrSel = true
  341. } else if tt == RightBracketToken {
  342. inAttrSel = false
  343. }
  344. p.pushBuf(tt, data)
  345. }
  346. }
  347. func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType {
  348. for p.tt == SemicolonToken {
  349. p.tt, p.data = p.popToken(false)
  350. }
  351. if p.tt == RightBraceToken || p.tt == ErrorToken {
  352. p.state = p.state[:len(p.state)-1]
  353. return EndRulesetGrammar
  354. }
  355. return p.parseDeclarationList()
  356. }
  357. func (p *Parser) parseDeclaration() GrammarType {
  358. p.initBuf()
  359. p.data = parse.ToLower(parse.Copy(p.data))
  360. ttName, dataName := p.tt, p.data
  361. tt, data := p.popToken(false)
  362. if tt != ColonToken {
  363. p.l.r.Move(-len(data))
  364. p.err, p.errPos = "expected colon in declaration", p.l.r.Offset()
  365. p.l.r.Move(len(data))
  366. p.pushBuf(ttName, dataName)
  367. return p.parseDeclarationError(tt, data)
  368. }
  369. skipWS := true
  370. for {
  371. tt, data := p.popToken(false)
  372. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  373. p.prevEnd = (tt == RightBraceToken)
  374. return DeclarationGrammar
  375. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  376. p.level++
  377. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  378. if p.level == 0 {
  379. // TODO: buggy
  380. p.err, p.errPos = "unexpected ending in declaration", p.l.r.Offset()
  381. p.pushBuf(ttName, dataName)
  382. p.pushBuf(ColonToken, []byte{':'})
  383. return p.parseDeclarationError(tt, data)
  384. }
  385. p.level--
  386. }
  387. if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' || data[0] == '=') {
  388. skipWS = true
  389. } else if (p.prevWS || p.prevComment) && !skipWS {
  390. p.pushBuf(WhitespaceToken, wsBytes)
  391. } else {
  392. skipWS = false
  393. }
  394. p.pushBuf(tt, data)
  395. }
  396. }
  397. func (p *Parser) parseDeclarationError(tt TokenType, data []byte) GrammarType {
  398. // we're on the offending (tt,data), keep popping tokens till we reach ;, }, or EOF
  399. p.tt, p.data = tt, data
  400. for {
  401. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  402. p.prevEnd = (tt == RightBraceToken)
  403. if tt == SemicolonToken {
  404. p.pushBuf(tt, data)
  405. }
  406. return ErrorGrammar
  407. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  408. p.level++
  409. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  410. p.level--
  411. }
  412. if p.prevWS {
  413. p.pushBuf(WhitespaceToken, wsBytes)
  414. }
  415. p.pushBuf(tt, data)
  416. tt, data = p.popToken(false)
  417. }
  418. }
  419. func (p *Parser) parseCustomProperty() GrammarType {
  420. p.initBuf()
  421. if tt, data := p.popToken(false); tt != ColonToken {
  422. p.l.r.Move(-len(data))
  423. p.err, p.errPos = "expected colon in custom property", p.l.r.Offset()
  424. p.l.r.Move(len(data))
  425. return ErrorGrammar
  426. }
  427. val := []byte{}
  428. for {
  429. tt, data := p.l.Next()
  430. if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
  431. p.prevEnd = (tt == RightBraceToken)
  432. p.pushBuf(CustomPropertyValueToken, val)
  433. return CustomPropertyGrammar
  434. } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
  435. p.level++
  436. } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
  437. if p.level == 0 {
  438. // TODO: buggy
  439. p.pushBuf(tt, data)
  440. p.err, p.errPos = "unexpected ending in custom property", p.l.r.Offset()
  441. return ErrorGrammar
  442. }
  443. p.level--
  444. }
  445. val = append(val, data...)
  446. }
  447. }