12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846 |
- // Package parser provides a handlebars syntax analyser. It consumes the tokens provided by the lexer to build an AST.
- package parser
- import (
- "fmt"
- "regexp"
- "runtime"
- "strconv"
- "github.com/aymerick/raymond/ast"
- "github.com/aymerick/raymond/lexer"
- )
- // References:
- // - https://github.com/wycats/handlebars.js/blob/master/src/handlebars.yy
- // - https://github.com/golang/go/blob/master/src/text/template/parse/parse.go
// parser is a syntax analyzer. It pulls tokens from the lexer on demand and
// buffers them, so grammar rules can look ahead without consuming input.
type parser struct {
	// Lexer that tokenizes the input template
	lex *lexer.Lexer

	// Root node of the AST being built
	root ast.Node

	// Tokens fetched from the lexer but not consumed yet (lookahead buffer)
	tokens []*lexer.Token

	// True once all tokens have been retrieved from the lexer (EOF or error reached)
	lexOver bool
}
var (
	// rOpenComment matches the opening delimiter of a comment mustache,
	// e.g. `{{!`, `{{~!`, `{{!--`
	rOpenComment = regexp.MustCompile(`^\{\{~?!-?-?`)

	// rCloseComment matches the closing delimiter of a comment mustache,
	// e.g. `}}`, `--~}}`
	rCloseComment = regexp.MustCompile(`-?-?~?\}\}$`)

	// rOpenAmp matches the `{{&` opening of an unescaped mustache
	rOpenAmp = regexp.MustCompile(`^\{\{~?&`)
)
- // new instanciates a new parser
- func new(input string) *parser {
- return &parser{
- lex: lexer.Scan(input),
- }
- }
- // Parse analyzes given input and returns the AST root node.
- func Parse(input string) (result *ast.Program, err error) {
- // recover error
- defer errRecover(&err)
- parser := new(input)
- // parse
- result = parser.parseProgram()
- // check last token
- token := parser.shift()
- if token.Kind != lexer.TokenEOF {
- // Parsing ended before EOF
- errToken(token, "Syntax error")
- }
- // fix whitespaces
- processWhitespaces(result)
- // named returned values
- return
- }
// errRecover recovers from a parsing panic and stores the error in errp.
// Runtime errors and non-error panic values are re-raised.
func errRecover(errp *error) {
	recovered := recover()
	if recovered == nil {
		return
	}

	switch typed := recovered.(type) {
	case runtime.Error:
		// genuine bug: propagate
		panic(recovered)
	case error:
		*errp = typed
	default:
		// unknown panic value: propagate
		panic(recovered)
	}
}
// errPanic panics with a parse error referencing the given source line.
func errPanic(err error, line int) {
	panic(fmt.Errorf("Parse error on line %d:\n%s", line, err))
}

// errNode panics with given node infos.
func errNode(node ast.Node, msg string) {
	errPanic(fmt.Errorf("%s\nNode: %s", msg, node), node.Location().Line)
}

// errToken panics with given token infos.
func errToken(tok *lexer.Token, msg string) {
	errPanic(fmt.Errorf("%s\nToken: %s", msg, tok), tok.Line)
}

// errExpected panics because of an unexpected token kind.
func errExpected(expect lexer.TokenKind, tok *lexer.Token) {
	errPanic(fmt.Errorf("Expecting %s, got: '%s'", expect, tok), tok.Line)
}
- // program : statement*
- func (p *parser) parseProgram() *ast.Program {
- result := ast.NewProgram(p.next().Pos, p.next().Line)
- for p.isStatement() {
- result.AddStatement(p.parseStatement())
- }
- return result
- }
- // statement : mustache | block | rawBlock | partial | content | COMMENT
- func (p *parser) parseStatement() ast.Node {
- var result ast.Node
- tok := p.next()
- switch tok.Kind {
- case lexer.TokenOpen, lexer.TokenOpenUnescaped:
- // mustache
- result = p.parseMustache()
- case lexer.TokenOpenBlock:
- // block
- result = p.parseBlock()
- case lexer.TokenOpenInverse:
- // block
- result = p.parseInverse()
- case lexer.TokenOpenRawBlock:
- // rawBlock
- result = p.parseRawBlock()
- case lexer.TokenOpenPartial:
- // partial
- result = p.parsePartial()
- case lexer.TokenContent:
- // content
- result = p.parseContent()
- case lexer.TokenComment:
- // COMMENT
- result = p.parseComment()
- }
- return result
- }
- // isStatement returns true if next token starts a statement
- func (p *parser) isStatement() bool {
- if !p.have(1) {
- return false
- }
- switch p.next().Kind {
- case lexer.TokenOpen, lexer.TokenOpenUnescaped, lexer.TokenOpenBlock,
- lexer.TokenOpenInverse, lexer.TokenOpenRawBlock, lexer.TokenOpenPartial,
- lexer.TokenContent, lexer.TokenComment:
- return true
- }
- return false
- }
- // content : CONTENT
- func (p *parser) parseContent() *ast.ContentStatement {
- // CONTENT
- tok := p.shift()
- if tok.Kind != lexer.TokenContent {
- // @todo This check can be removed if content is optional in a raw block
- errExpected(lexer.TokenContent, tok)
- }
- return ast.NewContentStatement(tok.Pos, tok.Line, tok.Val)
- }
- // COMMENT
- func (p *parser) parseComment() *ast.CommentStatement {
- // COMMENT
- tok := p.shift()
- value := rOpenComment.ReplaceAllString(tok.Val, "")
- value = rCloseComment.ReplaceAllString(value, "")
- result := ast.NewCommentStatement(tok.Pos, tok.Line, value)
- result.Strip = ast.NewStripForStr(tok.Val)
- return result
- }
- // param* hash?
- func (p *parser) parseExpressionParamsHash() ([]ast.Node, *ast.Hash) {
- var params []ast.Node
- var hash *ast.Hash
- // params*
- if p.isParam() {
- params = p.parseParams()
- }
- // hash?
- if p.isHashSegment() {
- hash = p.parseHash()
- }
- return params, hash
- }
- // helperName param* hash?
- func (p *parser) parseExpression(tok *lexer.Token) *ast.Expression {
- result := ast.NewExpression(tok.Pos, tok.Line)
- // helperName
- result.Path = p.parseHelperName()
- // param* hash?
- result.Params, result.Hash = p.parseExpressionParamsHash()
- return result
- }
// rawBlock    : openRawBlock content endRawBlock
// openRawBlock: OPEN_RAW_BLOCK helperName param* hash? CLOSE_RAW_BLOCK
// endRawBlock : OPEN_END_RAW_BLOCK helperName CLOSE_RAW_BLOCK
//
// parseRawBlock parses a `{{{{helper}}}} ... {{{{/helper}}}}` block. The
// closing helper name must match the opening one. Token consumption order is
// significant here: each shift below corresponds to one grammar terminal.
func (p *parser) parseRawBlock() *ast.BlockStatement {
	// OPEN_RAW_BLOCK
	tok := p.shift()
	result := ast.NewBlockStatement(tok.Pos, tok.Line)

	// helperName param* hash?
	result.Expression = p.parseExpression(tok)

	// remember the canonical opening name to compare with the closing one
	openName := result.Expression.Canonical()

	// CLOSE_RAW_BLOCK
	tok = p.shift()
	if tok.Kind != lexer.TokenCloseRawBlock {
		errExpected(lexer.TokenCloseRawBlock, tok)
	}

	// content
	// @todo Is content mandatory in a raw block ?
	content := p.parseContent()

	// raw-block content is wrapped in a single-statement program
	program := ast.NewProgram(tok.Pos, tok.Line)
	program.AddStatement(content)
	result.Program = program

	// OPEN_END_RAW_BLOCK
	tok = p.shift()
	if tok.Kind != lexer.TokenOpenEndRawBlock {
		// should never happen as it is caught by lexer
		errExpected(lexer.TokenOpenEndRawBlock, tok)
	}

	// helperName
	endID := p.parseHelperName()

	closeName, ok := ast.HelperNameStr(endID)
	if !ok {
		errNode(endID, "Erroneous closing expression")
	}

	if openName != closeName {
		errNode(endID, fmt.Sprintf("%s doesn't match %s", openName, closeName))
	}

	// CLOSE_RAW_BLOCK
	tok = p.shift()
	if tok.Kind != lexer.TokenCloseRawBlock {
		errExpected(lexer.TokenCloseRawBlock, tok)
	}

	return result
}
- // block : openBlock program inverseChain? closeBlock
- func (p *parser) parseBlock() *ast.BlockStatement {
- // openBlock
- result, blockParams := p.parseOpenBlock()
- // program
- program := p.parseProgram()
- program.BlockParams = blockParams
- result.Program = program
- // inverseChain?
- if p.isInverseChain() {
- result.Inverse = p.parseInverseChain()
- }
- // closeBlock
- p.parseCloseBlock(result)
- setBlockInverseStrip(result)
- return result
- }
- // setBlockInverseStrip is called when parsing `block` (openBlock | openInverse) and `inverseChain`
- //
- // TODO: This was totally cargo culted ! CHECK THAT !
- //
- // cf. prepareBlock() in:
- // https://github.com/wycats/handlebars.js/blob/master/lib/handlebars/compiler/helper.js
- func setBlockInverseStrip(block *ast.BlockStatement) {
- if block.Inverse == nil {
- return
- }
- if block.Inverse.Chained {
- b, _ := block.Inverse.Body[0].(*ast.BlockStatement)
- b.CloseStrip = block.CloseStrip
- }
- block.InverseStrip = block.Inverse.Strip
- }
- // block : openInverse program inverseAndProgram? closeBlock
- func (p *parser) parseInverse() *ast.BlockStatement {
- // openInverse
- result, blockParams := p.parseOpenBlock()
- // program
- program := p.parseProgram()
- program.BlockParams = blockParams
- result.Inverse = program
- // inverseAndProgram?
- if p.isInverse() {
- result.Program = p.parseInverseAndProgram()
- }
- // closeBlock
- p.parseCloseBlock(result)
- setBlockInverseStrip(result)
- return result
- }
- // helperName param* hash? blockParams?
- func (p *parser) parseOpenBlockExpression(tok *lexer.Token) (*ast.BlockStatement, []string) {
- var blockParams []string
- result := ast.NewBlockStatement(tok.Pos, tok.Line)
- // helperName param* hash?
- result.Expression = p.parseExpression(tok)
- // blockParams?
- if p.isBlockParams() {
- blockParams = p.parseBlockParams()
- }
- // named returned values
- return result, blockParams
- }
- // inverseChain : openInverseChain program inverseChain?
- // | inverseAndProgram
- func (p *parser) parseInverseChain() *ast.Program {
- if p.isInverse() {
- // inverseAndProgram
- return p.parseInverseAndProgram()
- }
- result := ast.NewProgram(p.next().Pos, p.next().Line)
- // openInverseChain
- block, blockParams := p.parseOpenBlock()
- // program
- program := p.parseProgram()
- program.BlockParams = blockParams
- block.Program = program
- // inverseChain?
- if p.isInverseChain() {
- block.Inverse = p.parseInverseChain()
- }
- setBlockInverseStrip(block)
- result.Chained = true
- result.AddStatement(block)
- return result
- }
- // Returns true if current token starts an inverse chain
- func (p *parser) isInverseChain() bool {
- return p.isOpenInverseChain() || p.isInverse()
- }
- // inverseAndProgram : INVERSE program
- func (p *parser) parseInverseAndProgram() *ast.Program {
- // INVERSE
- tok := p.shift()
- // program
- result := p.parseProgram()
- result.Strip = ast.NewStripForStr(tok.Val)
- return result
- }
- // openBlock : OPEN_BLOCK helperName param* hash? blockParams? CLOSE
- // openInverse : OPEN_INVERSE helperName param* hash? blockParams? CLOSE
- // openInverseChain: OPEN_INVERSE_CHAIN helperName param* hash? blockParams? CLOSE
- func (p *parser) parseOpenBlock() (*ast.BlockStatement, []string) {
- // OPEN_BLOCK | OPEN_INVERSE | OPEN_INVERSE_CHAIN
- tok := p.shift()
- // helperName param* hash? blockParams?
- result, blockParams := p.parseOpenBlockExpression(tok)
- // CLOSE
- tokClose := p.shift()
- if tokClose.Kind != lexer.TokenClose {
- errExpected(lexer.TokenClose, tokClose)
- }
- result.OpenStrip = ast.NewStrip(tok.Val, tokClose.Val)
- // named returned values
- return result, blockParams
- }
- // closeBlock : OPEN_ENDBLOCK helperName CLOSE
- func (p *parser) parseCloseBlock(block *ast.BlockStatement) {
- // OPEN_ENDBLOCK
- tok := p.shift()
- if tok.Kind != lexer.TokenOpenEndBlock {
- errExpected(lexer.TokenOpenEndBlock, tok)
- }
- // helperName
- endID := p.parseHelperName()
- closeName, ok := ast.HelperNameStr(endID)
- if !ok {
- errNode(endID, "Erroneous closing expression")
- }
- openName := block.Expression.Canonical()
- if openName != closeName {
- errNode(endID, fmt.Sprintf("%s doesn't match %s", openName, closeName))
- }
- // CLOSE
- tokClose := p.shift()
- if tokClose.Kind != lexer.TokenClose {
- errExpected(lexer.TokenClose, tokClose)
- }
- block.CloseStrip = ast.NewStrip(tok.Val, tokClose.Val)
- }
- // mustache : OPEN helperName param* hash? CLOSE
- // | OPEN_UNESCAPED helperName param* hash? CLOSE_UNESCAPED
- func (p *parser) parseMustache() *ast.MustacheStatement {
- // OPEN | OPEN_UNESCAPED
- tok := p.shift()
- closeToken := lexer.TokenClose
- if tok.Kind == lexer.TokenOpenUnescaped {
- closeToken = lexer.TokenCloseUnescaped
- }
- unescaped := false
- if (tok.Kind == lexer.TokenOpenUnescaped) || (rOpenAmp.MatchString(tok.Val)) {
- unescaped = true
- }
- result := ast.NewMustacheStatement(tok.Pos, tok.Line, unescaped)
- // helperName param* hash?
- result.Expression = p.parseExpression(tok)
- // CLOSE | CLOSE_UNESCAPED
- tokClose := p.shift()
- if tokClose.Kind != closeToken {
- errExpected(closeToken, tokClose)
- }
- result.Strip = ast.NewStrip(tok.Val, tokClose.Val)
- return result
- }
- // partial : OPEN_PARTIAL partialName param* hash? CLOSE
- func (p *parser) parsePartial() *ast.PartialStatement {
- // OPEN_PARTIAL
- tok := p.shift()
- result := ast.NewPartialStatement(tok.Pos, tok.Line)
- // partialName
- result.Name = p.parsePartialName()
- // param* hash?
- result.Params, result.Hash = p.parseExpressionParamsHash()
- // CLOSE
- tokClose := p.shift()
- if tokClose.Kind != lexer.TokenClose {
- errExpected(lexer.TokenClose, tokClose)
- }
- result.Strip = ast.NewStrip(tok.Val, tokClose.Val)
- return result
- }
- // helperName | sexpr
- func (p *parser) parseHelperNameOrSexpr() ast.Node {
- if p.isSexpr() {
- // sexpr
- return p.parseSexpr()
- }
- // helperName
- return p.parseHelperName()
- }
- // param : helperName | sexpr
- func (p *parser) parseParam() ast.Node {
- return p.parseHelperNameOrSexpr()
- }
- // Returns true if next tokens represent a `param`
- func (p *parser) isParam() bool {
- return (p.isSexpr() || p.isHelperName()) && !p.isHashSegment()
- }
- // param*
- func (p *parser) parseParams() []ast.Node {
- var result []ast.Node
- for p.isParam() {
- result = append(result, p.parseParam())
- }
- return result
- }
- // sexpr : OPEN_SEXPR helperName param* hash? CLOSE_SEXPR
- func (p *parser) parseSexpr() *ast.SubExpression {
- // OPEN_SEXPR
- tok := p.shift()
- result := ast.NewSubExpression(tok.Pos, tok.Line)
- // helperName param* hash?
- result.Expression = p.parseExpression(tok)
- // CLOSE_SEXPR
- tok = p.shift()
- if tok.Kind != lexer.TokenCloseSexpr {
- errExpected(lexer.TokenCloseSexpr, tok)
- }
- return result
- }
- // hash : hashSegment+
- func (p *parser) parseHash() *ast.Hash {
- var pairs []*ast.HashPair
- for p.isHashSegment() {
- pairs = append(pairs, p.parseHashSegment())
- }
- firstLoc := pairs[0].Location()
- result := ast.NewHash(firstLoc.Pos, firstLoc.Line)
- result.Pairs = pairs
- return result
- }
- // returns true if next tokens represents a `hashSegment`
- func (p *parser) isHashSegment() bool {
- return p.have(2) && (p.next().Kind == lexer.TokenID) && (p.nextAt(1).Kind == lexer.TokenEquals)
- }
- // hashSegment : ID EQUALS param
- func (p *parser) parseHashSegment() *ast.HashPair {
- // ID
- tok := p.shift()
- // EQUALS
- p.shift()
- // param
- param := p.parseParam()
- result := ast.NewHashPair(tok.Pos, tok.Line)
- result.Key = tok.Val
- result.Val = param
- return result
- }
- // blockParams : OPEN_BLOCK_PARAMS ID+ CLOSE_BLOCK_PARAMS
- func (p *parser) parseBlockParams() []string {
- var result []string
- // OPEN_BLOCK_PARAMS
- tok := p.shift()
- // ID+
- for p.isID() {
- result = append(result, p.shift().Val)
- }
- if len(result) == 0 {
- errExpected(lexer.TokenID, p.next())
- }
- // CLOSE_BLOCK_PARAMS
- tok = p.shift()
- if tok.Kind != lexer.TokenCloseBlockParams {
- errExpected(lexer.TokenCloseBlockParams, tok)
- }
- return result
- }
- // helperName : path | dataName | STRING | NUMBER | BOOLEAN | UNDEFINED | NULL
- func (p *parser) parseHelperName() ast.Node {
- var result ast.Node
- tok := p.next()
- switch tok.Kind {
- case lexer.TokenBoolean:
- // BOOLEAN
- p.shift()
- result = ast.NewBooleanLiteral(tok.Pos, tok.Line, (tok.Val == "true"), tok.Val)
- case lexer.TokenNumber:
- // NUMBER
- p.shift()
- val, isInt := parseNumber(tok)
- result = ast.NewNumberLiteral(tok.Pos, tok.Line, val, isInt, tok.Val)
- case lexer.TokenString:
- // STRING
- p.shift()
- result = ast.NewStringLiteral(tok.Pos, tok.Line, tok.Val)
- case lexer.TokenData:
- // dataName
- result = p.parseDataName()
- default:
- // path
- result = p.parsePath(false)
- }
- return result
- }
- // parseNumber parses a number
- func parseNumber(tok *lexer.Token) (result float64, isInt bool) {
- var valInt int
- var err error
- valInt, err = strconv.Atoi(tok.Val)
- if err == nil {
- isInt = true
- result = float64(valInt)
- } else {
- isInt = false
- result, err = strconv.ParseFloat(tok.Val, 64)
- if err != nil {
- errToken(tok, fmt.Sprintf("Failed to parse number: %s", tok.Val))
- }
- }
- // named returned values
- return
- }
- // Returns true if next tokens represent a `helperName`
- func (p *parser) isHelperName() bool {
- switch p.next().Kind {
- case lexer.TokenBoolean, lexer.TokenNumber, lexer.TokenString, lexer.TokenData, lexer.TokenID:
- return true
- }
- return false
- }
- // partialName : helperName | sexpr
- func (p *parser) parsePartialName() ast.Node {
- return p.parseHelperNameOrSexpr()
- }
- // dataName : DATA pathSegments
- func (p *parser) parseDataName() *ast.PathExpression {
- // DATA
- p.shift()
- // pathSegments
- return p.parsePath(true)
- }
// path         : pathSegments
// pathSegments : pathSegments SEP ID
//              | ID
//
// parsePath parses a dotted/slashed path like `foo.bar` or `../baz`.
// When data is true the path is a `@data` reference.
func (p *parser) parsePath(data bool) *ast.PathExpression {
	var tok *lexer.Token

	// ID
	tok = p.shift()
	if tok.Kind != lexer.TokenID {
		errExpected(lexer.TokenID, tok)
	}

	result := ast.NewPathExpression(tok.Pos, tok.Line, data)
	result.Part(tok.Val)

	for p.isPathSep() {
		// SEP
		tok = p.shift()
		result.Sep(tok.Val)

		// ID
		tok = p.shift()
		if tok.Kind != lexer.TokenID {
			errExpected(lexer.TokenID, tok)
		}

		result.Part(tok.Val)

		// reject `..`, `.` and `this` appearing after the first segment
		// NOTE(review): Parts was appended to just above, so this length
		// guard appears to be always true here — confirm whether it was
		// meant to check len before the append.
		if len(result.Parts) > 0 {
			switch tok.Val {
			case "..", ".", "this":
				errToken(tok, "Invalid path: "+result.Original)
			}
		}
	}

	return result
}
- // Ensures there is token to parse at given index
- func (p *parser) ensure(index int) {
- if p.lexOver {
- // nothing more to grab
- return
- }
- nb := index + 1
- for len(p.tokens) < nb {
- // fetch next token
- tok := p.lex.NextToken()
- // queue it
- p.tokens = append(p.tokens, &tok)
- if (tok.Kind == lexer.TokenEOF) || (tok.Kind == lexer.TokenError) {
- p.lexOver = true
- break
- }
- }
- }
- // have returns true is there are a list given number of tokens to consume left
- func (p *parser) have(nb int) bool {
- p.ensure(nb - 1)
- return len(p.tokens) >= nb
- }
- // nextAt returns next token at given index, without consuming it
- func (p *parser) nextAt(index int) *lexer.Token {
- p.ensure(index)
- return p.tokens[index]
- }
- // next returns next token without consuming it
- func (p *parser) next() *lexer.Token {
- return p.nextAt(0)
- }
- // shift returns next token and remove it from the tokens buffer
- //
- // Panics if next token is `TokenError`
- func (p *parser) shift() *lexer.Token {
- var result *lexer.Token
- p.ensure(0)
- result, p.tokens = p.tokens[0], p.tokens[1:]
- // check error token
- if result.Kind == lexer.TokenError {
- errToken(result, "Lexer error")
- }
- return result
- }
- // isToken returns true if next token is of given type
- func (p *parser) isToken(kind lexer.TokenKind) bool {
- return p.have(1) && p.next().Kind == kind
- }
- // isSexpr returns true if next token starts a sexpr
- func (p *parser) isSexpr() bool {
- return p.isToken(lexer.TokenOpenSexpr)
- }
- // isPathSep returns true if next token is a path separator
- func (p *parser) isPathSep() bool {
- return p.isToken(lexer.TokenSep)
- }
- // isID returns true if next token is an ID
- func (p *parser) isID() bool {
- return p.isToken(lexer.TokenID)
- }
- // isBlockParams returns true if next token starts a block params
- func (p *parser) isBlockParams() bool {
- return p.isToken(lexer.TokenOpenBlockParams)
- }
- // isInverse returns true if next token starts an INVERSE sequence
- func (p *parser) isInverse() bool {
- return p.isToken(lexer.TokenInverse)
- }
- // isOpenInverseChain returns true if next token is OPEN_INVERSE_CHAIN
- func (p *parser) isOpenInverseChain() bool {
- return p.isToken(lexer.TokenOpenInverseChain)
- }
|