123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package jade
- import (
- "fmt"
- "strings"
- "unicode"
- "unicode/utf8"
- )
- // item represents a token or text string returned from the scanner.
- type item struct {
- typ itemType // The type of this item.
- pos Pos // The starting position, in bytes, of this item in the input string.
- val string // The value of this item.
- line int // The line number at the start of this item.
- depth int
- }
- func (i item) String() string {
- switch {
- case i.typ == itemEOF:
- return "EOF"
- case i.typ == itemError:
- return i.val
- // case i.typ > itemKeyword:
- // return fmt.Sprintf("<%s>", i.val)
- case len(i.val) > 10:
- return fmt.Sprintf("%.10q...", i.val)
- }
- return fmt.Sprintf("%q", i.val)
- }
const (
	// spaceChars lists the space characters as defined by Go itself.
	spaceChars = " \t\r\n"
	// eof is the sentinel rune value next returns once the input is used up.
	eof = -1
)
- // stateFn represents the state of the scanner as a function that returns the next state.
- type stateFn func(*lexer) stateFn
- // lexer holds the state of the scanner.
- type lexer struct {
- name string // the name of the input; used only for error reports
- input string // the string being scanned
- pos Pos // current position in the input
- start Pos // start position of this item
- width Pos // width of last rune read from input
- items chan item // channel of scanned items
- line int // 1+number of newlines seen
- depth int // current tag depth
- interpolation int // interpolation depth
- longtext bool // long text flag
- }
- // next returns the next rune in the input.
- func (l *lexer) next() rune {
- if int(l.pos) >= len(l.input) {
- l.width = 0
- return eof
- }
- r, w := utf8.DecodeRuneInString(l.input[l.pos:])
- l.width = Pos(w)
- l.pos += l.width
- if r == '\n' {
- l.line++
- }
- return r
- }
- // peek returns but does not consume the next rune in the input.
- func (l *lexer) peek() rune {
- r := l.next()
- l.backup()
- return r
- }
- // backup steps back one rune. Can only be called once per call of next.
- func (l *lexer) backup() {
- l.pos -= l.width
- // Correct newline count.
- if l.width == 1 && l.input[l.pos] == '\n' {
- l.line--
- }
- }
- // emit passes an item back to the client.
- func (l *lexer) emit(t itemType) {
- l.items <- item{t, l.start, l.input[l.start:l.pos], l.line, l.depth}
- // Some items contain text internally. If so, count their newlines.
- switch t {
- // case itemText, itemRawString, itemLeftDelim, itemRightDelim:
- case itemText:
- l.line += strings.Count(l.input[l.start:l.pos], "\n")
- }
- l.start = l.pos
- }
- // ignore skips over the pending input before this point.
- func (l *lexer) ignore() {
- l.line += strings.Count(l.input[l.start:l.pos], "\n")
- l.start = l.pos
- }
- // accept consumes the next rune if it's from the valid set.
- func (l *lexer) accept(valid string) bool {
- if strings.ContainsRune(valid, l.next()) {
- return true
- }
- l.backup()
- return false
- }
- // acceptRun consumes a run of runes from the valid set.
- func (l *lexer) acceptRun(valid string) {
- for strings.ContainsRune(valid, l.next()) {
- }
- l.backup()
- }
- // errorf returns an error token and terminates the scan by passing
- // back a nil pointer that will be the next state, terminating l.nextItem.
- func (l *lexer) errorf(format string, args ...interface{}) stateFn {
- l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line, l.depth}
- return nil
- }
- // nextItem returns the next item from the input.
- // Called by the parser, not in the lexing goroutine.
- func (l *lexer) nextItem() item {
- return <-l.items
- }
- // drain drains the output so the lexing goroutine will exit.
- // Called by the parser, not in the lexing goroutine.
- func (l *lexer) drain() {
- for range l.items {
- }
- }
- // lex creates a new scanner for the input string.
- func lex(name, input string) *lexer {
- l := &lexer{
- name: name,
- input: input,
- items: make(chan item),
- line: 1,
- }
- go l.run()
- return l
- }
- func (l *lexer) run() {
- for state := lexIndents; state != nil; {
- state = state(l)
- }
- close(l.items)
- }
- // atTerminator reports whether the input is at valid termination character to
- // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
- // like "$x+2" not being acceptable without a space, in case we decide one
- // day to implement arithmetic.
- func (l *lexer) atTerminator() bool {
- r := l.peek()
- if isSpace(r) || isEndOfLine(r) {
- return true
- }
- switch r {
- case eof, '.', ',', '|', ':', ')', '(':
- return true
- }
- return false
- }
- func (l *lexer) scanNumber() bool {
- // Optional leading sign.
- l.accept("+-")
- // Is it hex?
- digits := "0123456789"
- if l.accept("0") && l.accept("xX") {
- digits = "0123456789abcdefABCDEF"
- }
- l.acceptRun(digits)
- if l.accept(".") {
- l.acceptRun(digits)
- }
- if l.accept("eE") {
- l.accept("+-")
- l.acceptRun("0123456789")
- }
- // Is it imaginary?
- l.accept("i")
- // Next thing mustn't be alphanumeric.
- if isAlphaNumeric(l.peek()) {
- l.next()
- return false
- }
- return true
- }
// isSpace reports whether r is a horizontal space character: a space or a
// tab. End-of-line characters are not included.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
// isAlphaNumeric reports whether r may appear in a name: an underscore, a
// hyphen, or any Unicode letter or digit.
func isAlphaNumeric(r rune) bool {
	switch r {
	case '_', '-':
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
|