lex.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package jade
  5. import (
  6. "fmt"
  7. "strings"
  8. "unicode"
  9. "unicode/utf8"
  10. )
  11. // item represents a token or text string returned from the scanner.
  12. type item struct {
  13. typ itemType // The type of this item.
  14. pos Pos // The starting position, in bytes, of this item in the input string.
  15. val string // The value of this item.
  16. line int // The line number at the start of this item.
  17. depth int
  18. }
  19. func (i item) String() string {
  20. switch {
  21. case i.typ == itemEOF:
  22. return "EOF"
  23. case i.typ == itemError:
  24. return i.val
  25. // case i.typ > itemKeyword:
  26. // return fmt.Sprintf("<%s>", i.val)
  27. case len(i.val) > 10:
  28. return fmt.Sprintf("%.10q...", i.val)
  29. }
  30. return fmt.Sprintf("%q", i.val)
  31. }
  32. const (
  33. eof = -1
  34. spaceChars = " \t\r\n" // These are the space characters defined by Go itself.
  35. )
  36. // stateFn represents the state of the scanner as a function that returns the next state.
  37. type stateFn func(*lexer) stateFn
  38. // lexer holds the state of the scanner.
  39. type lexer struct {
  40. name string // the name of the input; used only for error reports
  41. input string // the string being scanned
  42. pos Pos // current position in the input
  43. start Pos // start position of this item
  44. width Pos // width of last rune read from input
  45. items chan item // channel of scanned items
  46. line int // 1+number of newlines seen
  47. depth int // current tag depth
  48. interpolation int // interpolation depth
  49. longtext bool // long text flag
  50. }
  51. // next returns the next rune in the input.
  52. func (l *lexer) next() rune {
  53. if int(l.pos) >= len(l.input) {
  54. l.width = 0
  55. return eof
  56. }
  57. r, w := utf8.DecodeRuneInString(l.input[l.pos:])
  58. l.width = Pos(w)
  59. l.pos += l.width
  60. if r == '\n' {
  61. l.line++
  62. }
  63. return r
  64. }
  65. // peek returns but does not consume the next rune in the input.
  66. func (l *lexer) peek() rune {
  67. r := l.next()
  68. l.backup()
  69. return r
  70. }
  71. // backup steps back one rune. Can only be called once per call of next.
  72. func (l *lexer) backup() {
  73. l.pos -= l.width
  74. // Correct newline count.
  75. if l.width == 1 && l.input[l.pos] == '\n' {
  76. l.line--
  77. }
  78. }
  79. // emit passes an item back to the client.
  80. func (l *lexer) emit(t itemType) {
  81. l.items <- item{t, l.start, l.input[l.start:l.pos], l.line, l.depth}
  82. // Some items contain text internally. If so, count their newlines.
  83. switch t {
  84. // case itemText, itemRawString, itemLeftDelim, itemRightDelim:
  85. case itemText:
  86. l.line += strings.Count(l.input[l.start:l.pos], "\n")
  87. }
  88. l.start = l.pos
  89. }
  90. // ignore skips over the pending input before this point.
  91. func (l *lexer) ignore() {
  92. l.line += strings.Count(l.input[l.start:l.pos], "\n")
  93. l.start = l.pos
  94. }
  95. // accept consumes the next rune if it's from the valid set.
  96. func (l *lexer) accept(valid string) bool {
  97. if strings.ContainsRune(valid, l.next()) {
  98. return true
  99. }
  100. l.backup()
  101. return false
  102. }
  103. // acceptRun consumes a run of runes from the valid set.
  104. func (l *lexer) acceptRun(valid string) {
  105. for strings.ContainsRune(valid, l.next()) {
  106. }
  107. l.backup()
  108. }
  109. // errorf returns an error token and terminates the scan by passing
  110. // back a nil pointer that will be the next state, terminating l.nextItem.
  111. func (l *lexer) errorf(format string, args ...interface{}) stateFn {
  112. l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line, l.depth}
  113. return nil
  114. }
  115. // nextItem returns the next item from the input.
  116. // Called by the parser, not in the lexing goroutine.
  117. func (l *lexer) nextItem() item {
  118. return <-l.items
  119. }
  120. // drain drains the output so the lexing goroutine will exit.
  121. // Called by the parser, not in the lexing goroutine.
  122. func (l *lexer) drain() {
  123. for range l.items {
  124. }
  125. }
  126. // lex creates a new scanner for the input string.
  127. func lex(name, input string) *lexer {
  128. l := &lexer{
  129. name: name,
  130. input: input,
  131. items: make(chan item),
  132. line: 1,
  133. }
  134. go l.run()
  135. return l
  136. }
  137. func (l *lexer) run() {
  138. for state := lexIndents; state != nil; {
  139. state = state(l)
  140. }
  141. close(l.items)
  142. }
  143. // atTerminator reports whether the input is at valid termination character to
  144. // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
  145. // like "$x+2" not being acceptable without a space, in case we decide one
  146. // day to implement arithmetic.
  147. func (l *lexer) atTerminator() bool {
  148. r := l.peek()
  149. if isSpace(r) || isEndOfLine(r) {
  150. return true
  151. }
  152. switch r {
  153. case eof, '.', ',', '|', ':', ')', '(':
  154. return true
  155. }
  156. return false
  157. }
  158. func (l *lexer) scanNumber() bool {
  159. // Optional leading sign.
  160. l.accept("+-")
  161. // Is it hex?
  162. digits := "0123456789"
  163. if l.accept("0") && l.accept("xX") {
  164. digits = "0123456789abcdefABCDEF"
  165. }
  166. l.acceptRun(digits)
  167. if l.accept(".") {
  168. l.acceptRun(digits)
  169. }
  170. if l.accept("eE") {
  171. l.accept("+-")
  172. l.acceptRun("0123456789")
  173. }
  174. // Is it imaginary?
  175. l.accept("i")
  176. // Next thing mustn't be alphanumeric.
  177. if isAlphaNumeric(l.peek()) {
  178. l.next()
  179. return false
  180. }
  181. return true
  182. }
  183. // isSpace reports whether r is a space character.
  184. func isSpace(r rune) bool {
  185. return r == ' ' || r == '\t'
  186. }
  187. // isEndOfLine reports whether r is an end-of-line character.
  188. func isEndOfLine(r rune) bool {
  189. return r == '\r' || r == '\n'
  190. }
  191. // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
  192. func isAlphaNumeric(r rune) bool {
  193. return r == '_' || r == '-' || unicode.IsLetter(r) || unicode.IsDigit(r)
  194. }