lexer.go

package lexer

import (
    "github.com/kataras/iris/v12/macro/interpreter/token"
)

// Lexer helps us to read/scan characters of a source and resolve their token types.
type Lexer struct {
    input   string
    pos     int  // current position in input (points to the current char)
    readPos int  // current reading position in input (after the current char)
    ch      byte // current char under examination
}

// New takes a source, a series of characters, and returns
// a new lexer, ready to read from the first character.
func New(src string) *Lexer {
    l := &Lexer{
        input: src,
    }
    // step to the first character in order to be ready.
    l.readChar()
    return l
}
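
// Illustrative usage (the input below is a hypothetical macro source,
// assumed here only for demonstration):
//
//	l := New("{id:int}")
//	for t := l.NextToken(); t.Type != token.EOF; t = l.NextToken() {
//		fmt.Println(t.Type, t.Literal)
//	}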

// readChar advances the cursor by one character;
// it sets ch to 0 (the EOF marker) when the input is exhausted.
func (l *Lexer) readChar() {
    if l.readPos >= len(l.input) {
        l.ch = 0
    } else {
        l.ch = l.input[l.readPos]
    }

    l.pos = l.readPos
    l.readPos++
}

const (
    // Begin is the symbol which the lexer should scan forward to.
    Begin = '{' // token.LBRACE
    // End is the symbol at which the lexer should stop scanning.
    End = '}' // token.RBRACE
)

// resolveTokenType maps a single character to its token type.
func resolveTokenType(ch byte) token.Type {
    switch ch {
    case Begin:
        return token.LBRACE
    case End:
        return token.RBRACE
    // Let's keep it simple, no evaluation for logical operators,
    // we are not making a new programming language, keep it simple makis.
    // ||
    // case '|':
    // 	if l.peekChar() == '|' {
    // 		ch := l.ch
    // 		l.readChar()
    // 		t = token.Token{Type: token.OR, Literal: string(ch) + string(l.ch)}
    // 	}
    // ==
    case ':':
        return token.COLON
    case '(':
        return token.LPAREN
    case ')':
        return token.RPAREN
    case ',':
        return token.COMMA
    // literals
    case 0:
        return token.EOF
    default:
        return token.IDENT
    }
}
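
// For example, resolveTokenType('{') returns token.LBRACE,
// resolveTokenType(':') returns token.COLON, and any unreserved
// character such as 'x' falls through to token.IDENT.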

// NextToken returns the next token in the series of characters.
// It can be a single symbol, a token type or a literal.
// It's able to return an EOF token too.
//
// It moves the cursor forward.
func (l *Lexer) NextToken() (t token.Token) {
    l.skipWhitespace()
    typ := resolveTokenType(l.ch)
    t.Type = typ

    switch typ {
    case token.EOF:
        t.Literal = ""
    case token.IDENT:
        if isLetter(l.ch) {
            // letters
            lit := l.readIdentifier()
            typ = token.LookupIdent(lit)
            t = l.newToken(typ, lit)
            return
        }

        if isDigit(l.ch) {
            // numbers
            lit := l.readNumber()
            t = l.newToken(token.INT, lit)
            return
        }

        t = l.newTokenRune(token.ILLEGAL, l.ch)
    default:
        t = l.newTokenRune(typ, l.ch)
    }

    l.readChar() // set the pos to the next char.
    return
}
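
// Illustrative trace over the hypothetical source "{id:int}":
// successive NextToken calls yield LBRACE "{", an identifier "id"
// (its exact type comes from token.LookupIdent), COLON ":",
// an identifier "int", RBRACE "}" and finally EOF "".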

// NextDynamicToken doesn't care about the grammar.
// It reads numbers or any unknown symbol;
// it's used by the parser to skip all characters
// between a parameter function's arguments inside parentheses,
// in order to allow custom regexp in the end-language too.
//
// It moves the cursor forward.
func (l *Lexer) NextDynamicToken() (t token.Token) {
    // calculate anything, even spaces.

    // numbers
    lit := l.readNumber()
    if lit != "" {
        return l.newToken(token.INT, lit)
    }

    lit = l.readIdentifierFuncArgument()
    return l.newToken(token.IDENT, lit)
}
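
// Illustrative: with the cursor just past '(' in the hypothetical
// argument list `([a-z]+)`, NextDynamicToken returns IDENT "[a-z]+",
// consuming everything up to (but not including) the closing ')'.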

// readIdentifierFuncArgument skips any otherwise-illegal token while inside
// parentheses; it makes it possible to set a custom regexp inside a func.
func (l *Lexer) readIdentifierFuncArgument() string {
    pos := l.pos
    for resolveTokenType(l.ch) != token.RPAREN && l.ch != 0 {
        l.readChar()
    }

    return l.input[pos:l.pos]
}

// PeekNextTokenType returns only the token type
// of the next character; it does not move the cursor forward.
// It's used by the parser to recognise empty functions, i.e. `even()`,
// as valid functions with zero input arguments.
func (l *Lexer) PeekNextTokenType() token.Type {
    if len(l.input)-1 > l.pos {
        ch := l.input[l.pos]
        return resolveTokenType(ch)
    }

    return resolveTokenType(0) // EOF
}
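
// Illustrative: in a hypothetical source like `{n:int even()}`,
// right after NextToken has consumed '(', PeekNextTokenType reports
// token.RPAREN without moving the cursor, which lets the parser accept
// `even()` as a zero-argument function.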

// newToken builds a token of the given type and literal,
// recording its inclusive start and end positions in the input.
func (l *Lexer) newToken(tokenType token.Type, lit string) token.Token {
    t := token.Token{
        Type:    tokenType,
        Literal: lit,
        Start:   l.pos,
        End:     l.pos,
    }
    // remember, for multi-char literals l.pos is already one past the last char,
    // and we want to include both start and end,
    // so it's easy for the user to locate the token by just marking the expression.
    if l.pos > 1 && len(lit) > 1 {
        t.End = l.pos - 1
        t.Start = t.End - len(lit) + 1
    }

    return t
}
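
// Illustrative: after readIdentifier consumes "id" from the hypothetical
// source "{id:int}", l.pos is 3 (pointing at ':'), so End = 2 and
// Start = 1, the inclusive byte range of "id" within the source.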

func (l *Lexer) newTokenRune(tokenType token.Type, ch byte) token.Token {
    return l.newToken(tokenType, string(ch))
}

// skipWhitespace consumes spaces, tabs and line breaks.
func (l *Lexer) skipWhitespace() {
    for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
        l.readChar()
    }
}

// readIdentifier consumes a run of letters and digits
// and returns it as a literal.
func (l *Lexer) readIdentifier() string {
    pos := l.pos
    for isLetter(l.ch) || isDigit(l.ch) {
        l.readChar()
    }

    return l.input[pos:l.pos]
}

// isLetter reports whether ch is an ASCII letter or an underscore.
func isLetter(ch byte) bool {
    return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

// readNumber consumes a run of decimal digits and returns it as a literal.
func (l *Lexer) readNumber() string {
    pos := l.pos
    for isDigit(l.ch) {
        l.readChar()
    }

    return l.input[pos:l.pos]
}

// isDigit reports whether ch is a decimal digit.
func isDigit(ch byte) bool {
    return '0' <= ch && ch <= '9'
}