lexer.go 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901
  1. package parser
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "regexp"
  7. "strconv"
  8. "strings"
  9. "unicode"
  10. "unicode/utf8"
  11. "github.com/robertkrimen/otto/ast"
  12. "github.com/robertkrimen/otto/file"
  13. "github.com/robertkrimen/otto/token"
  14. )
// chr pairs a decoded rune with the number of bytes it occupied in the
// source string, as returned by utf8.DecodeRuneInString in chrAt.
type chr struct { //nolint:unused
	value rune // decoded code point
	width int  // encoded size in bytes
}
  19. var matchIdentifier = regexp.MustCompile(`^[$_\p{L}][$_\p{L}\d}]*$`)
  20. func isDecimalDigit(chr rune) bool {
  21. return '0' <= chr && chr <= '9'
  22. }
  23. func digitValue(chr rune) int {
  24. switch {
  25. case '0' <= chr && chr <= '9':
  26. return int(chr - '0')
  27. case 'a' <= chr && chr <= 'f':
  28. return int(chr - 'a' + 10)
  29. case 'A' <= chr && chr <= 'F':
  30. return int(chr - 'A' + 10)
  31. }
  32. return 16 // Larger than any legal digit value
  33. }
  34. // See https://www.unicode.org/reports/tr31/ for reference on ID_Start and ID_Continue.
  35. var includeIDStart = []*unicode.RangeTable{
  36. unicode.Lu,
  37. unicode.Ll,
  38. unicode.Lt,
  39. unicode.Lm,
  40. unicode.Lo,
  41. unicode.Nl,
  42. unicode.Other_ID_Start,
  43. }
  44. var includeIDContinue = []*unicode.RangeTable{
  45. unicode.Lu,
  46. unicode.Ll,
  47. unicode.Lt,
  48. unicode.Lm,
  49. unicode.Lo,
  50. unicode.Nl,
  51. unicode.Other_ID_Start,
  52. unicode.Mn,
  53. unicode.Mc,
  54. unicode.Nd,
  55. unicode.Pc,
  56. unicode.Other_ID_Continue,
  57. }
  58. var exclude = []*unicode.RangeTable{
  59. unicode.Pattern_Syntax,
  60. unicode.Pattern_White_Space,
  61. }
  62. func unicodeIDStart(r rune) bool {
  63. if unicode.In(r, exclude...) {
  64. return false
  65. }
  66. return unicode.In(r, includeIDStart...)
  67. }
  68. func unicodeIDContinue(r rune) bool {
  69. if unicode.In(r, exclude...) {
  70. return false
  71. }
  72. return unicode.In(r, includeIDContinue...)
  73. }
  74. func isDigit(chr rune, base int) bool {
  75. return digitValue(chr) < base
  76. }
  77. func isIdentifierStart(chr rune) bool {
  78. return chr == '$' || chr == '_' || chr == '\\' ||
  79. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  80. chr >= utf8.RuneSelf && unicodeIDStart(chr)
  81. }
  82. func isIdentifierPart(chr rune) bool {
  83. return chr == '$' || chr == '_' || chr == '\\' ||
  84. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  85. '0' <= chr && chr <= '9' ||
  86. chr >= utf8.RuneSelf && unicodeIDContinue(chr)
  87. }
  88. func (p *parser) scanIdentifier() (string, error) {
  89. offset := p.chrOffset
  90. parse := false
  91. for isIdentifierPart(p.chr) {
  92. if p.chr == '\\' {
  93. distance := p.chrOffset - offset
  94. p.read()
  95. if p.chr != 'u' {
  96. return "", fmt.Errorf("invalid identifier escape character: %c (%s)", p.chr, string(p.chr))
  97. }
  98. parse = true
  99. var value rune
  100. for range 4 {
  101. p.read()
  102. decimal, ok := hex2decimal(byte(p.chr))
  103. if !ok {
  104. return "", fmt.Errorf("invalid identifier escape character: %c (%s)", p.chr, string(p.chr))
  105. }
  106. value = value<<4 | decimal
  107. }
  108. switch {
  109. case value == '\\':
  110. return "", fmt.Errorf("invalid identifier escape value: %c (%s)", value, string(value))
  111. case distance == 0:
  112. if !isIdentifierStart(value) {
  113. return "", fmt.Errorf("invalid identifier escape value: %c (%s)", value, string(value))
  114. }
  115. case distance > 0:
  116. if !isIdentifierPart(value) {
  117. return "", fmt.Errorf("invalid identifier escape value: %c (%s)", value, string(value))
  118. }
  119. }
  120. }
  121. p.read()
  122. }
  123. literal := p.str[offset:p.chrOffset]
  124. if parse {
  125. return parseStringLiteral(literal)
  126. }
  127. return literal, nil
  128. }
  129. // 7.2.
  130. func isLineWhiteSpace(chr rune) bool { //nolint:unused, deadcode
  131. switch chr {
  132. case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
  133. return true
  134. case '\u000a', '\u000d', '\u2028', '\u2029':
  135. return false
  136. case '\u0085':
  137. return false
  138. }
  139. return unicode.IsSpace(chr)
  140. }
  141. // 7.3.
  142. func isLineTerminator(chr rune) bool {
  143. switch chr {
  144. case '\u000a', '\u000d', '\u2028', '\u2029':
  145. return true
  146. }
  147. return false
  148. }
// scan skips white space and comments and returns the next token, its
// literal text (set for identifiers, keywords, numbers and strings) and the
// index of the token's first character.
//
// Two parser flags are maintained along the way:
//   - p.insertSemicolon is set after tokens that may end a statement. While
//     it is set, skipWhiteSpace stops at a line terminator instead of
//     skipping it, so the terminator is seen by the default branch below.
//   - p.implicitSemicolon is cleared on entry and set when a line terminator
//     is consumed here, or when EOF is reached with p.insertSemicolon set.
//
// Malformed identifiers or strings and unexpected characters yield
// token.ILLEGAL.
func (p *parser) scan() (tkn token.Token, literal string, idx file.Idx) { //nolint:nonamedreturns
	p.implicitSemicolon = false

	for {
		p.skipWhiteSpace()

		idx = p.idxOf(p.chrOffset)
		// Value stored into p.insertSemicolon when this iteration returns
		// through the bottom of the loop body.
		insertSemicolon := false

		switch chr := p.chr; {
		case isIdentifierStart(chr):
			var err error
			literal, err = p.scanIdentifier()
			if err != nil {
				tkn = token.ILLEGAL
				// Leaves the outer switch; the token is returned at the
				// bottom of the loop body.
				break
			}
			if len(literal) > 1 {
				// Keywords are longer than 1 character, avoid lookup otherwise
				var strict bool
				tkn, strict = token.IsKeyword(literal)

				switch tkn {
				case 0: // Not a keyword
					switch literal {
					case "true", "false":
						p.insertSemicolon = true
						return token.BOOLEAN, literal, idx
					case "null":
						p.insertSemicolon = true
						return token.NULL, literal, idx
					}
				case token.KEYWORD:
					if strict {
						// TODO If strict and in strict mode, then this is not a break
						// Leaves the keyword switch, so the literal is
						// returned as an IDENTIFIER below.
						break
					}
					return token.KEYWORD, literal, idx
				case
					token.THIS,
					token.BREAK,
					token.THROW, // A newline after a throw is not allowed, but we need to detect it
					token.RETURN,
					token.CONTINUE,
					token.DEBUGGER:
					p.insertSemicolon = true
					return tkn, literal, idx
				default:
					return tkn, literal, idx
				}
			}
			p.insertSemicolon = true
			return token.IDENTIFIER, literal, idx
		case '0' <= chr && chr <= '9':
			p.insertSemicolon = true
			tkn, literal = p.scanNumericLiteral(false)
			return tkn, literal, idx
		default:
			// Consume chr; from here on p.chr is the character after it.
			p.read()
			switch chr {
			case -1:
				if p.insertSemicolon {
					p.insertSemicolon = false
					p.implicitSemicolon = true
				}
				tkn = token.EOF
			case '\r', '\n', '\u2028', '\u2029':
				p.insertSemicolon = false
				p.implicitSemicolon = true
				p.comments.AtLineBreak()
				continue
			case ':':
				tkn = token.COLON
			case '.':
				// A '.' directly followed by a decimal digit starts a number.
				if digitValue(p.chr) < 10 {
					insertSemicolon = true
					tkn, literal = p.scanNumericLiteral(true)
				} else {
					tkn = token.PERIOD
				}
			case ',':
				tkn = token.COMMA
			case ';':
				tkn = token.SEMICOLON
			case '(':
				tkn = token.LEFT_PARENTHESIS
			case ')':
				tkn = token.RIGHT_PARENTHESIS
				insertSemicolon = true
			case '[':
				tkn = token.LEFT_BRACKET
			case ']':
				tkn = token.RIGHT_BRACKET
				insertSemicolon = true
			case '{':
				tkn = token.LEFT_BRACE
			case '}':
				tkn = token.RIGHT_BRACE
				insertSemicolon = true
			case '+':
				tkn = p.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
				if tkn == token.INCREMENT {
					insertSemicolon = true
				}
			case '-':
				tkn = p.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
				if tkn == token.DECREMENT {
					insertSemicolon = true
				}
			case '*':
				tkn = p.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
			case '/':
				switch p.chr {
				case '/':
					// Single-line comment: stored when StoreComments is set,
					// otherwise skipped; scanning then restarts.
					if p.mode&StoreComments != 0 {
						comment := string(p.readSingleLineComment())
						p.comments.AddComment(ast.NewComment(comment, idx))
						continue
					}
					p.skipSingleLineComment()
					continue
				case '*':
					// Multi-line comment, handled like the single-line case.
					if p.mode&StoreComments != 0 {
						comment := string(p.readMultiLineComment())
						p.comments.AddComment(ast.NewComment(comment, idx))
						continue
					}
					p.skipMultiLineComment()
					continue
				default:
					// Could be division, could be RegExp literal
					tkn = p.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
					insertSemicolon = true
				}
			case '%':
				tkn = p.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
			case '^':
				tkn = p.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
			case '<':
				tkn = p.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
			case '>':
				tkn = p.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
			case '=':
				tkn = p.switch2(token.ASSIGN, token.EQUAL)
				// "==" followed by another '=' is "===".
				if tkn == token.EQUAL && p.chr == '=' {
					p.read()
					tkn = token.STRICT_EQUAL
				}
			case '!':
				tkn = p.switch2(token.NOT, token.NOT_EQUAL)
				// "!=" followed by another '=' is "!==".
				if tkn == token.NOT_EQUAL && p.chr == '=' {
					p.read()
					tkn = token.STRICT_NOT_EQUAL
				}
			case '&':
				if p.chr == '^' {
					p.read()
					tkn = p.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
				} else {
					tkn = p.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
				}
			case '|':
				tkn = p.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
			case '~':
				tkn = token.BITWISE_NOT
			case '?':
				tkn = token.QUESTION_MARK
			case '"', '\'':
				insertSemicolon = true
				tkn = token.STRING
				var err error
				// The opening quote was consumed by p.read() above, so it
				// sits one byte before p.chrOffset.
				literal, err = p.scanString(p.chrOffset - 1)
				if err != nil {
					tkn = token.ILLEGAL
				}
			default:
				p.errorUnexpected(idx, chr)
				tkn = token.ILLEGAL
			}
		}
		p.insertSemicolon = insertSemicolon
		return tkn, literal, idx
	}
}
  329. func (p *parser) switch2(tkn0, tkn1 token.Token) token.Token {
  330. if p.chr == '=' {
  331. p.read()
  332. return tkn1
  333. }
  334. return tkn0
  335. }
  336. func (p *parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  337. if p.chr == '=' {
  338. p.read()
  339. return tkn1
  340. }
  341. if p.chr == chr2 {
  342. p.read()
  343. return tkn2
  344. }
  345. return tkn0
  346. }
  347. func (p *parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  348. if p.chr == '=' {
  349. p.read()
  350. return tkn1
  351. }
  352. if p.chr == chr2 {
  353. p.read()
  354. if p.chr == '=' {
  355. p.read()
  356. return tkn3
  357. }
  358. return tkn2
  359. }
  360. return tkn0
  361. }
  362. func (p *parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  363. if p.chr == '=' {
  364. p.read()
  365. return tkn1
  366. }
  367. if p.chr == chr2 {
  368. p.read()
  369. if p.chr == '=' {
  370. p.read()
  371. return tkn3
  372. }
  373. if p.chr == chr3 {
  374. p.read()
  375. if p.chr == '=' {
  376. p.read()
  377. return tkn5
  378. }
  379. return tkn4
  380. }
  381. return tkn2
  382. }
  383. return tkn0
  384. }
  385. func (p *parser) chrAt(index int) chr { //nolint:unused
  386. value, width := utf8.DecodeRuneInString(p.str[index:])
  387. return chr{
  388. value: value,
  389. width: width,
  390. }
  391. }
  392. func (p *parser) peek() rune {
  393. if p.offset+1 < p.length {
  394. return rune(p.str[p.offset+1])
  395. }
  396. return -1
  397. }
  398. func (p *parser) read() {
  399. if p.offset < p.length {
  400. p.chrOffset = p.offset
  401. chr, width := rune(p.str[p.offset]), 1
  402. if chr >= utf8.RuneSelf { // !ASCII
  403. chr, width = utf8.DecodeRuneInString(p.str[p.offset:])
  404. if chr == utf8.RuneError && width == 1 {
  405. p.error(p.chrOffset, "Invalid UTF-8 character")
  406. }
  407. }
  408. p.offset += width
  409. p.chr = chr
  410. } else {
  411. p.chrOffset = p.length
  412. p.chr = -1 // EOF
  413. }
  414. }
  415. // This is here since the functions are so similar.
  416. func (p *regExpParser) read() {
  417. if p.offset < p.length {
  418. p.chrOffset = p.offset
  419. chr, width := rune(p.str[p.offset]), 1
  420. if chr >= utf8.RuneSelf { // !ASCII
  421. chr, width = utf8.DecodeRuneInString(p.str[p.offset:])
  422. if chr == utf8.RuneError && width == 1 {
  423. p.error(p.chrOffset, "Invalid UTF-8 character")
  424. }
  425. }
  426. p.offset += width
  427. p.chr = chr
  428. } else {
  429. p.chrOffset = p.length
  430. p.chr = -1 // EOF
  431. }
  432. }
  433. func (p *parser) readSingleLineComment() []rune {
  434. var result []rune
  435. for p.chr != -1 {
  436. p.read()
  437. if isLineTerminator(p.chr) {
  438. return result
  439. }
  440. result = append(result, p.chr)
  441. }
  442. // Get rid of the trailing -1
  443. return result[:len(result)-1]
  444. }
  445. func (p *parser) readMultiLineComment() []rune {
  446. var result []rune
  447. p.read()
  448. for p.chr >= 0 {
  449. chr := p.chr
  450. p.read()
  451. if chr == '*' && p.chr == '/' {
  452. p.read()
  453. return result
  454. }
  455. result = append(result, chr)
  456. }
  457. p.errorUnexpected(0, p.chr)
  458. return result
  459. }
  460. func (p *parser) skipSingleLineComment() {
  461. for p.chr != -1 {
  462. p.read()
  463. if isLineTerminator(p.chr) {
  464. return
  465. }
  466. }
  467. }
  468. func (p *parser) skipMultiLineComment() {
  469. p.read()
  470. for p.chr >= 0 {
  471. chr := p.chr
  472. p.read()
  473. if chr == '*' && p.chr == '/' {
  474. p.read()
  475. return
  476. }
  477. }
  478. p.errorUnexpected(0, p.chr)
  479. }
  480. func (p *parser) skipWhiteSpace() {
  481. for {
  482. switch p.chr {
  483. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  484. p.read()
  485. continue
  486. case '\r':
  487. if p.peek() == '\n' {
  488. p.comments.AtLineBreak()
  489. p.read()
  490. }
  491. fallthrough
  492. case '\u2028', '\u2029', '\n':
  493. if p.insertSemicolon {
  494. return
  495. }
  496. p.comments.AtLineBreak()
  497. p.read()
  498. continue
  499. }
  500. if p.chr >= utf8.RuneSelf {
  501. if unicode.IsSpace(p.chr) {
  502. p.read()
  503. continue
  504. }
  505. }
  506. break
  507. }
  508. }
  509. func (p *parser) scanMantissa(base int) {
  510. for digitValue(p.chr) < base {
  511. p.read()
  512. }
  513. }
  514. func (p *parser) scanEscape(quote rune) {
  515. var length, base uint32
  516. switch p.chr {
  517. // Octal:
  518. // length, base, limit = 3, 8, 255
  519. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '0':
  520. p.read()
  521. return
  522. case '\r', '\n', '\u2028', '\u2029':
  523. p.scanNewline()
  524. return
  525. case 'x':
  526. p.read()
  527. length, base = 2, 16
  528. case 'u':
  529. p.read()
  530. length, base = 4, 16
  531. default:
  532. p.read() // Always make progress
  533. return
  534. }
  535. var value uint32
  536. for ; length > 0 && p.chr != quote && p.chr >= 0; length-- {
  537. digit := uint32(digitValue(p.chr))
  538. if digit >= base {
  539. break
  540. }
  541. value = value*base + digit
  542. p.read()
  543. }
  544. }
  545. func (p *parser) scanString(offset int) (string, error) {
  546. // " ' /
  547. quote := rune(p.str[offset])
  548. for p.chr != quote {
  549. chr := p.chr
  550. if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
  551. goto newline
  552. }
  553. p.read()
  554. switch {
  555. case chr == '\\':
  556. if quote == '/' {
  557. if p.chr == '\n' || p.chr == '\r' || p.chr == '\u2028' || p.chr == '\u2029' || p.chr < 0 {
  558. goto newline
  559. }
  560. p.read()
  561. } else {
  562. p.scanEscape(quote)
  563. }
  564. case chr == '[' && quote == '/':
  565. // Allow a slash (/) in a bracket character class ([...])
  566. // TODO Fix this, this is hacky...
  567. quote = -1
  568. case chr == ']' && quote == -1:
  569. quote = '/'
  570. }
  571. }
  572. // " ' /
  573. p.read()
  574. return p.str[offset:p.chrOffset], nil
  575. newline:
  576. p.scanNewline()
  577. err := "String not terminated"
  578. if quote == '/' {
  579. err = "Invalid regular expression: missing /"
  580. p.error(p.idxOf(offset), err)
  581. }
  582. return "", errors.New(err)
  583. }
  584. func (p *parser) scanNewline() {
  585. if p.chr == '\r' {
  586. p.read()
  587. if p.chr != '\n' {
  588. return
  589. }
  590. }
  591. p.read()
  592. }
  593. func hex2decimal(chr byte) (rune, bool) {
  594. r := rune(chr)
  595. switch {
  596. case '0' <= r && r <= '9':
  597. return r - '0', true
  598. case 'a' <= r && r <= 'f':
  599. return r - 'a' + 10, true
  600. case 'A' <= r && r <= 'F':
  601. return r - 'A' + 10, true
  602. default:
  603. return 0, false
  604. }
  605. }
  606. func parseNumberLiteral(literal string) (value interface{}, err error) { //nolint:nonamedreturns
  607. // TODO Is Uint okay? What about -MAX_UINT
  608. value, err = strconv.ParseInt(literal, 0, 64)
  609. if err == nil {
  610. return value, nil
  611. }
  612. parseIntErr := err // Save this first error, just in case
  613. value, err = strconv.ParseFloat(literal, 64)
  614. if err == nil {
  615. return value, nil
  616. } else if errors.Is(err, strconv.ErrRange) {
  617. // Infinity, etc.
  618. return value, nil
  619. }
  620. // TODO(steve): Fix as this is assigning to err so we know the type.
  621. // Need to understand what this was trying to do?
  622. err = parseIntErr
  623. if errors.Is(err, strconv.ErrRange) {
  624. if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
  625. // Could just be a very large number (e.g. 0x8000000000000000)
  626. var value float64
  627. literal = literal[2:]
  628. for _, chr := range literal {
  629. digit := digitValue(chr)
  630. if digit >= 16 {
  631. return nil, fmt.Errorf("illegal numeric literal: %v (>= 16)", digit)
  632. }
  633. value = value*16 + float64(digit)
  634. }
  635. return value, nil
  636. }
  637. }
  638. return nil, errors.New("illegal numeric literal")
  639. }
  640. func parseStringLiteral(literal string) (string, error) {
  641. // Best case scenario...
  642. if literal == "" {
  643. return "", nil
  644. }
  645. // Slightly less-best case scenario...
  646. if !strings.ContainsRune(literal, '\\') {
  647. return literal, nil
  648. }
  649. str := literal
  650. buffer := bytes.NewBuffer(make([]byte, 0, 3*len(literal)/2))
  651. for len(str) > 0 {
  652. switch chr := str[0]; {
  653. // We do not explicitly handle the case of the quote
  654. // value, which can be: " ' /
  655. // This assumes we're already passed a partially well-formed literal
  656. case chr >= utf8.RuneSelf:
  657. chr, size := utf8.DecodeRuneInString(str)
  658. buffer.WriteRune(chr)
  659. str = str[size:]
  660. continue
  661. case chr != '\\':
  662. buffer.WriteByte(chr)
  663. str = str[1:]
  664. continue
  665. }
  666. if len(str) <= 1 {
  667. panic("len(str) <= 1")
  668. }
  669. chr := str[1]
  670. var value rune
  671. if chr >= utf8.RuneSelf {
  672. str = str[1:]
  673. var size int
  674. value, size = utf8.DecodeRuneInString(str)
  675. str = str[size:] // \ + <character>
  676. } else {
  677. str = str[2:] // \<character>
  678. switch chr {
  679. case 'b':
  680. value = '\b'
  681. case 'f':
  682. value = '\f'
  683. case 'n':
  684. value = '\n'
  685. case 'r':
  686. value = '\r'
  687. case 't':
  688. value = '\t'
  689. case 'v':
  690. value = '\v'
  691. case 'x', 'u':
  692. size := 0
  693. switch chr {
  694. case 'x':
  695. size = 2
  696. case 'u':
  697. size = 4
  698. }
  699. if len(str) < size {
  700. return "", fmt.Errorf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
  701. }
  702. for j := range size {
  703. decimal, ok := hex2decimal(str[j])
  704. if !ok {
  705. return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size])
  706. }
  707. value = value<<4 | decimal
  708. }
  709. str = str[size:]
  710. if chr == 'x' {
  711. break
  712. }
  713. if value > utf8.MaxRune {
  714. panic("value > utf8.MaxRune")
  715. }
  716. case '0':
  717. if len(str) == 0 || '0' > str[0] || str[0] > '7' {
  718. value = 0
  719. break
  720. }
  721. fallthrough
  722. case '1', '2', '3', '4', '5', '6', '7':
  723. // TODO strict
  724. value = rune(chr) - '0'
  725. j := 0
  726. for ; j < 2; j++ {
  727. if len(str) < j+1 {
  728. break
  729. }
  730. if ch := str[j]; '0' > ch || ch > '7' {
  731. break
  732. }
  733. decimal := rune(str[j]) - '0'
  734. value = (value << 3) | decimal
  735. }
  736. str = str[j:]
  737. case '\\':
  738. value = '\\'
  739. case '\'', '"':
  740. value = rune(chr)
  741. case '\r':
  742. if len(str) > 0 {
  743. if str[0] == '\n' {
  744. str = str[1:]
  745. }
  746. }
  747. fallthrough
  748. case '\n':
  749. continue
  750. default:
  751. value = rune(chr)
  752. }
  753. }
  754. buffer.WriteRune(value)
  755. }
  756. return buffer.String(), nil
  757. }
// scanNumericLiteral scans a numeric literal and returns token.NUMBER with
// the literal's source text, or token.ILLEGAL with the text consumed so far
// when the literal is malformed.
//
// decimalPoint is true when the caller has already consumed a leading '.'
// (a literal such as ".5"); the start offset is then moved back by one so
// the returned text includes the dot.
//
// The function handles hexadecimal ("0x"/"0X"), legacy octal (leading "0"),
// and decimal literals with optional fraction and exponent. A literal that
// is directly followed by an identifier start character or a decimal digit
// is rejected.
func (p *parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
	offset := p.chrOffset
	tkn := token.NUMBER

	if decimalPoint {
		// Include the '.' that was consumed before this call.
		offset--
		p.scanMantissa(10)
		goto exponent
	}

	if p.chr == '0' {
		chrOffset := p.chrOffset
		p.read()
		switch p.chr {
		case 'x', 'X':
			// Hexadecimal
			p.read()
			// At least one hex digit is required after the prefix.
			if isDigit(p.chr, 16) {
				p.read()
			} else {
				return token.ILLEGAL, p.str[chrOffset:p.chrOffset]
			}
			p.scanMantissa(16)

			// NOTE(review): a hex digit was consumed above, so at least
			// three bytes have been read here and this check looks
			// unreachable; confirm before removing.
			if p.chrOffset-chrOffset <= 2 {
				// Only "0x" or "0X"
				p.error(0, "Illegal hexadecimal number")
			}

			goto hexadecimal
		case '.':
			// Float
			goto float
		default:
			// Octal, Float
			if p.chr == 'e' || p.chr == 'E' {
				goto exponent
			}
			p.scanMantissa(8)
			// A leading zero followed by octal digits may not continue
			// with 8 or 9.
			if p.chr == '8' || p.chr == '9' {
				return token.ILLEGAL, p.str[chrOffset:p.chrOffset]
			}
			goto octal
		}
	}

	p.scanMantissa(10)

float:
	// Optional fractional part.
	if p.chr == '.' {
		p.read()
		p.scanMantissa(10)
	}

exponent:
	// Optional exponent: e/E, optional sign, then at least one digit.
	if p.chr == 'e' || p.chr == 'E' {
		p.read()
		if p.chr == '-' || p.chr == '+' {
			p.read()
		}
		if isDecimalDigit(p.chr) {
			p.read()
			p.scanMantissa(10)
		} else {
			return token.ILLEGAL, p.str[offset:p.chrOffset]
		}
	}

hexadecimal:
octal:
	// The literal must not run straight into an identifier or another digit.
	if isIdentifierStart(p.chr) || isDecimalDigit(p.chr) {
		return token.ILLEGAL, p.str[offset:p.chrOffset]
	}

	return tkn, p.str[offset:p.chrOffset]
}