lexer.go 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854
  1. package parser
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "regexp"
  7. "strconv"
  8. "strings"
  9. "unicode"
  10. "unicode/utf8"
  11. "github.com/robertkrimen/otto/ast"
  12. "github.com/robertkrimen/otto/file"
  13. "github.com/robertkrimen/otto/token"
  14. )
// chr pairs a decoded rune with the number of bytes it occupied in the source
// string. It is the result type of (*parser).chrAt.
type chr struct { //nolint: unused
	value rune // the decoded code point
	width int  // encoded size in bytes, as reported by utf8.DecodeRuneInString
}
  19. var matchIdentifier = regexp.MustCompile(`^[$_\p{L}][$_\p{L}\d}]*$`)
  20. func isDecimalDigit(chr rune) bool {
  21. return '0' <= chr && chr <= '9'
  22. }
  23. func digitValue(chr rune) int {
  24. switch {
  25. case '0' <= chr && chr <= '9':
  26. return int(chr - '0')
  27. case 'a' <= chr && chr <= 'f':
  28. return int(chr - 'a' + 10)
  29. case 'A' <= chr && chr <= 'F':
  30. return int(chr - 'A' + 10)
  31. }
  32. return 16 // Larger than any legal digit value
  33. }
  34. func isDigit(chr rune, base int) bool {
  35. return digitValue(chr) < base
  36. }
  37. func isIdentifierStart(chr rune) bool {
  38. return chr == '$' || chr == '_' || chr == '\\' ||
  39. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  40. chr >= utf8.RuneSelf && unicode.IsLetter(chr)
  41. }
  42. func isIdentifierPart(chr rune) bool {
  43. return chr == '$' || chr == '_' || chr == '\\' ||
  44. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  45. '0' <= chr && chr <= '9' ||
  46. chr >= utf8.RuneSelf && (unicode.IsLetter(chr) || unicode.IsDigit(chr))
  47. }
  48. func (p *parser) scanIdentifier() (string, error) {
  49. offset := p.chrOffset
  50. parse := false
  51. for isIdentifierPart(p.chr) {
  52. if p.chr == '\\' {
  53. distance := p.chrOffset - offset
  54. p.read()
  55. if p.chr != 'u' {
  56. return "", fmt.Errorf("invalid identifier escape character: %c (%s)", p.chr, string(p.chr))
  57. }
  58. parse = true
  59. var value rune
  60. for j := 0; j < 4; j++ {
  61. p.read()
  62. decimal, ok := hex2decimal(byte(p.chr))
  63. if !ok {
  64. return "", fmt.Errorf("invalid identifier escape character: %c (%s)", p.chr, string(p.chr))
  65. }
  66. value = value<<4 | decimal
  67. }
  68. switch {
  69. case value == '\\':
  70. return "", fmt.Errorf("invalid identifier escape value: %c (%s)", value, string(value))
  71. case distance == 0:
  72. if !isIdentifierStart(value) {
  73. return "", fmt.Errorf("invalid identifier escape value: %c (%s)", value, string(value))
  74. }
  75. case distance > 0:
  76. if !isIdentifierPart(value) {
  77. return "", fmt.Errorf("invalid identifier escape value: %c (%s)", value, string(value))
  78. }
  79. }
  80. }
  81. p.read()
  82. }
  83. literal := p.str[offset:p.chrOffset]
  84. if parse {
  85. return parseStringLiteral(literal)
  86. }
  87. return literal, nil
  88. }
  89. // 7.2.
  90. func isLineWhiteSpace(chr rune) bool { //nolint: unused, deadcode
  91. switch chr {
  92. case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
  93. return true
  94. case '\u000a', '\u000d', '\u2028', '\u2029':
  95. return false
  96. case '\u0085':
  97. return false
  98. }
  99. return unicode.IsSpace(chr)
  100. }
  101. // 7.3.
  102. func isLineTerminator(chr rune) bool {
  103. switch chr {
  104. case '\u000a', '\u000d', '\u2028', '\u2029':
  105. return true
  106. }
  107. return false
  108. }
// scan returns the next token from the source: its kind (tkn), its source text
// where one is produced (literal; set for identifiers, keywords, numbers and
// strings, otherwise left empty) and the index of its first character (idx).
//
// Two parser flags are maintained along the way:
//
//   - p.insertSemicolon is set after tokens that may end a statement. While it
//     is set, skipWhiteSpace stops at a line terminator instead of skipping it,
//     so the terminator is seen by the default case below.
//   - p.implicitSemicolon is cleared on entry and set when a line terminator
//     is consumed before the token that is returned, or when EOF is reached
//     while p.insertSemicolon was set.
//
// Comments are skipped, or recorded in p.comments when the StoreComments mode
// bit is set; either way scanning continues with the next token.
func (p *parser) scan() (tkn token.Token, literal string, idx file.Idx) { //nolint: nonamedreturns
	p.implicitSemicolon = false
	for {
		p.skipWhiteSpace()
		idx = p.idxOf(p.chrOffset)
		// Value assigned to p.insertSemicolon when this iteration falls through
		// to the common return at the bottom of the loop.
		insertSemicolon := false
		switch chr := p.chr; {
		case isIdentifierStart(chr):
			var err error
			literal, err = p.scanIdentifier()
			if err != nil {
				tkn = token.ILLEGAL
				// Leaves the outer switch; ILLEGAL is returned by the common
				// return below.
				break
			}
			if len(literal) > 1 {
				// Keywords are longer than 1 character, avoid lookup otherwise
				var strict bool
				tkn, strict = token.IsKeyword(literal)
				switch tkn {
				case 0: // Not a keyword
					switch literal {
					case "true", "false":
						p.insertSemicolon = true
						return token.BOOLEAN, literal, idx
					case "null":
						p.insertSemicolon = true
						return token.NULL, literal, idx
					}
				case token.KEYWORD:
					if strict {
						// TODO If strict and in strict mode, then this is not a break
						// Leaves the keyword switch, so the word is returned
						// as a plain IDENTIFIER below.
						break
					}
					return token.KEYWORD, literal, idx
				case
					token.THIS,
					token.BREAK,
					token.THROW, // A newline after a throw is not allowed, but we need to detect it
					token.RETURN,
					token.CONTINUE,
					token.DEBUGGER:
					p.insertSemicolon = true
					return tkn, literal, idx
				default:
					// Any other keyword token; p.insertSemicolon is left as it was.
					return tkn, literal, idx
				}
			}
			p.insertSemicolon = true
			return token.IDENTIFIER, literal, idx
		case '0' <= chr && chr <= '9':
			p.insertSemicolon = true
			tkn, literal = p.scanNumericLiteral(false)
			return tkn, literal, idx
		default:
			// Consume chr. From here on p.chr is the character after it,
			// giving the cases below one character of lookahead.
			p.read()
			switch chr {
			case -1:
				if p.insertSemicolon {
					p.insertSemicolon = false
					p.implicitSemicolon = true
				}
				tkn = token.EOF
			case '\r', '\n', '\u2028', '\u2029':
				// Only reached when skipWhiteSpace stopped at the terminator,
				// i.e. when p.insertSemicolon was set.
				p.insertSemicolon = false
				p.implicitSemicolon = true
				p.comments.AtLineBreak()
				continue
			case ':':
				tkn = token.COLON
			case '.':
				// A digit right after the dot makes this a number such as ".5".
				if digitValue(p.chr) < 10 {
					insertSemicolon = true
					tkn, literal = p.scanNumericLiteral(true)
				} else {
					tkn = token.PERIOD
				}
			case ',':
				tkn = token.COMMA
			case ';':
				tkn = token.SEMICOLON
			case '(':
				tkn = token.LEFT_PARENTHESIS
			case ')':
				tkn = token.RIGHT_PARENTHESIS
				insertSemicolon = true
			case '[':
				tkn = token.LEFT_BRACKET
			case ']':
				tkn = token.RIGHT_BRACKET
				insertSemicolon = true
			case '{':
				tkn = token.LEFT_BRACE
			case '}':
				tkn = token.RIGHT_BRACE
				insertSemicolon = true
			case '+':
				tkn = p.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
				if tkn == token.INCREMENT {
					insertSemicolon = true
				}
			case '-':
				tkn = p.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
				if tkn == token.DECREMENT {
					insertSemicolon = true
				}
			case '*':
				tkn = p.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
			case '/':
				switch p.chr {
				case '/':
					if p.mode&StoreComments != 0 {
						// This := declares a new literal local to the if block;
						// harmless because the loop continues immediately.
						literal := string(p.readSingleLineComment())
						p.comments.AddComment(ast.NewComment(literal, idx))
						continue
					}
					p.skipSingleLineComment()
					continue
				case '*':
					if p.mode&StoreComments != 0 {
						literal = string(p.readMultiLineComment())
						p.comments.AddComment(ast.NewComment(literal, idx))
						continue
					}
					p.skipMultiLineComment()
					continue
				default:
					// Could be division, could be RegExp literal
					tkn = p.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
					insertSemicolon = true
				}
			case '%':
				tkn = p.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
			case '^':
				tkn = p.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
			case '<':
				tkn = p.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
			case '>':
				tkn = p.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
			case '=':
				// "=" or "==" via switch2, then a third '=' upgrades to "===".
				tkn = p.switch2(token.ASSIGN, token.EQUAL)
				if tkn == token.EQUAL && p.chr == '=' {
					p.read()
					tkn = token.STRICT_EQUAL
				}
			case '!':
				// "!" or "!=" via switch2, then a further '=' upgrades to "!==".
				tkn = p.switch2(token.NOT, token.NOT_EQUAL)
				if tkn == token.NOT_EQUAL && p.chr == '=' {
					p.read()
					tkn = token.STRICT_NOT_EQUAL
				}
			case '&':
				// NOTE(review): "&^" / "&^=" are tokenized as AND_NOT /
				// AND_NOT_ASSIGN; these do not look like JavaScript operators —
				// confirm whether this branch is intentional.
				if p.chr == '^' {
					p.read()
					tkn = p.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
				} else {
					tkn = p.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
				}
			case '|':
				tkn = p.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
			case '~':
				tkn = token.BITWISE_NOT
			case '?':
				tkn = token.QUESTION_MARK
			case '"', '\'':
				insertSemicolon = true
				tkn = token.STRING
				var err error
				// The opening quote was consumed by p.read() above, so it sits
				// one position before the current character.
				literal, err = p.scanString(p.chrOffset - 1)
				if err != nil {
					tkn = token.ILLEGAL
				}
			default:
				p.errorUnexpected(idx, chr)
				tkn = token.ILLEGAL
			}
		}
		p.insertSemicolon = insertSemicolon
		return tkn, literal, idx
	}
}
  289. func (p *parser) switch2(tkn0, tkn1 token.Token) token.Token {
  290. if p.chr == '=' {
  291. p.read()
  292. return tkn1
  293. }
  294. return tkn0
  295. }
  296. func (p *parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  297. if p.chr == '=' {
  298. p.read()
  299. return tkn1
  300. }
  301. if p.chr == chr2 {
  302. p.read()
  303. return tkn2
  304. }
  305. return tkn0
  306. }
  307. func (p *parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  308. if p.chr == '=' {
  309. p.read()
  310. return tkn1
  311. }
  312. if p.chr == chr2 {
  313. p.read()
  314. if p.chr == '=' {
  315. p.read()
  316. return tkn3
  317. }
  318. return tkn2
  319. }
  320. return tkn0
  321. }
  322. func (p *parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  323. if p.chr == '=' {
  324. p.read()
  325. return tkn1
  326. }
  327. if p.chr == chr2 {
  328. p.read()
  329. if p.chr == '=' {
  330. p.read()
  331. return tkn3
  332. }
  333. if p.chr == chr3 {
  334. p.read()
  335. if p.chr == '=' {
  336. p.read()
  337. return tkn5
  338. }
  339. return tkn4
  340. }
  341. return tkn2
  342. }
  343. return tkn0
  344. }
  345. func (p *parser) chrAt(index int) chr { //nolint: unused
  346. value, width := utf8.DecodeRuneInString(p.str[index:])
  347. return chr{
  348. value: value,
  349. width: width,
  350. }
  351. }
  352. func (p *parser) peek() rune {
  353. if p.offset+1 < p.length {
  354. return rune(p.str[p.offset+1])
  355. }
  356. return -1
  357. }
  358. func (p *parser) read() {
  359. if p.offset < p.length {
  360. p.chrOffset = p.offset
  361. chr, width := rune(p.str[p.offset]), 1
  362. if chr >= utf8.RuneSelf { // !ASCII
  363. chr, width = utf8.DecodeRuneInString(p.str[p.offset:])
  364. if chr == utf8.RuneError && width == 1 {
  365. p.error(p.chrOffset, "Invalid UTF-8 character")
  366. }
  367. }
  368. p.offset += width
  369. p.chr = chr
  370. } else {
  371. p.chrOffset = p.length
  372. p.chr = -1 // EOF
  373. }
  374. }
  375. // This is here since the functions are so similar.
  376. func (p *regExpParser) read() {
  377. if p.offset < p.length {
  378. p.chrOffset = p.offset
  379. chr, width := rune(p.str[p.offset]), 1
  380. if chr >= utf8.RuneSelf { // !ASCII
  381. chr, width = utf8.DecodeRuneInString(p.str[p.offset:])
  382. if chr == utf8.RuneError && width == 1 {
  383. p.error(p.chrOffset, "Invalid UTF-8 character")
  384. }
  385. }
  386. p.offset += width
  387. p.chr = chr
  388. } else {
  389. p.chrOffset = p.length
  390. p.chr = -1 // EOF
  391. }
  392. }
  393. func (p *parser) readSingleLineComment() []rune {
  394. var result []rune
  395. for p.chr != -1 {
  396. p.read()
  397. if isLineTerminator(p.chr) {
  398. return result
  399. }
  400. result = append(result, p.chr)
  401. }
  402. // Get rid of the trailing -1
  403. return result[:len(result)-1]
  404. }
  405. func (p *parser) readMultiLineComment() []rune {
  406. var result []rune
  407. p.read()
  408. for p.chr >= 0 {
  409. chr := p.chr
  410. p.read()
  411. if chr == '*' && p.chr == '/' {
  412. p.read()
  413. return result
  414. }
  415. result = append(result, chr)
  416. }
  417. p.errorUnexpected(0, p.chr)
  418. return result
  419. }
  420. func (p *parser) skipSingleLineComment() {
  421. for p.chr != -1 {
  422. p.read()
  423. if isLineTerminator(p.chr) {
  424. return
  425. }
  426. }
  427. }
  428. func (p *parser) skipMultiLineComment() {
  429. p.read()
  430. for p.chr >= 0 {
  431. chr := p.chr
  432. p.read()
  433. if chr == '*' && p.chr == '/' {
  434. p.read()
  435. return
  436. }
  437. }
  438. p.errorUnexpected(0, p.chr)
  439. }
  440. func (p *parser) skipWhiteSpace() {
  441. for {
  442. switch p.chr {
  443. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  444. p.read()
  445. continue
  446. case '\r':
  447. if p.peek() == '\n' {
  448. p.comments.AtLineBreak()
  449. p.read()
  450. }
  451. fallthrough
  452. case '\u2028', '\u2029', '\n':
  453. if p.insertSemicolon {
  454. return
  455. }
  456. p.comments.AtLineBreak()
  457. p.read()
  458. continue
  459. }
  460. if p.chr >= utf8.RuneSelf {
  461. if unicode.IsSpace(p.chr) {
  462. p.read()
  463. continue
  464. }
  465. }
  466. break
  467. }
  468. }
  469. func (p *parser) scanMantissa(base int) {
  470. for digitValue(p.chr) < base {
  471. p.read()
  472. }
  473. }
  474. func (p *parser) scanEscape(quote rune) {
  475. var length, base uint32
  476. switch p.chr {
  477. // Octal:
  478. // length, base, limit = 3, 8, 255
  479. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '0':
  480. p.read()
  481. return
  482. case '\r', '\n', '\u2028', '\u2029':
  483. p.scanNewline()
  484. return
  485. case 'x':
  486. p.read()
  487. length, base = 2, 16
  488. case 'u':
  489. p.read()
  490. length, base = 4, 16
  491. default:
  492. p.read() // Always make progress
  493. return
  494. }
  495. var value uint32
  496. for ; length > 0 && p.chr != quote && p.chr >= 0; length-- {
  497. digit := uint32(digitValue(p.chr))
  498. if digit >= base {
  499. break
  500. }
  501. value = value*base + digit
  502. p.read()
  503. }
  504. }
// scanString scans a delimited literal whose opening delimiter sits at byte
// position offset of the source and whose body starts at the current
// character. The delimiter is read from the source itself and may be '"',
// '\'' or '/' (a regular expression body).
//
// On success it returns the raw source text from the opening delimiter through
// the closing one, with escapes left undecoded. If a line terminator or the
// end of input is reached first, it returns an empty string and an error; for
// a '/' delimiter the error is additionally reported through p.error.
func (p *parser) scanString(offset int) (string, error) {
	// " ' /
	quote := rune(p.str[offset])
	for p.chr != quote {
		chr := p.chr
		if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
			goto newline
		}
		p.read()
		switch {
		case chr == '\\':
			if quote == '/' {
				// In a regular expression the backslash escapes exactly one
				// character, which must not be a line terminator or EOF.
				if p.chr == '\n' || p.chr == '\r' || p.chr == '\u2028' || p.chr == '\u2029' || p.chr < 0 {
					goto newline
				}
				p.read()
			} else {
				p.scanEscape(quote)
			}
		case chr == '[' && quote == '/':
			// Allow a slash (/) in a bracket character class ([...])
			// TODO Fix this, this is hacky...
			// While inside the class the delimiter is swapped for -1 so that
			// a '/' does not end the loop.
			quote = -1
		case chr == ']' && quote == -1:
			quote = '/'
		}
	}
	// " ' /
	p.read()
	return p.str[offset:p.chrOffset], nil
newline:
	p.scanNewline()
	err := "String not terminated"
	// NOTE(review): while inside a regexp character class quote is -1, so an
	// unterminated class that hits a line terminator takes the
	// "String not terminated" message and skips p.error. Likewise EOF inside a
	// class makes p.chr == quote (-1), which ends the loop above as if the
	// literal had been closed. Confirm whether either is intended.
	if quote == '/' {
		err = "Invalid regular expression: missing /"
		p.error(p.idxOf(offset), err)
	}
	return "", errors.New(err)
}
  544. func (p *parser) scanNewline() {
  545. if p.chr == '\r' {
  546. p.read()
  547. if p.chr != '\n' {
  548. return
  549. }
  550. }
  551. p.read()
  552. }
  553. func hex2decimal(chr byte) (rune, bool) {
  554. r := rune(chr)
  555. switch {
  556. case '0' <= r && r <= '9':
  557. return r - '0', true
  558. case 'a' <= r && r <= 'f':
  559. return r - 'a' + 10, true
  560. case 'A' <= r && r <= 'F':
  561. return r - 'A' + 10, true
  562. default:
  563. return 0, false
  564. }
  565. }
  566. func parseNumberLiteral(literal string) (value interface{}, err error) { //nolint: nonamedreturns
  567. // TODO Is Uint okay? What about -MAX_UINT
  568. value, err = strconv.ParseInt(literal, 0, 64)
  569. if err == nil {
  570. return value, nil
  571. }
  572. parseIntErr := err // Save this first error, just in case
  573. value, err = strconv.ParseFloat(literal, 64)
  574. if err == nil {
  575. return value, nil
  576. } else if errors.Is(err, strconv.ErrRange) {
  577. // Infinity, etc.
  578. return value, nil
  579. }
  580. // TODO(steve): Fix as this is assigning to err so we know the type.
  581. // Need to understand what this was trying to do?
  582. err = parseIntErr
  583. if errors.Is(err, strconv.ErrRange) {
  584. if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
  585. // Could just be a very large number (e.g. 0x8000000000000000)
  586. var value float64
  587. literal = literal[2:]
  588. for _, chr := range literal {
  589. digit := digitValue(chr)
  590. if digit >= 16 {
  591. return nil, fmt.Errorf("illegal numeric literal: %v (>= 16)", digit)
  592. }
  593. value = value*16 + float64(digit)
  594. }
  595. return value, nil
  596. }
  597. }
  598. return nil, errors.New("illegal numeric literal")
  599. }
  600. func parseStringLiteral(literal string) (string, error) {
  601. // Best case scenario...
  602. if literal == "" {
  603. return "", nil
  604. }
  605. // Slightly less-best case scenario...
  606. if !strings.ContainsRune(literal, '\\') {
  607. return literal, nil
  608. }
  609. str := literal
  610. buffer := bytes.NewBuffer(make([]byte, 0, 3*len(literal)/2))
  611. for len(str) > 0 {
  612. switch chr := str[0]; {
  613. // We do not explicitly handle the case of the quote
  614. // value, which can be: " ' /
  615. // This assumes we're already passed a partially well-formed literal
  616. case chr >= utf8.RuneSelf:
  617. chr, size := utf8.DecodeRuneInString(str)
  618. buffer.WriteRune(chr)
  619. str = str[size:]
  620. continue
  621. case chr != '\\':
  622. buffer.WriteByte(chr)
  623. str = str[1:]
  624. continue
  625. }
  626. if len(str) <= 1 {
  627. panic("len(str) <= 1")
  628. }
  629. chr := str[1]
  630. var value rune
  631. if chr >= utf8.RuneSelf {
  632. str = str[1:]
  633. var size int
  634. value, size = utf8.DecodeRuneInString(str)
  635. str = str[size:] // \ + <character>
  636. } else {
  637. str = str[2:] // \<character>
  638. switch chr {
  639. case 'b':
  640. value = '\b'
  641. case 'f':
  642. value = '\f'
  643. case 'n':
  644. value = '\n'
  645. case 'r':
  646. value = '\r'
  647. case 't':
  648. value = '\t'
  649. case 'v':
  650. value = '\v'
  651. case 'x', 'u':
  652. size := 0
  653. switch chr {
  654. case 'x':
  655. size = 2
  656. case 'u':
  657. size = 4
  658. }
  659. if len(str) < size {
  660. return "", fmt.Errorf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
  661. }
  662. for j := 0; j < size; j++ {
  663. decimal, ok := hex2decimal(str[j])
  664. if !ok {
  665. return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size])
  666. }
  667. value = value<<4 | decimal
  668. }
  669. str = str[size:]
  670. if chr == 'x' {
  671. break
  672. }
  673. if value > utf8.MaxRune {
  674. panic("value > utf8.MaxRune")
  675. }
  676. case '0':
  677. if len(str) == 0 || '0' > str[0] || str[0] > '7' {
  678. value = 0
  679. break
  680. }
  681. fallthrough
  682. case '1', '2', '3', '4', '5', '6', '7':
  683. // TODO strict
  684. value = rune(chr) - '0'
  685. j := 0
  686. for ; j < 2; j++ {
  687. if len(str) < j+1 {
  688. break
  689. }
  690. chr := str[j]
  691. if '0' > chr || chr > '7' {
  692. break
  693. }
  694. decimal := rune(str[j]) - '0'
  695. value = (value << 3) | decimal
  696. }
  697. str = str[j:]
  698. case '\\':
  699. value = '\\'
  700. case '\'', '"':
  701. value = rune(chr)
  702. case '\r':
  703. if len(str) > 0 {
  704. if str[0] == '\n' {
  705. str = str[1:]
  706. }
  707. }
  708. fallthrough
  709. case '\n':
  710. continue
  711. default:
  712. value = rune(chr)
  713. }
  714. }
  715. buffer.WriteRune(value)
  716. }
  717. return buffer.String(), nil
  718. }
// scanNumericLiteral scans a numeric literal beginning at the current
// character and returns token.NUMBER with the literal's source text, or
// token.ILLEGAL with the text scanned so far when the literal is malformed.
//
// decimalPoint is true when the caller has already consumed a leading '.'
// (a literal such as ".5"); the returned text then includes that dot.
//
// Handled forms: hexadecimal ("0x"/"0X" followed by hex digits), a leading-zero
// form scanned as octal digits, and decimal with optional fraction and
// exponent. A literal immediately followed by an identifier-start character or
// a decimal digit is rejected as ILLEGAL.
func (p *parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
	offset := p.chrOffset
	tkn := token.NUMBER
	if decimalPoint {
		// Step back over the '.' the caller consumed so it is part of the text.
		offset--
		p.scanMantissa(10)
		goto exponent
	}
	if p.chr == '0' {
		// Shadows the outer offset with the same value.
		offset := p.chrOffset
		p.read()
		switch p.chr {
		case 'x', 'X':
			// Hexadecimal
			p.read()
			// At least one hex digit must follow the prefix.
			if isDigit(p.chr, 16) {
				p.read()
			} else {
				return token.ILLEGAL, p.str[offset:p.chrOffset]
			}
			p.scanMantissa(16)
			// NOTE(review): at this point "0", "x" and at least one digit have
			// been consumed, so this condition looks unreachable — confirm.
			if p.chrOffset-offset <= 2 {
				// Only "0x" or "0X"
				p.error(0, "Illegal hexadecimal number")
			}
			goto hexadecimal
		case '.':
			// Float
			goto float
		default:
			// Octal, Float
			if p.chr == 'e' || p.chr == 'E' {
				goto exponent
			}
			p.scanMantissa(8)
			// An 8 or 9 directly after the octal digits makes the literal illegal.
			if p.chr == '8' || p.chr == '9' {
				return token.ILLEGAL, p.str[offset:p.chrOffset]
			}
			goto octal
		}
	}
	p.scanMantissa(10)
float:
	// Optional fractional part.
	if p.chr == '.' {
		p.read()
		p.scanMantissa(10)
	}
exponent:
	// Optional exponent: 'e' or 'E', an optional sign, then at least one digit.
	if p.chr == 'e' || p.chr == 'E' {
		p.read()
		if p.chr == '-' || p.chr == '+' {
			p.read()
		}
		if isDecimalDigit(p.chr) {
			p.read()
			p.scanMantissa(10)
		} else {
			return token.ILLEGAL, p.str[offset:p.chrOffset]
		}
	}
hexadecimal:
octal:
	// A number may not run straight into an identifier or another digit.
	if isIdentifierStart(p.chr) || isDecimalDigit(p.chr) {
		return token.ILLEGAL, p.str[offset:p.chrOffset]
	}
	return tkn, p.str[offset:p.chrOffset]
}