lexer.go 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865
  1. package parser
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "regexp"
  7. "strconv"
  8. "strings"
  9. "unicode"
  10. "unicode/utf8"
  11. "github.com/robertkrimen/otto/ast"
  12. "github.com/robertkrimen/otto/file"
  13. "github.com/robertkrimen/otto/token"
  14. )
// _chr is a single decoded character of the source text together with the
// size of its encoding, as produced by chrAt.
type _chr struct {
	value rune // the decoded rune
	width int  // number of bytes the rune occupies in the source string
}
  19. var matchIdentifier = regexp.MustCompile(`^[$_\p{L}][$_\p{L}\d}]*$`)
  20. func isDecimalDigit(chr rune) bool {
  21. return '0' <= chr && chr <= '9'
  22. }
  23. func digitValue(chr rune) int {
  24. switch {
  25. case '0' <= chr && chr <= '9':
  26. return int(chr - '0')
  27. case 'a' <= chr && chr <= 'f':
  28. return int(chr - 'a' + 10)
  29. case 'A' <= chr && chr <= 'F':
  30. return int(chr - 'A' + 10)
  31. }
  32. return 16 // Larger than any legal digit value
  33. }
  34. func isDigit(chr rune, base int) bool {
  35. return digitValue(chr) < base
  36. }
  37. func isIdentifierStart(chr rune) bool {
  38. return chr == '$' || chr == '_' || chr == '\\' ||
  39. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  40. chr >= utf8.RuneSelf && unicode.IsLetter(chr)
  41. }
  42. func isIdentifierPart(chr rune) bool {
  43. return chr == '$' || chr == '_' || chr == '\\' ||
  44. 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
  45. '0' <= chr && chr <= '9' ||
  46. chr >= utf8.RuneSelf && (unicode.IsLetter(chr) || unicode.IsDigit(chr))
  47. }
  48. func (self *_parser) scanIdentifier() (string, error) {
  49. offset := self.chrOffset
  50. parse := false
  51. for isIdentifierPart(self.chr) {
  52. if self.chr == '\\' {
  53. distance := self.chrOffset - offset
  54. self.read()
  55. if self.chr != 'u' {
  56. return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  57. }
  58. parse = true
  59. var value rune
  60. for j := 0; j < 4; j++ {
  61. self.read()
  62. decimal, ok := hex2decimal(byte(self.chr))
  63. if !ok {
  64. return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
  65. }
  66. value = value<<4 | decimal
  67. }
  68. if value == '\\' {
  69. return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
  70. } else if distance == 0 {
  71. if !isIdentifierStart(value) {
  72. return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
  73. }
  74. } else if distance > 0 {
  75. if !isIdentifierPart(value) {
  76. return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
  77. }
  78. }
  79. }
  80. self.read()
  81. }
  82. literal := string(self.str[offset:self.chrOffset])
  83. if parse {
  84. return parseStringLiteral(literal)
  85. }
  86. return literal, nil
  87. }
  88. // 7.2
  89. func isLineWhiteSpace(chr rune) bool {
  90. switch chr {
  91. case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
  92. return true
  93. case '\u000a', '\u000d', '\u2028', '\u2029':
  94. return false
  95. case '\u0085':
  96. return false
  97. }
  98. return unicode.IsSpace(chr)
  99. }
  100. // 7.3
  101. func isLineTerminator(chr rune) bool {
  102. switch chr {
  103. case '\u000a', '\u000d', '\u2028', '\u2029':
  104. return true
  105. }
  106. return false
  107. }
  108. func (self *_parser) scan() (tkn token.Token, literal string, idx file.Idx) {
  109. self.implicitSemicolon = false
  110. for {
  111. self.skipWhiteSpace()
  112. idx = self.idxOf(self.chrOffset)
  113. insertSemicolon := false
  114. switch chr := self.chr; {
  115. case isIdentifierStart(chr):
  116. var err error
  117. literal, err = self.scanIdentifier()
  118. if err != nil {
  119. tkn = token.ILLEGAL
  120. break
  121. }
  122. if len(literal) > 1 {
  123. // Keywords are longer than 1 character, avoid lookup otherwise
  124. var strict bool
  125. tkn, strict = token.IsKeyword(literal)
  126. switch tkn {
  127. case 0: // Not a keyword
  128. if literal == "true" || literal == "false" {
  129. self.insertSemicolon = true
  130. tkn = token.BOOLEAN
  131. return
  132. } else if literal == "null" {
  133. self.insertSemicolon = true
  134. tkn = token.NULL
  135. return
  136. }
  137. case token.KEYWORD:
  138. tkn = token.KEYWORD
  139. if strict {
  140. // TODO If strict and in strict mode, then this is not a break
  141. break
  142. }
  143. return
  144. case
  145. token.THIS,
  146. token.BREAK,
  147. token.THROW, // A newline after a throw is not allowed, but we need to detect it
  148. token.RETURN,
  149. token.CONTINUE,
  150. token.DEBUGGER:
  151. self.insertSemicolon = true
  152. return
  153. default:
  154. return
  155. }
  156. }
  157. self.insertSemicolon = true
  158. tkn = token.IDENTIFIER
  159. return
  160. case '0' <= chr && chr <= '9':
  161. self.insertSemicolon = true
  162. tkn, literal = self.scanNumericLiteral(false)
  163. return
  164. default:
  165. self.read()
  166. switch chr {
  167. case -1:
  168. if self.insertSemicolon {
  169. self.insertSemicolon = false
  170. self.implicitSemicolon = true
  171. }
  172. tkn = token.EOF
  173. case '\r', '\n', '\u2028', '\u2029':
  174. self.insertSemicolon = false
  175. self.implicitSemicolon = true
  176. self.comments.AtLineBreak()
  177. continue
  178. case ':':
  179. tkn = token.COLON
  180. case '.':
  181. if digitValue(self.chr) < 10 {
  182. insertSemicolon = true
  183. tkn, literal = self.scanNumericLiteral(true)
  184. } else {
  185. tkn = token.PERIOD
  186. }
  187. case ',':
  188. tkn = token.COMMA
  189. case ';':
  190. tkn = token.SEMICOLON
  191. case '(':
  192. tkn = token.LEFT_PARENTHESIS
  193. case ')':
  194. tkn = token.RIGHT_PARENTHESIS
  195. insertSemicolon = true
  196. case '[':
  197. tkn = token.LEFT_BRACKET
  198. case ']':
  199. tkn = token.RIGHT_BRACKET
  200. insertSemicolon = true
  201. case '{':
  202. tkn = token.LEFT_BRACE
  203. case '}':
  204. tkn = token.RIGHT_BRACE
  205. insertSemicolon = true
  206. case '+':
  207. tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
  208. if tkn == token.INCREMENT {
  209. insertSemicolon = true
  210. }
  211. case '-':
  212. tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
  213. if tkn == token.DECREMENT {
  214. insertSemicolon = true
  215. }
  216. case '*':
  217. tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
  218. case '/':
  219. if self.chr == '/' {
  220. if self.mode&StoreComments != 0 {
  221. literal := string(self.readSingleLineComment())
  222. self.comments.AddComment(ast.NewComment(literal, self.idx))
  223. continue
  224. }
  225. self.skipSingleLineComment()
  226. continue
  227. } else if self.chr == '*' {
  228. if self.mode&StoreComments != 0 {
  229. literal = string(self.readMultiLineComment())
  230. self.comments.AddComment(ast.NewComment(literal, self.idx))
  231. continue
  232. }
  233. self.skipMultiLineComment()
  234. continue
  235. } else {
  236. // Could be division, could be RegExp literal
  237. tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
  238. insertSemicolon = true
  239. }
  240. case '%':
  241. tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
  242. case '^':
  243. tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
  244. case '<':
  245. tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
  246. case '>':
  247. tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
  248. case '=':
  249. tkn = self.switch2(token.ASSIGN, token.EQUAL)
  250. if tkn == token.EQUAL && self.chr == '=' {
  251. self.read()
  252. tkn = token.STRICT_EQUAL
  253. }
  254. case '!':
  255. tkn = self.switch2(token.NOT, token.NOT_EQUAL)
  256. if tkn == token.NOT_EQUAL && self.chr == '=' {
  257. self.read()
  258. tkn = token.STRICT_NOT_EQUAL
  259. }
  260. case '&':
  261. if self.chr == '^' {
  262. self.read()
  263. tkn = self.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
  264. } else {
  265. tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
  266. }
  267. case '|':
  268. tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
  269. case '~':
  270. tkn = token.BITWISE_NOT
  271. case '?':
  272. tkn = token.QUESTION_MARK
  273. case '"', '\'':
  274. insertSemicolon = true
  275. tkn = token.STRING
  276. var err error
  277. literal, err = self.scanString(self.chrOffset - 1)
  278. if err != nil {
  279. tkn = token.ILLEGAL
  280. }
  281. default:
  282. self.errorUnexpected(idx, chr)
  283. tkn = token.ILLEGAL
  284. }
  285. }
  286. self.insertSemicolon = insertSemicolon
  287. return
  288. }
  289. }
  290. func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
  291. if self.chr == '=' {
  292. self.read()
  293. return tkn1
  294. }
  295. return tkn0
  296. }
  297. func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
  298. if self.chr == '=' {
  299. self.read()
  300. return tkn1
  301. }
  302. if self.chr == chr2 {
  303. self.read()
  304. return tkn2
  305. }
  306. return tkn0
  307. }
  308. func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
  309. if self.chr == '=' {
  310. self.read()
  311. return tkn1
  312. }
  313. if self.chr == chr2 {
  314. self.read()
  315. if self.chr == '=' {
  316. self.read()
  317. return tkn3
  318. }
  319. return tkn2
  320. }
  321. return tkn0
  322. }
  323. func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
  324. if self.chr == '=' {
  325. self.read()
  326. return tkn1
  327. }
  328. if self.chr == chr2 {
  329. self.read()
  330. if self.chr == '=' {
  331. self.read()
  332. return tkn3
  333. }
  334. if self.chr == chr3 {
  335. self.read()
  336. if self.chr == '=' {
  337. self.read()
  338. return tkn5
  339. }
  340. return tkn4
  341. }
  342. return tkn2
  343. }
  344. return tkn0
  345. }
  346. func (self *_parser) chrAt(index int) _chr {
  347. value, width := utf8.DecodeRuneInString(self.str[index:])
  348. return _chr{
  349. value: value,
  350. width: width,
  351. }
  352. }
  353. func (self *_parser) _peek() rune {
  354. if self.offset+1 < self.length {
  355. return rune(self.str[self.offset+1])
  356. }
  357. return -1
  358. }
  359. func (self *_parser) read() {
  360. if self.offset < self.length {
  361. self.chrOffset = self.offset
  362. chr, width := rune(self.str[self.offset]), 1
  363. if chr >= utf8.RuneSelf { // !ASCII
  364. chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
  365. if chr == utf8.RuneError && width == 1 {
  366. self.error(self.chrOffset, "Invalid UTF-8 character")
  367. }
  368. }
  369. self.offset += width
  370. self.chr = chr
  371. } else {
  372. self.chrOffset = self.length
  373. self.chr = -1 // EOF
  374. }
  375. }
  376. // This is here since the functions are so similar
  377. func (self *_RegExp_parser) read() {
  378. if self.offset < self.length {
  379. self.chrOffset = self.offset
  380. chr, width := rune(self.str[self.offset]), 1
  381. if chr >= utf8.RuneSelf { // !ASCII
  382. chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
  383. if chr == utf8.RuneError && width == 1 {
  384. self.error(self.chrOffset, "Invalid UTF-8 character")
  385. }
  386. }
  387. self.offset += width
  388. self.chr = chr
  389. } else {
  390. self.chrOffset = self.length
  391. self.chr = -1 // EOF
  392. }
  393. }
  394. func (self *_parser) readSingleLineComment() (result []rune) {
  395. for self.chr != -1 {
  396. self.read()
  397. if isLineTerminator(self.chr) {
  398. return
  399. }
  400. result = append(result, self.chr)
  401. }
  402. // Get rid of the trailing -1
  403. result = result[:len(result)-1]
  404. return
  405. }
  406. func (self *_parser) readMultiLineComment() (result []rune) {
  407. self.read()
  408. for self.chr >= 0 {
  409. chr := self.chr
  410. self.read()
  411. if chr == '*' && self.chr == '/' {
  412. self.read()
  413. return
  414. }
  415. result = append(result, chr)
  416. }
  417. self.errorUnexpected(0, self.chr)
  418. return
  419. }
  420. func (self *_parser) skipSingleLineComment() {
  421. for self.chr != -1 {
  422. self.read()
  423. if isLineTerminator(self.chr) {
  424. return
  425. }
  426. }
  427. }
  428. func (self *_parser) skipMultiLineComment() {
  429. self.read()
  430. for self.chr >= 0 {
  431. chr := self.chr
  432. self.read()
  433. if chr == '*' && self.chr == '/' {
  434. self.read()
  435. return
  436. }
  437. }
  438. self.errorUnexpected(0, self.chr)
  439. }
  440. func (self *_parser) skipWhiteSpace() {
  441. for {
  442. switch self.chr {
  443. case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
  444. self.read()
  445. continue
  446. case '\r':
  447. if self._peek() == '\n' {
  448. self.comments.AtLineBreak()
  449. self.read()
  450. }
  451. fallthrough
  452. case '\u2028', '\u2029', '\n':
  453. if self.insertSemicolon {
  454. return
  455. }
  456. self.comments.AtLineBreak()
  457. self.read()
  458. continue
  459. }
  460. if self.chr >= utf8.RuneSelf {
  461. if unicode.IsSpace(self.chr) {
  462. self.read()
  463. continue
  464. }
  465. }
  466. break
  467. }
  468. }
  469. func (self *_parser) skipLineWhiteSpace() {
  470. for isLineWhiteSpace(self.chr) {
  471. self.read()
  472. }
  473. }
  474. func (self *_parser) scanMantissa(base int) {
  475. for digitValue(self.chr) < base {
  476. self.read()
  477. }
  478. }
  479. func (self *_parser) scanEscape(quote rune) {
  480. var length, base uint32
  481. switch self.chr {
  482. //case '0', '1', '2', '3', '4', '5', '6', '7':
  483. // Octal:
  484. // length, base, limit = 3, 8, 255
  485. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '0':
  486. self.read()
  487. return
  488. case '\r', '\n', '\u2028', '\u2029':
  489. self.scanNewline()
  490. return
  491. case 'x':
  492. self.read()
  493. length, base = 2, 16
  494. case 'u':
  495. self.read()
  496. length, base = 4, 16
  497. default:
  498. self.read() // Always make progress
  499. return
  500. }
  501. var value uint32
  502. for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
  503. digit := uint32(digitValue(self.chr))
  504. if digit >= base {
  505. break
  506. }
  507. value = value*base + digit
  508. self.read()
  509. }
  510. }
  511. func (self *_parser) scanString(offset int) (string, error) {
  512. // " ' /
  513. quote := rune(self.str[offset])
  514. for self.chr != quote {
  515. chr := self.chr
  516. if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
  517. goto newline
  518. }
  519. self.read()
  520. if chr == '\\' {
  521. if quote == '/' {
  522. if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
  523. goto newline
  524. }
  525. self.read()
  526. } else {
  527. self.scanEscape(quote)
  528. }
  529. } else if chr == '[' && quote == '/' {
  530. // Allow a slash (/) in a bracket character class ([...])
  531. // TODO Fix this, this is hacky...
  532. quote = -1
  533. } else if chr == ']' && quote == -1 {
  534. quote = '/'
  535. }
  536. }
  537. // " ' /
  538. self.read()
  539. return string(self.str[offset:self.chrOffset]), nil
  540. newline:
  541. self.scanNewline()
  542. err := "String not terminated"
  543. if quote == '/' {
  544. err = "Invalid regular expression: missing /"
  545. self.error(self.idxOf(offset), err)
  546. }
  547. return "", errors.New(err)
  548. }
  549. func (self *_parser) scanNewline() {
  550. if self.chr == '\r' {
  551. self.read()
  552. if self.chr != '\n' {
  553. return
  554. }
  555. }
  556. self.read()
  557. }
  558. func hex2decimal(chr byte) (value rune, ok bool) {
  559. {
  560. chr := rune(chr)
  561. switch {
  562. case '0' <= chr && chr <= '9':
  563. return chr - '0', true
  564. case 'a' <= chr && chr <= 'f':
  565. return chr - 'a' + 10, true
  566. case 'A' <= chr && chr <= 'F':
  567. return chr - 'A' + 10, true
  568. }
  569. return
  570. }
  571. }
  572. func parseNumberLiteral(literal string) (value interface{}, err error) {
  573. // TODO Is Uint okay? What about -MAX_UINT
  574. value, err = strconv.ParseInt(literal, 0, 64)
  575. if err == nil {
  576. return value, nil
  577. }
  578. parseIntErr := err // Save this first error, just in case
  579. value, err = strconv.ParseFloat(literal, 64)
  580. if err == nil {
  581. return value, nil
  582. } else if err.(*strconv.NumError).Err == strconv.ErrRange {
  583. // Infinity, etc.
  584. return value, nil
  585. }
  586. err = parseIntErr
  587. if err.(*strconv.NumError).Err == strconv.ErrRange {
  588. if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
  589. // Could just be a very large number (e.g. 0x8000000000000000)
  590. var value float64
  591. literal = literal[2:]
  592. for _, chr := range literal {
  593. digit := digitValue(chr)
  594. if digit >= 16 {
  595. return nil, errors.New("Illegal numeric literal")
  596. }
  597. value = value*16 + float64(digit)
  598. }
  599. return value, nil
  600. }
  601. }
  602. return nil, errors.New("Illegal numeric literal")
  603. }
  604. func parseStringLiteral(literal string) (string, error) {
  605. // Best case scenario...
  606. if literal == "" {
  607. return "", nil
  608. }
  609. // Slightly less-best case scenario...
  610. if !strings.ContainsRune(literal, '\\') {
  611. return literal, nil
  612. }
  613. str := literal
  614. buffer := bytes.NewBuffer(make([]byte, 0, 3*len(literal)/2))
  615. for len(str) > 0 {
  616. switch chr := str[0]; {
  617. // We do not explicitly handle the case of the quote
  618. // value, which can be: " ' /
  619. // This assumes we're already passed a partially well-formed literal
  620. case chr >= utf8.RuneSelf:
  621. chr, size := utf8.DecodeRuneInString(str)
  622. buffer.WriteRune(chr)
  623. str = str[size:]
  624. continue
  625. case chr != '\\':
  626. buffer.WriteByte(chr)
  627. str = str[1:]
  628. continue
  629. }
  630. if len(str) <= 1 {
  631. panic("len(str) <= 1")
  632. }
  633. chr := str[1]
  634. var value rune
  635. if chr >= utf8.RuneSelf {
  636. str = str[1:]
  637. var size int
  638. value, size = utf8.DecodeRuneInString(str)
  639. str = str[size:] // \ + <character>
  640. } else {
  641. str = str[2:] // \<character>
  642. switch chr {
  643. case 'b':
  644. value = '\b'
  645. case 'f':
  646. value = '\f'
  647. case 'n':
  648. value = '\n'
  649. case 'r':
  650. value = '\r'
  651. case 't':
  652. value = '\t'
  653. case 'v':
  654. value = '\v'
  655. case 'x', 'u':
  656. size := 0
  657. switch chr {
  658. case 'x':
  659. size = 2
  660. case 'u':
  661. size = 4
  662. }
  663. if len(str) < size {
  664. return "", fmt.Errorf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
  665. }
  666. for j := 0; j < size; j++ {
  667. decimal, ok := hex2decimal(str[j])
  668. if !ok {
  669. return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size])
  670. }
  671. value = value<<4 | decimal
  672. }
  673. str = str[size:]
  674. if chr == 'x' {
  675. break
  676. }
  677. if value > utf8.MaxRune {
  678. panic("value > utf8.MaxRune")
  679. }
  680. case '0':
  681. if len(str) == 0 || '0' > str[0] || str[0] > '7' {
  682. value = 0
  683. break
  684. }
  685. fallthrough
  686. case '1', '2', '3', '4', '5', '6', '7':
  687. // TODO strict
  688. value = rune(chr) - '0'
  689. j := 0
  690. for ; j < 2; j++ {
  691. if len(str) < j+1 {
  692. break
  693. }
  694. chr := str[j]
  695. if '0' > chr || chr > '7' {
  696. break
  697. }
  698. decimal := rune(str[j]) - '0'
  699. value = (value << 3) | decimal
  700. }
  701. str = str[j:]
  702. case '\\':
  703. value = '\\'
  704. case '\'', '"':
  705. value = rune(chr)
  706. case '\r':
  707. if len(str) > 0 {
  708. if str[0] == '\n' {
  709. str = str[1:]
  710. }
  711. }
  712. fallthrough
  713. case '\n':
  714. continue
  715. default:
  716. value = rune(chr)
  717. }
  718. }
  719. buffer.WriteRune(value)
  720. }
  721. return buffer.String(), nil
  722. }
  723. func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
  724. offset := self.chrOffset
  725. tkn := token.NUMBER
  726. if decimalPoint {
  727. offset--
  728. self.scanMantissa(10)
  729. goto exponent
  730. }
  731. if self.chr == '0' {
  732. offset := self.chrOffset
  733. self.read()
  734. if self.chr == 'x' || self.chr == 'X' {
  735. // Hexadecimal
  736. self.read()
  737. if isDigit(self.chr, 16) {
  738. self.read()
  739. } else {
  740. return token.ILLEGAL, self.str[offset:self.chrOffset]
  741. }
  742. self.scanMantissa(16)
  743. if self.chrOffset-offset <= 2 {
  744. // Only "0x" or "0X"
  745. self.error(0, "Illegal hexadecimal number")
  746. }
  747. goto hexadecimal
  748. } else if self.chr == '.' {
  749. // Float
  750. goto float
  751. } else {
  752. // Octal, Float
  753. if self.chr == 'e' || self.chr == 'E' {
  754. goto exponent
  755. }
  756. self.scanMantissa(8)
  757. if self.chr == '8' || self.chr == '9' {
  758. return token.ILLEGAL, self.str[offset:self.chrOffset]
  759. }
  760. goto octal
  761. }
  762. }
  763. self.scanMantissa(10)
  764. float:
  765. if self.chr == '.' {
  766. self.read()
  767. self.scanMantissa(10)
  768. }
  769. exponent:
  770. if self.chr == 'e' || self.chr == 'E' {
  771. self.read()
  772. if self.chr == '-' || self.chr == '+' {
  773. self.read()
  774. }
  775. if isDecimalDigit(self.chr) {
  776. self.read()
  777. self.scanMantissa(10)
  778. } else {
  779. return token.ILLEGAL, self.str[offset:self.chrOffset]
  780. }
  781. }
  782. hexadecimal:
  783. octal:
  784. if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
  785. return token.ILLEGAL, self.str[offset:self.chrOffset]
  786. }
  787. return tkn, self.str[offset:self.chrOffset]
  788. }