parser.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
/*
Package parser implements a parser for JavaScript.

	import (
	    "github.com/robertkrimen/otto/parser"
	)

Parse and return an AST:

	filename := "" // A filename is optional
	src := `
	    // Sample xyzzy example
	    (function(){
	        if (3.14159 > 0) {
	            console.log("Hello, World.");
	            return;
	        }

	        var xyzzy = NaN;
	        console.log("Nothing happens.");
	        return xyzzy;
	    })();
	`

	// Parse some JavaScript, yielding a *ast.Program and/or an ErrorList
	program, err := parser.ParseFile(nil, filename, src, 0)

# Warning

The parser and AST interfaces are still works-in-progress (particularly where
node types are concerned) and may change in the future.
*/
  26. package parser
  27. import (
  28. "bytes"
  29. "encoding/base64"
  30. "fmt"
  31. "io"
  32. "os"
  33. "github.com/robertkrimen/otto/ast"
  34. "github.com/robertkrimen/otto/file"
  35. "github.com/robertkrimen/otto/token"
  36. "gopkg.in/sourcemap.v1"
  37. )
  38. // A Mode value is a set of flags (or 0). They control optional parser functionality.
  39. type Mode uint
  40. const (
  41. // IgnoreRegExpErrors ignores RegExp compatibility errors (allow backtracking).
  42. IgnoreRegExpErrors Mode = 1 << iota
  43. // StoreComments stores the comments from source to the comments map.
  44. StoreComments
  45. )
// parser carries the scanning and parsing state for one source text.
type parser struct { //nolint: maligned
	str    string // The source text being parsed
	length int    // len(str), recorded when the parser is created
	base   int    // Added to an offset into str to form a file.Idx (see idxOf)

	chr       rune // The current character
	chrOffset int  // The offset of current character
	offset    int  // The offset after current character (may be greater than 1)

	idx     file.Idx    // The index of token
	token   token.Token // The token
	literal string      // The literal of the token, if any

	scope             *scope // NOTE(review): presumably the scope currently being parsed; confirm against the scope code
	insertSemicolon   bool   // If we see a newline, then insert an implicit semicolon
	implicitSemicolon bool   // An implicit semicolon exists

	errors ErrorList // Errors collected so far; reported by parse via errors.Err()

	recover struct {
		// Scratch when trying to seek to the next statement, etc.
		idx   file.Idx
		count int
	}

	mode Mode // Optional behaviour flags, e.g. StoreComments

	file *file.File // File record built from the filename, source, base and source map

	comments *ast.Comments // Comment collector; its CommentMap is attached to the parsed program
}
// Parser is implemented by types which can parse JavaScript Code.
type Parser interface {
	// Scan reads a single token from the source and returns the token, its
	// literal string value (if applicable) and its file.Idx index.
	Scan() (tkn token.Token, literal string, idx file.Idx)
}
  73. func newParser(filename, src string, base int, sm *sourcemap.Consumer) *parser {
  74. return &parser{
  75. chr: ' ', // This is set so we can start scanning by skipping whitespace
  76. str: src,
  77. length: len(src),
  78. base: base,
  79. file: file.NewFile(filename, src, base).WithSourceMap(sm),
  80. comments: ast.NewComments(),
  81. }
  82. }
  83. // NewParser returns a new Parser.
  84. func NewParser(filename, src string) Parser {
  85. return newParser(filename, src, 1, nil)
  86. }
  87. // ReadSource reads code from src if not nil, otherwise reads from filename.
  88. func ReadSource(filename string, src interface{}) ([]byte, error) {
  89. if src != nil {
  90. switch src := src.(type) {
  91. case string:
  92. return []byte(src), nil
  93. case []byte:
  94. return src, nil
  95. case *bytes.Buffer:
  96. if src != nil {
  97. return src.Bytes(), nil
  98. }
  99. case io.Reader:
  100. var bfr bytes.Buffer
  101. if _, err := io.Copy(&bfr, src); err != nil {
  102. return nil, err
  103. }
  104. return bfr.Bytes(), nil
  105. default:
  106. return nil, fmt.Errorf("invalid src type %T", src)
  107. }
  108. }
  109. return os.ReadFile(filename) //nolint: gosec
  110. }
  111. // ReadSourceMap reads the source map from src if not nil, otherwise is a noop.
  112. func ReadSourceMap(filename string, src interface{}) (*sourcemap.Consumer, error) {
  113. if src == nil {
  114. return nil, nil //nolint: nilnil
  115. }
  116. switch src := src.(type) {
  117. case string:
  118. return sourcemap.Parse(filename, []byte(src))
  119. case []byte:
  120. return sourcemap.Parse(filename, src)
  121. case *bytes.Buffer:
  122. return sourcemap.Parse(filename, src.Bytes())
  123. case io.Reader:
  124. var bfr bytes.Buffer
  125. if _, err := io.Copy(&bfr, src); err != nil {
  126. return nil, err
  127. }
  128. return sourcemap.Parse(filename, bfr.Bytes())
  129. case *sourcemap.Consumer:
  130. return src, nil
  131. default:
  132. return nil, fmt.Errorf("invalid sourcemap type %T", src)
  133. }
  134. }
  135. // ParseFileWithSourceMap parses the sourcemap returning the resulting Program.
  136. func ParseFileWithSourceMap(fileSet *file.FileSet, filename string, javascriptSource, sourcemapSource interface{}, mode Mode) (*ast.Program, error) {
  137. src, err := ReadSource(filename, javascriptSource)
  138. if err != nil {
  139. return nil, err
  140. }
  141. if sourcemapSource == nil {
  142. lines := bytes.Split(src, []byte("\n"))
  143. lastLine := lines[len(lines)-1]
  144. if bytes.HasPrefix(lastLine, []byte("//# sourceMappingURL=data:application/json")) {
  145. bits := bytes.SplitN(lastLine, []byte(","), 2)
  146. if len(bits) == 2 {
  147. if d, err := base64.StdEncoding.DecodeString(string(bits[1])); err == nil {
  148. sourcemapSource = d
  149. }
  150. }
  151. }
  152. }
  153. sm, err := ReadSourceMap(filename, sourcemapSource)
  154. if err != nil {
  155. return nil, err
  156. }
  157. base := 1
  158. if fileSet != nil {
  159. base = fileSet.AddFile(filename, string(src))
  160. }
  161. p := newParser(filename, string(src), base, sm)
  162. p.mode = mode
  163. program, err := p.parse()
  164. program.Comments = p.comments.CommentMap
  165. return program, err
  166. }
  167. // ParseFile parses the source code of a single JavaScript/ECMAScript source file and returns
  168. // the corresponding ast.Program node.
  169. //
  170. // If fileSet == nil, ParseFile parses source without a FileSet.
  171. // If fileSet != nil, ParseFile first adds filename and src to fileSet.
  172. //
  173. // The filename argument is optional and is used for labelling errors, etc.
  174. //
  175. // src may be a string, a byte slice, a bytes.Buffer, or an io.Reader, but it MUST always be in UTF-8.
  176. //
  177. // // Parse some JavaScript, yielding a *ast.Program and/or an ErrorList
  178. // program, err := parser.ParseFile(nil, "", `if (abc > 1) {}`, 0)
  179. func ParseFile(fileSet *file.FileSet, filename string, src interface{}, mode Mode) (*ast.Program, error) {
  180. return ParseFileWithSourceMap(fileSet, filename, src, nil, mode)
  181. }
  182. // ParseFunction parses a given parameter list and body as a function and returns the
  183. // corresponding ast.FunctionLiteral node.
  184. //
  185. // The parameter list, if any, should be a comma-separated list of identifiers.
  186. func ParseFunction(parameterList, body string) (*ast.FunctionLiteral, error) {
  187. src := "(function(" + parameterList + ") {\n" + body + "\n})"
  188. p := newParser("", src, 1, nil)
  189. program, err := p.parse()
  190. if err != nil {
  191. return nil, err
  192. }
  193. return program.Body[0].(*ast.ExpressionStatement).Expression.(*ast.FunctionLiteral), nil
  194. }
  195. // Scan reads a single token from the source at the current offset, increments the offset and
  196. // returns the token.Token token, a string literal representing the value of the token (if applicable)
  197. // and it's current file.Idx index.
  198. func (p *parser) Scan() (token.Token, string, file.Idx) {
  199. return p.scan()
  200. }
  201. func (p *parser) slice(idx0, idx1 file.Idx) string {
  202. from := int(idx0) - p.base
  203. to := int(idx1) - p.base
  204. if from >= 0 && to <= len(p.str) {
  205. return p.str[from:to]
  206. }
  207. return ""
  208. }
  209. func (p *parser) parse() (*ast.Program, error) {
  210. p.next()
  211. program := p.parseProgram()
  212. if false {
  213. p.errors.Sort()
  214. }
  215. if p.mode&StoreComments != 0 {
  216. p.comments.CommentMap.AddComments(program, p.comments.FetchAll(), ast.TRAILING)
  217. }
  218. return program, p.errors.Err()
  219. }
  220. func (p *parser) next() {
  221. p.token, p.literal, p.idx = p.scan()
  222. }
  223. func (p *parser) optionalSemicolon() {
  224. if p.token == token.SEMICOLON {
  225. p.next()
  226. return
  227. }
  228. if p.implicitSemicolon {
  229. p.implicitSemicolon = false
  230. return
  231. }
  232. if p.token != token.EOF && p.token != token.RIGHT_BRACE {
  233. p.expect(token.SEMICOLON)
  234. }
  235. }
  236. func (p *parser) semicolon() {
  237. if p.token != token.RIGHT_PARENTHESIS && p.token != token.RIGHT_BRACE {
  238. if p.implicitSemicolon {
  239. p.implicitSemicolon = false
  240. return
  241. }
  242. p.expect(token.SEMICOLON)
  243. }
  244. }
  245. func (p *parser) idxOf(offset int) file.Idx {
  246. return file.Idx(p.base + offset)
  247. }
  248. func (p *parser) expect(value token.Token) file.Idx {
  249. idx := p.idx
  250. if p.token != value {
  251. p.errorUnexpectedToken(p.token)
  252. }
  253. p.next()
  254. return idx
  255. }
  256. func lineCount(str string) (int, int) {
  257. line, last := 0, -1
  258. pair := false
  259. for index, chr := range str {
  260. switch chr {
  261. case '\r':
  262. line++
  263. last = index
  264. pair = true
  265. continue
  266. case '\n':
  267. if !pair {
  268. line++
  269. }
  270. last = index
  271. case '\u2028', '\u2029':
  272. line++
  273. last = index + 2
  274. }
  275. pair = false
  276. }
  277. return line, last
  278. }
  279. func (p *parser) position(idx file.Idx) file.Position {
  280. position := file.Position{}
  281. offset := int(idx) - p.base
  282. str := p.str[:offset]
  283. position.Filename = p.file.Name()
  284. line, last := lineCount(str)
  285. position.Line = 1 + line
  286. if last >= 0 {
  287. position.Column = offset - last
  288. } else {
  289. position.Column = 1 + len(str)
  290. }
  291. return position
  292. }