parse.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811
  1. package toml
  2. import (
  3. "fmt"
  4. "os"
  5. "strconv"
  6. "strings"
  7. "time"
  8. "unicode/utf8"
  9. "github.com/BurntSushi/toml/internal"
  10. )
  11. type parser struct {
  12. lx *lexer
  13. context Key // Full key for the current hash in scope.
  14. currentKey string // Base key name for everything except hashes.
  15. pos Position // Current position in the TOML file.
  16. tomlNext bool
  17. ordered []Key // List of keys in the order that they appear in the TOML data.
  18. keyInfo map[string]keyInfo // Map keyname → info about the TOML key.
  19. mapping map[string]interface{} // Map keyname → key value.
  20. implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names").
  21. }
  22. type keyInfo struct {
  23. pos Position
  24. tomlType tomlType
  25. }
  26. func parse(data string) (p *parser, err error) {
  27. _, tomlNext := os.LookupEnv("BURNTSUSHI_TOML_110")
  28. defer func() {
  29. if r := recover(); r != nil {
  30. if pErr, ok := r.(ParseError); ok {
  31. pErr.input = data
  32. err = pErr
  33. return
  34. }
  35. panic(r)
  36. }
  37. }()
  38. // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
  39. // which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add
  40. // it anyway.
  41. if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
  42. data = data[2:]
  43. } else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
  44. data = data[3:]
  45. }
  46. // Examine first few bytes for NULL bytes; this probably means it's a UTF-16
  47. // file (second byte in surrogate pair being NULL). Again, do this here to
  48. // avoid having to deal with UTF-8/16 stuff in the lexer.
  49. ex := 6
  50. if len(data) < 6 {
  51. ex = len(data)
  52. }
  53. if i := strings.IndexRune(data[:ex], 0); i > -1 {
  54. return nil, ParseError{
  55. Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8",
  56. Position: Position{Line: 1, Start: i, Len: 1},
  57. Line: 1,
  58. input: data,
  59. }
  60. }
  61. p = &parser{
  62. keyInfo: make(map[string]keyInfo),
  63. mapping: make(map[string]interface{}),
  64. lx: lex(data, tomlNext),
  65. ordered: make([]Key, 0),
  66. implicits: make(map[string]struct{}),
  67. tomlNext: tomlNext,
  68. }
  69. for {
  70. item := p.next()
  71. if item.typ == itemEOF {
  72. break
  73. }
  74. p.topLevel(item)
  75. }
  76. return p, nil
  77. }
  78. func (p *parser) panicErr(it item, err error) {
  79. panic(ParseError{
  80. err: err,
  81. Position: it.pos,
  82. Line: it.pos.Len,
  83. LastKey: p.current(),
  84. })
  85. }
  86. func (p *parser) panicItemf(it item, format string, v ...interface{}) {
  87. panic(ParseError{
  88. Message: fmt.Sprintf(format, v...),
  89. Position: it.pos,
  90. Line: it.pos.Len,
  91. LastKey: p.current(),
  92. })
  93. }
  94. func (p *parser) panicf(format string, v ...interface{}) {
  95. panic(ParseError{
  96. Message: fmt.Sprintf(format, v...),
  97. Position: p.pos,
  98. Line: p.pos.Line,
  99. LastKey: p.current(),
  100. })
  101. }
  102. func (p *parser) next() item {
  103. it := p.lx.nextItem()
  104. //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val)
  105. if it.typ == itemError {
  106. if it.err != nil {
  107. panic(ParseError{
  108. Position: it.pos,
  109. Line: it.pos.Line,
  110. LastKey: p.current(),
  111. err: it.err,
  112. })
  113. }
  114. p.panicItemf(it, "%s", it.val)
  115. }
  116. return it
  117. }
  118. func (p *parser) nextPos() item {
  119. it := p.next()
  120. p.pos = it.pos
  121. return it
  122. }
  123. func (p *parser) bug(format string, v ...interface{}) {
  124. panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
  125. }
  126. func (p *parser) expect(typ itemType) item {
  127. it := p.next()
  128. p.assertEqual(typ, it.typ)
  129. return it
  130. }
  131. func (p *parser) assertEqual(expected, got itemType) {
  132. if expected != got {
  133. p.bug("Expected '%s' but got '%s'.", expected, got)
  134. }
  135. }
  136. func (p *parser) topLevel(item item) {
  137. switch item.typ {
  138. case itemCommentStart: // # ..
  139. p.expect(itemText)
  140. case itemTableStart: // [ .. ]
  141. name := p.nextPos()
  142. var key Key
  143. for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
  144. key = append(key, p.keyString(name))
  145. }
  146. p.assertEqual(itemTableEnd, name.typ)
  147. p.addContext(key, false)
  148. p.setType("", tomlHash, item.pos)
  149. p.ordered = append(p.ordered, key)
  150. case itemArrayTableStart: // [[ .. ]]
  151. name := p.nextPos()
  152. var key Key
  153. for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
  154. key = append(key, p.keyString(name))
  155. }
  156. p.assertEqual(itemArrayTableEnd, name.typ)
  157. p.addContext(key, true)
  158. p.setType("", tomlArrayHash, item.pos)
  159. p.ordered = append(p.ordered, key)
  160. case itemKeyStart: // key = ..
  161. outerContext := p.context
  162. /// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
  163. k := p.nextPos()
  164. var key Key
  165. for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
  166. key = append(key, p.keyString(k))
  167. }
  168. p.assertEqual(itemKeyEnd, k.typ)
  169. /// The current key is the last part.
  170. p.currentKey = key[len(key)-1]
  171. /// All the other parts (if any) are the context; need to set each part
  172. /// as implicit.
  173. context := key[:len(key)-1]
  174. for i := range context {
  175. p.addImplicitContext(append(p.context, context[i:i+1]...))
  176. }
  177. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  178. /// Set value.
  179. vItem := p.next()
  180. val, typ := p.value(vItem, false)
  181. p.set(p.currentKey, val, typ, vItem.pos)
  182. /// Remove the context we added (preserving any context from [tbl] lines).
  183. p.context = outerContext
  184. p.currentKey = ""
  185. default:
  186. p.bug("Unexpected type at top level: %s", item.typ)
  187. }
  188. }
  189. // Gets a string for a key (or part of a key in a table name).
  190. func (p *parser) keyString(it item) string {
  191. switch it.typ {
  192. case itemText:
  193. return it.val
  194. case itemString, itemMultilineString,
  195. itemRawString, itemRawMultilineString:
  196. s, _ := p.value(it, false)
  197. return s.(string)
  198. default:
  199. p.bug("Unexpected key type: %s", it.typ)
  200. }
  201. panic("unreachable")
  202. }
  203. var datetimeRepl = strings.NewReplacer(
  204. "z", "Z",
  205. "t", "T",
  206. " ", "T")
  207. // value translates an expected value from the lexer into a Go value wrapped
  208. // as an empty interface.
  209. func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
  210. switch it.typ {
  211. case itemString:
  212. return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
  213. case itemMultilineString:
  214. return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
  215. case itemRawString:
  216. return it.val, p.typeOfPrimitive(it)
  217. case itemRawMultilineString:
  218. return stripFirstNewline(it.val), p.typeOfPrimitive(it)
  219. case itemInteger:
  220. return p.valueInteger(it)
  221. case itemFloat:
  222. return p.valueFloat(it)
  223. case itemBool:
  224. switch it.val {
  225. case "true":
  226. return true, p.typeOfPrimitive(it)
  227. case "false":
  228. return false, p.typeOfPrimitive(it)
  229. default:
  230. p.bug("Expected boolean value, but got '%s'.", it.val)
  231. }
  232. case itemDatetime:
  233. return p.valueDatetime(it)
  234. case itemArray:
  235. return p.valueArray(it)
  236. case itemInlineTableStart:
  237. return p.valueInlineTable(it, parentIsArray)
  238. default:
  239. p.bug("Unexpected value type: %s", it.typ)
  240. }
  241. panic("unreachable")
  242. }
  243. func (p *parser) valueInteger(it item) (interface{}, tomlType) {
  244. if !numUnderscoresOK(it.val) {
  245. p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
  246. }
  247. if numHasLeadingZero(it.val) {
  248. p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val)
  249. }
  250. num, err := strconv.ParseInt(it.val, 0, 64)
  251. if err != nil {
  252. // Distinguish integer values. Normally, it'd be a bug if the lexer
  253. // provides an invalid integer, but it's possible that the number is
  254. // out of range of valid values (which the lexer cannot determine).
  255. // So mark the former as a bug but the latter as a legitimate user
  256. // error.
  257. if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
  258. p.panicErr(it, errParseRange{i: it.val, size: "int64"})
  259. } else {
  260. p.bug("Expected integer value, but got '%s'.", it.val)
  261. }
  262. }
  263. return num, p.typeOfPrimitive(it)
  264. }
  265. func (p *parser) valueFloat(it item) (interface{}, tomlType) {
  266. parts := strings.FieldsFunc(it.val, func(r rune) bool {
  267. switch r {
  268. case '.', 'e', 'E':
  269. return true
  270. }
  271. return false
  272. })
  273. for _, part := range parts {
  274. if !numUnderscoresOK(part) {
  275. p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val)
  276. }
  277. }
  278. if len(parts) > 0 && numHasLeadingZero(parts[0]) {
  279. p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val)
  280. }
  281. if !numPeriodsOK(it.val) {
  282. // As a special case, numbers like '123.' or '1.e2',
  283. // which are valid as far as Go/strconv are concerned,
  284. // must be rejected because TOML says that a fractional
  285. // part consists of '.' followed by 1+ digits.
  286. p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
  287. }
  288. val := strings.Replace(it.val, "_", "", -1)
  289. if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does.
  290. val = "nan"
  291. }
  292. num, err := strconv.ParseFloat(val, 64)
  293. if err != nil {
  294. if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
  295. p.panicErr(it, errParseRange{i: it.val, size: "float64"})
  296. } else {
  297. p.panicItemf(it, "Invalid float value: %q", it.val)
  298. }
  299. }
  300. return num, p.typeOfPrimitive(it)
  301. }
  302. var dtTypes = []struct {
  303. fmt string
  304. zone *time.Location
  305. next bool
  306. }{
  307. {time.RFC3339Nano, time.Local, false},
  308. {"2006-01-02T15:04:05.999999999", internal.LocalDatetime, false},
  309. {"2006-01-02", internal.LocalDate, false},
  310. {"15:04:05.999999999", internal.LocalTime, false},
  311. // tomlNext
  312. {"2006-01-02T15:04Z07:00", time.Local, true},
  313. {"2006-01-02T15:04", internal.LocalDatetime, true},
  314. {"15:04", internal.LocalTime, true},
  315. }
  316. func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
  317. it.val = datetimeRepl.Replace(it.val)
  318. var (
  319. t time.Time
  320. ok bool
  321. err error
  322. )
  323. for _, dt := range dtTypes {
  324. if dt.next && !p.tomlNext {
  325. continue
  326. }
  327. t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
  328. if err == nil {
  329. ok = true
  330. break
  331. }
  332. }
  333. if !ok {
  334. p.panicItemf(it, "Invalid TOML Datetime: %q.", it.val)
  335. }
  336. return t, p.typeOfPrimitive(it)
  337. }
  338. func (p *parser) valueArray(it item) (interface{}, tomlType) {
  339. p.setType(p.currentKey, tomlArray, it.pos)
  340. var (
  341. types []tomlType
  342. // Initialize to a non-nil empty slice. This makes it consistent with
  343. // how S = [] decodes into a non-nil slice inside something like struct
  344. // { S []string }. See #338
  345. array = []interface{}{}
  346. )
  347. for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
  348. if it.typ == itemCommentStart {
  349. p.expect(itemText)
  350. continue
  351. }
  352. val, typ := p.value(it, true)
  353. array = append(array, val)
  354. types = append(types, typ)
  355. // XXX: types isn't used here, we need it to record the accurate type
  356. // information.
  357. //
  358. // Not entirely sure how to best store this; could use "key[0]",
  359. // "key[1]" notation, or maybe store it on the Array type?
  360. _ = types
  361. }
  362. return array, tomlArray
  363. }
  364. func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
  365. var (
  366. hash = make(map[string]interface{})
  367. outerContext = p.context
  368. outerKey = p.currentKey
  369. )
  370. p.context = append(p.context, p.currentKey)
  371. prevContext := p.context
  372. p.currentKey = ""
  373. p.addImplicit(p.context)
  374. p.addContext(p.context, parentIsArray)
  375. /// Loop over all table key/value pairs.
  376. for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
  377. if it.typ == itemCommentStart {
  378. p.expect(itemText)
  379. continue
  380. }
  381. /// Read all key parts.
  382. k := p.nextPos()
  383. var key Key
  384. for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
  385. key = append(key, p.keyString(k))
  386. }
  387. p.assertEqual(itemKeyEnd, k.typ)
  388. /// The current key is the last part.
  389. p.currentKey = key[len(key)-1]
  390. /// All the other parts (if any) are the context; need to set each part
  391. /// as implicit.
  392. context := key[:len(key)-1]
  393. for i := range context {
  394. p.addImplicitContext(append(p.context, context[i:i+1]...))
  395. }
  396. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  397. /// Set the value.
  398. val, typ := p.value(p.next(), false)
  399. p.set(p.currentKey, val, typ, it.pos)
  400. hash[p.currentKey] = val
  401. /// Restore context.
  402. p.context = prevContext
  403. }
  404. p.context = outerContext
  405. p.currentKey = outerKey
  406. return hash, tomlHash
  407. }
  408. // numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
  409. // +/- signs, and base prefixes.
  410. func numHasLeadingZero(s string) bool {
  411. if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x
  412. return true
  413. }
  414. if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
  415. return true
  416. }
  417. return false
  418. }
  419. // numUnderscoresOK checks whether each underscore in s is surrounded by
  420. // characters that are not underscores.
  421. func numUnderscoresOK(s string) bool {
  422. switch s {
  423. case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
  424. return true
  425. }
  426. accept := false
  427. for _, r := range s {
  428. if r == '_' {
  429. if !accept {
  430. return false
  431. }
  432. }
  433. // isHexadecimal is a superset of all the permissable characters
  434. // surrounding an underscore.
  435. accept = isHexadecimal(r)
  436. }
  437. return accept
  438. }
  439. // numPeriodsOK checks whether every period in s is followed by a digit.
  440. func numPeriodsOK(s string) bool {
  441. period := false
  442. for _, r := range s {
  443. if period && !isDigit(r) {
  444. return false
  445. }
  446. period = r == '.'
  447. }
  448. return !period
  449. }
  450. // Set the current context of the parser, where the context is either a hash or
  451. // an array of hashes, depending on the value of the `array` parameter.
  452. //
  453. // Establishing the context also makes sure that the key isn't a duplicate, and
  454. // will create implicit hashes automatically.
  455. func (p *parser) addContext(key Key, array bool) {
  456. var ok bool
  457. // Always start at the top level and drill down for our context.
  458. hashContext := p.mapping
  459. keyContext := make(Key, 0)
  460. // We only need implicit hashes for key[0:-1]
  461. for _, k := range key[0 : len(key)-1] {
  462. _, ok = hashContext[k]
  463. keyContext = append(keyContext, k)
  464. // No key? Make an implicit hash and move on.
  465. if !ok {
  466. p.addImplicit(keyContext)
  467. hashContext[k] = make(map[string]interface{})
  468. }
  469. // If the hash context is actually an array of tables, then set
  470. // the hash context to the last element in that array.
  471. //
  472. // Otherwise, it better be a table, since this MUST be a key group (by
  473. // virtue of it not being the last element in a key).
  474. switch t := hashContext[k].(type) {
  475. case []map[string]interface{}:
  476. hashContext = t[len(t)-1]
  477. case map[string]interface{}:
  478. hashContext = t
  479. default:
  480. p.panicf("Key '%s' was already created as a hash.", keyContext)
  481. }
  482. }
  483. p.context = keyContext
  484. if array {
  485. // If this is the first element for this array, then allocate a new
  486. // list of tables for it.
  487. k := key[len(key)-1]
  488. if _, ok := hashContext[k]; !ok {
  489. hashContext[k] = make([]map[string]interface{}, 0, 4)
  490. }
  491. // Add a new table. But make sure the key hasn't already been used
  492. // for something else.
  493. if hash, ok := hashContext[k].([]map[string]interface{}); ok {
  494. hashContext[k] = append(hash, make(map[string]interface{}))
  495. } else {
  496. p.panicf("Key '%s' was already created and cannot be used as an array.", key)
  497. }
  498. } else {
  499. p.setValue(key[len(key)-1], make(map[string]interface{}))
  500. }
  501. p.context = append(p.context, key[len(key)-1])
  502. }
  503. // set calls setValue and setType.
  504. func (p *parser) set(key string, val interface{}, typ tomlType, pos Position) {
  505. p.setValue(key, val)
  506. p.setType(key, typ, pos)
  507. }
  508. // setValue sets the given key to the given value in the current context.
  509. // It will make sure that the key hasn't already been defined, account for
  510. // implicit key groups.
  511. func (p *parser) setValue(key string, value interface{}) {
  512. var (
  513. tmpHash interface{}
  514. ok bool
  515. hash = p.mapping
  516. keyContext Key
  517. )
  518. for _, k := range p.context {
  519. keyContext = append(keyContext, k)
  520. if tmpHash, ok = hash[k]; !ok {
  521. p.bug("Context for key '%s' has not been established.", keyContext)
  522. }
  523. switch t := tmpHash.(type) {
  524. case []map[string]interface{}:
  525. // The context is a table of hashes. Pick the most recent table
  526. // defined as the current hash.
  527. hash = t[len(t)-1]
  528. case map[string]interface{}:
  529. hash = t
  530. default:
  531. p.panicf("Key '%s' has already been defined.", keyContext)
  532. }
  533. }
  534. keyContext = append(keyContext, key)
  535. if _, ok := hash[key]; ok {
  536. // Normally redefining keys isn't allowed, but the key could have been
  537. // defined implicitly and it's allowed to be redefined concretely. (See
  538. // the `valid/implicit-and-explicit-after.toml` in toml-test)
  539. //
  540. // But we have to make sure to stop marking it as an implicit. (So that
  541. // another redefinition provokes an error.)
  542. //
  543. // Note that since it has already been defined (as a hash), we don't
  544. // want to overwrite it. So our business is done.
  545. if p.isArray(keyContext) {
  546. p.removeImplicit(keyContext)
  547. hash[key] = value
  548. return
  549. }
  550. if p.isImplicit(keyContext) {
  551. p.removeImplicit(keyContext)
  552. return
  553. }
  554. // Otherwise, we have a concrete key trying to override a previous
  555. // key, which is *always* wrong.
  556. p.panicf("Key '%s' has already been defined.", keyContext)
  557. }
  558. hash[key] = value
  559. }
  560. // setType sets the type of a particular value at a given key. It should be
  561. // called immediately AFTER setValue.
  562. //
  563. // Note that if `key` is empty, then the type given will be applied to the
  564. // current context (which is either a table or an array of tables).
  565. func (p *parser) setType(key string, typ tomlType, pos Position) {
  566. keyContext := make(Key, 0, len(p.context)+1)
  567. keyContext = append(keyContext, p.context...)
  568. if len(key) > 0 { // allow type setting for hashes
  569. keyContext = append(keyContext, key)
  570. }
  571. // Special case to make empty keys ("" = 1) work.
  572. // Without it it will set "" rather than `""`.
  573. // TODO: why is this needed? And why is this only needed here?
  574. if len(keyContext) == 0 {
  575. keyContext = Key{""}
  576. }
  577. p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos}
  578. }
  579. // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
  580. // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
  581. func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} }
  582. func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) }
  583. func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok }
  584. func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray }
  585. func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) }
  586. // current returns the full key name of the current context.
  587. func (p *parser) current() string {
  588. if len(p.currentKey) == 0 {
  589. return p.context.String()
  590. }
  591. if len(p.context) == 0 {
  592. return p.currentKey
  593. }
  594. return fmt.Sprintf("%s.%s", p.context, p.currentKey)
  595. }
  596. func stripFirstNewline(s string) string {
  597. if len(s) > 0 && s[0] == '\n' {
  598. return s[1:]
  599. }
  600. if len(s) > 1 && s[0] == '\r' && s[1] == '\n' {
  601. return s[2:]
  602. }
  603. return s
  604. }
  605. // stripEscapedNewlines removes whitespace after line-ending backslashes in
  606. // multiline strings.
  607. //
  608. // A line-ending backslash is an unescaped \ followed only by whitespace until
  609. // the next newline. After a line-ending backslash, all whitespace is removed
  610. // until the next non-whitespace character.
  611. func (p *parser) stripEscapedNewlines(s string) string {
  612. var b strings.Builder
  613. var i int
  614. for {
  615. ix := strings.Index(s[i:], `\`)
  616. if ix < 0 {
  617. b.WriteString(s)
  618. return b.String()
  619. }
  620. i += ix
  621. if len(s) > i+1 && s[i+1] == '\\' {
  622. // Escaped backslash.
  623. i += 2
  624. continue
  625. }
  626. // Scan until the next non-whitespace.
  627. j := i + 1
  628. whitespaceLoop:
  629. for ; j < len(s); j++ {
  630. switch s[j] {
  631. case ' ', '\t', '\r', '\n':
  632. default:
  633. break whitespaceLoop
  634. }
  635. }
  636. if j == i+1 {
  637. // Not a whitespace escape.
  638. i++
  639. continue
  640. }
  641. if !strings.Contains(s[i:j], "\n") {
  642. // This is not a line-ending backslash.
  643. // (It's a bad escape sequence, but we can let
  644. // replaceEscapes catch it.)
  645. i++
  646. continue
  647. }
  648. b.WriteString(s[:i])
  649. s = s[j:]
  650. i = 0
  651. }
  652. }
  653. func (p *parser) replaceEscapes(it item, str string) string {
  654. replaced := make([]rune, 0, len(str))
  655. s := []byte(str)
  656. r := 0
  657. for r < len(s) {
  658. if s[r] != '\\' {
  659. c, size := utf8.DecodeRune(s[r:])
  660. r += size
  661. replaced = append(replaced, c)
  662. continue
  663. }
  664. r += 1
  665. if r >= len(s) {
  666. p.bug("Escape sequence at end of string.")
  667. return ""
  668. }
  669. switch s[r] {
  670. default:
  671. p.bug("Expected valid escape code after \\, but got %q.", s[r])
  672. case ' ', '\t':
  673. p.panicItemf(it, "invalid escape: '\\%c'", s[r])
  674. case 'b':
  675. replaced = append(replaced, rune(0x0008))
  676. r += 1
  677. case 't':
  678. replaced = append(replaced, rune(0x0009))
  679. r += 1
  680. case 'n':
  681. replaced = append(replaced, rune(0x000A))
  682. r += 1
  683. case 'f':
  684. replaced = append(replaced, rune(0x000C))
  685. r += 1
  686. case 'r':
  687. replaced = append(replaced, rune(0x000D))
  688. r += 1
  689. case 'e':
  690. if p.tomlNext {
  691. replaced = append(replaced, rune(0x001B))
  692. r += 1
  693. }
  694. case '"':
  695. replaced = append(replaced, rune(0x0022))
  696. r += 1
  697. case '\\':
  698. replaced = append(replaced, rune(0x005C))
  699. r += 1
  700. case 'x':
  701. if p.tomlNext {
  702. escaped := p.asciiEscapeToUnicode(it, s[r+1:r+3])
  703. replaced = append(replaced, escaped)
  704. r += 3
  705. }
  706. case 'u':
  707. // At this point, we know we have a Unicode escape of the form
  708. // `uXXXX` at [r, r+5). (Because the lexer guarantees this
  709. // for us.)
  710. escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5])
  711. replaced = append(replaced, escaped)
  712. r += 5
  713. case 'U':
  714. // At this point, we know we have a Unicode escape of the form
  715. // `uXXXX` at [r, r+9). (Because the lexer guarantees this
  716. // for us.)
  717. escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9])
  718. replaced = append(replaced, escaped)
  719. r += 9
  720. }
  721. }
  722. return string(replaced)
  723. }
  724. func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune {
  725. s := string(bs)
  726. hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
  727. if err != nil {
  728. p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
  729. }
  730. if !utf8.ValidRune(rune(hex)) {
  731. p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s)
  732. }
  733. return rune(hex)
  734. }