yongxu
/
sparrow


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
12761277127812791280128112821283
							package toml

import (
	"fmt"
	"reflect"
	"runtime"
	"strings"
	"unicode"
	"unicode/utf8"
)

// itemType identifies the kind of an item emitted by the lexer.
type itemType int

// The item kinds. Values are assigned sequentially with iota, starting with
// itemError.
const (
	itemError itemType = iota
	itemNIL            // used in the parser to indicate no type
	itemEOF
	itemText
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemKeyEnd
	itemCommentStart
	itemInlineTableStart
	itemInlineTableEnd
)

// eof is the rune value returned by (*lexer).next once the input is exhausted.
const eof = 0

// stateFn is a single state of the lexer's state machine. It receives the
// lexer and returns the state to run next; the error helpers return nil.
type stateFn func(lx *lexer) stateFn

// String formats the position as its line, start and length.
func (p Position) String() string {
	return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len)
}

// lexer holds the state of the TOML scanner.
//
// input[start:pos] is the text of the item currently being scanned (see
// current); line is the current line number and starts at 1 (see lex). state
// is the state function that nextItem runs next, and items is the buffered
// channel through which emitted items are handed to nextItem. tomlNext is a
// flag passed to lex that switches on alternative behaviour in several states
// (bare key characters, newlines in inline tables, \e and \x escapes).
type lexer struct {
	input    string
	start    int
	pos      int
	line     int
	state    stateFn
	items    chan item
	tomlNext bool

	// Allow for backing up up to 4 runes. This is necessary because TOML
	// contains 3-rune tokens (""" and ''').
	prevWidths [4]int
	nprev      int  // how many of prevWidths are in use
	atEOF      bool // If we emit an eof, we can still back up, but it is not OK to call next again.

	// A stack of state functions used to maintain context.
	//
	// The idea is to reuse parts of the state machine in various places. For
	// example, values can appear at the top level or within arbitrarily nested
	// arrays. The last state on the stack is used after a value has been lexed.
	// Similarly for comments.
	stack []stateFn
}

// item is a single token produced by the lexer.
//
// typ is the kind of token, val is the input text it covers (as emit and
// emitTrim take it from current), err is set by the error helpers on
// itemError items, and pos records where in the input the item was found.
type item struct {
	typ itemType
	val string
	err error
	pos Position
}

// nextItem returns the next item from the lexer.
//
// If an item is already buffered in the items channel it is returned
// immediately; otherwise the current state is run (which may emit items) and
// the check is repeated.
func (lx *lexer) nextItem() item {
	for {
		select {
		case it := <-lx.items:
			return it
		default:
		}
		// Nothing buffered yet: advance the state machine by one state.
		lx.state = lx.state(lx)
		//fmt.Printf("     STATE %-24s  current: %-10s	stack: %s\n", lx.state, lx.current(), lx.stack)
	}
}

// lex creates a lexer for input that starts in the lexTop state on line 1.
// tomlNext is stored on the lexer and consulted by individual states.
func lex(input string, tomlNext bool) *lexer {
	return &lexer{
		input:    input,
		tomlNext: tomlNext,
		line:     1,
		state:    lexTop,
		items:    make(chan item, 10),
		stack:    make([]stateFn, 0, 10),
	}
}

// push places state on top of the state stack so that a later pop returns it.
func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}

// pop removes and returns the state on top of the state stack. Popping an
// empty stack is a lexer bug: an error item is emitted and nil is returned.
func (lx *lexer) pop() stateFn {
	depth := len(lx.stack)
	if depth == 0 {
		return lx.errorf("BUG in lexer: no states to pop")
	}
	top := lx.stack[depth-1]
	lx.stack = lx.stack[:depth-1]
	return top
}

// current returns the pending input: everything between start and pos.
func (lx *lexer) current() string {
	return lx.input[lx.start:lx.pos]
}

// getPos returns the position of the pending input: the current line, the
// start offset, and the number of bytes between start and pos. The length is
// never reported as less than 1.
//
// It uses a pointer receiver like every other lexer method, so calling it does
// not copy the whole lexer struct.
func (lx *lexer) getPos() Position {
	length := lx.pos - lx.start
	if length <= 0 {
		length = 1
	}
	return Position{
		Line:  lx.line,
		Start: lx.start,
		Len:   length,
	}
}

// emit sends the pending input as an item of the given type and then marks
// that input as consumed.
func (lx *lexer) emit(typ itemType) {
	if lx.pos < lx.start {
		// Needed for multiline strings ending with an incomplete UTF-8
		// sequence.
		lx.error(errLexUTF8{lx.input[lx.pos]})
		return
	}
	it := item{typ: typ, pos: lx.getPos(), val: lx.current()}
	lx.items <- it
	lx.start = lx.pos
}

// emitTrim is like emit, but strips leading and trailing whitespace from the
// item's value. The reported position still covers the untrimmed input.
func (lx *lexer) emitTrim(typ itemType) {
	where := lx.getPos()
	trimmed := strings.TrimSpace(lx.current())
	lx.items <- item{typ: typ, pos: where, val: trimmed}
	lx.start = lx.pos
}

// next consumes and returns the next rune of the input.
//
// Once the input is exhausted it sets atEOF and returns eof; calling next
// again without an intervening backup panics. If the input at pos is not valid
// UTF-8, or is a control character (including a '\r' that is not followed by
// '\n'), an error item is emitted and utf8.RuneError is returned without
// advancing pos.
func (lx *lexer) next() (r rune) {
	if lx.atEOF {
		panic("BUG in lexer: next called after EOF")
	}
	if lx.pos >= len(lx.input) {
		lx.atEOF = true
		return eof
	}

	if lx.input[lx.pos] == '\n' {
		lx.line++
	}
	// Make room for the width of the rune we are about to read.
	lx.prevWidths[3] = lx.prevWidths[2]
	lx.prevWidths[2] = lx.prevWidths[1]
	lx.prevWidths[1] = lx.prevWidths[0]
	if lx.nprev < 4 {
		lx.nprev++
	}

	r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
	// An invalid encoding is reported as (RuneError, 1). RuneError with a
	// larger width is a correctly encoded U+FFFD, which is valid UTF-8 and must
	// not be reported as an encoding error.
	if r == utf8.RuneError && w == 1 {
		lx.error(errLexUTF8{lx.input[lx.pos]})
		return utf8.RuneError
	}

	// Note: don't use peek() here, as this calls next().
	if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) {
		lx.errorControlChar(r)
		return utf8.RuneError
	}

	lx.prevWidths[0] = w
	lx.pos += w
	return r
}

// ignore discards the pending input by moving start up to pos, so the skipped
// text is not part of the next emitted item.
func (lx *lexer) ignore() {
	lx.start = lx.pos
}

// backup steps back one rune. Can be called 4 times between calls to next.
//
// Backing up over eof only clears the atEOF flag. Backing up with no recorded
// rune widths is a lexer bug and panics.
func (lx *lexer) backup() {
	if lx.atEOF {
		lx.atEOF = false
		return
	}
	if lx.nprev < 1 {
		panic("BUG in lexer: backed up too far")
	}

	width := lx.prevWidths[0]
	// Drop the newest width and shift the older ones one slot forward.
	copy(lx.prevWidths[:3], lx.prevWidths[1:])
	lx.nprev--

	lx.pos -= width
	// Undo the line increment next() made when it consumed this newline.
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}

// accept reads the next rune and reports whether it equals valid. The rune
// stays consumed on a match and is put back otherwise.
func (lx *lexer) accept(valid rune) bool {
	if lx.next() != valid {
		lx.backup()
		return false
	}
	return true
}

// peek returns but does not consume the next rune in the input. It is
// implemented as a next followed by a backup.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}

// skip consumes runes for as long as pred accepts them, puts back the first
// rune that pred rejects, and discards everything consumed.
func (lx *lexer) skip(pred func(rune) bool) {
	for pred(lx.next()) {
	}
	lx.backup()
	lx.ignore()
}

// error stops all lexing by emitting an error item and returning nil.
//
// If the lexer is at EOF the error is reported through errorPrevLine instead
// of at the current position.
func (lx *lexer) error(err error) stateFn {
	if !lx.atEOF {
		lx.items <- item{typ: itemError, pos: lx.getPos(), err: err}
		return nil
	}
	return lx.errorPrevLine(err)
}

// errorPrevLine is like error(), but reports the error one line up, at the
// single byte just before the current position.
//
// This is so that unexpected EOF or NL errors don't show on a new blank line.
func (lx *lexer) errorPrevLine(err error) stateFn {
	where := lx.getPos()
	where.Line, where.Start, where.Len = where.Line-1, lx.pos-1, 1
	lx.items <- item{typ: itemError, pos: where, err: err}
	return nil
}

// errorPos is like error(), but the caller supplies the start offset and
// length of the reported position; the line is still the current one.
func (lx *lexer) errorPos(start, length int, err error) stateFn {
	where := lx.getPos()
	where.Start, where.Len = start, length
	lx.items <- item{typ: itemError, pos: where, err: err}
	return nil
}

// errorf formats a new error with fmt.Errorf and reports it through error(),
// which also takes care of moving the position back when the lexer is at EOF.
// It always returns nil.
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
	return lx.error(fmt.Errorf(format, values...))
}

// errorControlChar reports the control character cc as an errLexControl error
// covering the single byte before the current position.
func (lx *lexer) errorControlChar(cc rune) stateFn {
	return lx.errorPos(lx.pos-1, 1, errLexControl{cc})
}

// lexTop is the state at the top level of a TOML document. It skips
// whitespace and newlines, dispatches comments and table headers, emits
// itemEOF at the end of the input, and hands anything else to the key lexer.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r), isNL(r):
		lx.ignore()
		return lexTop
	case r == '#':
		lx.push(lexTop)
		return lexCommentStart
	case r == '[':
		return lexTableStart
	case r == eof:
		if lx.pos > lx.start {
			return lx.errorf("unexpected EOF")
		}
		lx.emit(itemEOF)
		return nil
	}

	// Only a key can start here: put the rune back and let the key states
	// handle it, returning to lexTopEnd once the key/value pair is done.
	lx.backup()
	lx.push(lexTopEnd)
	return lexKeyStart
}

// lexTopEnd runs after a top-level item (a value or a table header) has been
// consumed. Only whitespace or a comment may follow on the same line; a
// newline returns to lexTop and EOF ends lexing successfully.
func lexTopEnd(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isWhitespace(r):
		return lexTopEnd
	case isNL(r):
		lx.ignore()
		return lexTop
	case r == '#':
		// The comment states read up to the newline for us.
		lx.push(lexTop)
		return lexCommentStart
	case r == eof:
		lx.emit(itemEOF)
		return nil
	default:
		return lx.errorf(
			"expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
			r)
	}
}

// lexTableStart lexes the opening of a table header. It assumes that the
// first '[' has already been consumed.
//
// A second '[' marks an array of tables, e.g. '[[name]]'. The matching end
// state is pushed so that it runs once the name has been lexed.
func lexTableStart(lx *lexer) stateFn {
	if lx.peek() != '[' {
		lx.emit(itemTableStart)
		lx.push(lexTableEnd)
		return lexTableNameStart
	}

	lx.next() // consume the second '['
	lx.emit(itemArrayTableStart)
	lx.push(lexArrayTableEnd)
	return lexTableNameStart
}

// lexTableEnd emits itemTableEnd and continues with lexTopEnd. It is pushed
// by lexTableStart and reached when lexTableNameEnd pops after reading ']'.
func lexTableEnd(lx *lexer) stateFn {
	lx.emit(itemTableEnd)
	return lexTopEnd
}

// lexArrayTableEnd requires the second ']' that closes an array-of-tables
// header, emits itemArrayTableEnd and continues with lexTopEnd.
func lexArrayTableEnd(lx *lexer) stateFn {
	r := lx.next()
	if r == ']' {
		lx.emit(itemArrayTableEnd)
		return lexTopEnd
	}
	return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r)
}

// lexTableNameStart begins one part of a (possibly dotted) table name. After
// skipping whitespace it rejects empty parts and dispatches to the quoted or
// bare name state, with lexTableNameEnd pushed to run afterwards.
func lexTableNameStart(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	switch lx.peek() {
	case ']', eof:
		return lx.errorf("unexpected end of table name (table names cannot be empty)")
	case '.':
		return lx.errorf("unexpected table separator (table names cannot be empty)")
	case '"', '\'':
		lx.ignore()
		lx.push(lexTableNameEnd)
		return lexQuotedName
	}
	lx.push(lexTableNameEnd)
	return lexBareName
}

// lexTableNameEnd runs after one part of a table name. It skips whitespace
// and then expects either '.', which starts the next part, or ']', which ends
// the name and pops the state pushed by lexTableStart.
func lexTableNameEnd(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	r := lx.next()
	switch {
	case r == ']':
		return lx.pop()
	case r == '.':
		lx.ignore()
		return lexTableNameStart
	case isWhitespace(r):
		return lexTableNameEnd
	}
	return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
}

// lexBareName lexes one bare (unquoted) part of a key or table name, e.g.
// only 'a' inside 'a.b'.
//
// It keeps consuming runes accepted by isBareKeyChar. The first other rune is
// put back, the part is emitted as itemText, and the next state is popped.
func lexBareName(lx *lexer) stateFn {
	if r := lx.next(); !isBareKeyChar(r, lx.tomlNext) {
		lx.backup()
		lx.emit(itemText)
		return lx.pop()
	}
	return lexBareName
}

// lexQuotedName lexes one quoted part of a key or table name, e.g. only '"a"'
// inside '"a".b'.
//
// It consumes the opening quote and continues with the matching string state:
// lexString for '"' and lexRawString for "'".
func lexQuotedName(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == '"':
		lx.ignore() // ignore the '"'
		return lexString
	case r == '\'':
		lx.ignore() // ignore the "'"
		return lexRawString
	case isWhitespace(r):
		return lexSkip(lx, lexValue)
	case r == eof:
		return lx.errorf("unexpected EOF; expected value")
	default:
		return lx.errorf("expected value but found %q instead", r)
	}
}

// lexKeyStart begins a key. After skipping whitespace it rejects a key that
// starts with '=', '.' or EOF, emits itemKeyStart and continues with
// lexKeyNameStart to read the first key part.
func lexKeyStart(lx *lexer) stateFn {
	lx.skip(isWhitespace)

	r := lx.peek()
	if r == '=' || r == eof {
		return lx.errorf("unexpected '=': key name appears blank")
	}
	if r == '.' {
		return lx.errorf("unexpected '.': keys cannot start with a '.'")
	}
	if r == '"' || r == '\'' {
		lx.ignore()
	}
	lx.emit(itemKeyStart)
	return lexKeyNameStart
}

// lexKeyNameStart begins one part of a (possibly dotted) key. After skipping
// whitespace it rejects empty parts and dispatches to the quoted or bare name
// state, with lexKeyEnd pushed to run afterwards.
func lexKeyNameStart(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	switch lx.peek() {
	case '=', eof:
		return lx.errorf("unexpected '='")
	case '.':
		return lx.errorf("unexpected '.'")
	case '"', '\'':
		lx.ignore()
		lx.push(lexKeyEnd)
		return lexQuotedName
	}
	lx.push(lexKeyEnd)
	return lexBareName
}

// lexKeyEnd runs after one part of a key. It skips whitespace and then
// expects either '.', which starts the next key part, or the separator '=',
// which emits itemKeyEnd and moves on to the value.
func lexKeyEnd(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	r := lx.next()
	switch {
	case r == '=':
		lx.emit(itemKeyEnd)
		return lexSkip(lx, lexValue)
	case r == '.':
		lx.ignore()
		return lexKeyNameStart
	case isWhitespace(r):
		return lexSkip(lx, lexKeyEnd)
	case r == eof:
		return lx.errorf("unexpected EOF; expected key separator '='")
	}
	return lx.errorf("expected '.' or '=', but got %q instead", r)
}

// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
//
// It inspects the first rune and dispatches to the state for that kind of
// value: numbers and dates, arrays, inline tables, the four string forms,
// inf/nan, signed numbers and booleans.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT newlines.
	// In array syntax, the array states are responsible for ignoring newlines.
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexValue)
	case isDigit(r):
		// Put the digit back so lexNumberOrDateStart sees the whole value.
		lx.backup()
		return lexNumberOrDateStart
	}
	switch r {
	case '[':
		lx.ignore()
		lx.emit(itemArray)
		return lexArrayValue
	case '{':
		lx.ignore()
		lx.emit(itemInlineTableStart)
		return lexInlineTableValue
	case '"':
		if lx.accept('"') {
			if lx.accept('"') {
				lx.ignore() // Ignore """
				return lexMultilineString
			}
			// Only two quotes: this is an empty string. Put the second quote
			// back so lexString sees it as the closing quote.
			lx.backup()
		}
		lx.ignore() // ignore the '"'
		return lexString
	case '\'':
		if lx.accept('\'') {
			if lx.accept('\'') {
				lx.ignore() // Ignore '''
				return lexMultilineRawString
			}
			// Only two quotes: this is an empty raw string. Put the second
			// quote back so lexRawString sees it as the closing quote.
			lx.backup()
		}
		lx.ignore() // ignore the "'"
		return lexRawString
	case '.': // special error case, be kind to users
		return lx.errorf("floats must start with a digit, not '.'")
	case 'i', 'n':
		// "inf" or "nan". If neither suffix matches, control leaves the switch
		// and reaches the letter check below.
		// NOTE(review): the first rune is not tied to its suffix, so input such
		// as "nnf" is also emitted as a float here — presumably rejected later
		// by the parser; confirm.
		if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
			lx.emit(itemFloat)
			return lx.pop()
		}
	case '-', '+':
		return lexDecimalNumberStart
	}
	if unicode.IsLetter(r) {
		// Be permissive here; lexBool will give a nice error if the
		// user wrote something like
		//   x = foo
		// (i.e. not 'true' or 'false' but is something else word-like.)
		lx.backup()
		return lexBool
	}
	if r == eof {
		return lx.errorf("unexpected EOF; expected value")
	}
	return lx.errorf("expected value but found %q instead", r)
}

// lexArrayValue lexes the start of one array element. It assumes that '[' or
// ',' has just been consumed. Whitespace, newlines and comments are skipped;
// ']' ends the array; anything else is lexed as a value, after which
// lexArrayValueEnd runs.
func lexArrayValue(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == ']':
		return lexArrayEnd
	case r == ',':
		return lx.errorf("unexpected comma")
	case r == '#':
		lx.push(lexArrayValue)
		return lexCommentStart
	case isWhitespace(r), isNL(r):
		return lexSkip(lx, lexArrayValue)
	}

	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}

// lexArrayValueEnd runs after an array element. It skips whitespace, newlines
// and comments and then expects ',' (another element may follow) or ']' (the
// array is finished).
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == ',':
		lx.ignore()
		return lexArrayValue // move on to the next value
	case r == ']':
		return lexArrayEnd
	case r == '#':
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case isWhitespace(r), isNL(r):
		return lexSkip(lx, lexArrayValueEnd)
	}
	return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r))
}

// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed.
//
// The ']' itself is discarded, so itemArrayEnd is emitted with an empty value,
// and the state to continue with is popped from the stack.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}

// lexInlineTableValue lexes the start of one key/value pair in an inline
// table. It assumes that '{' or ',' has just been consumed.
//
// Whitespace and comments are skipped. Newlines are skipped only when
// tomlNext is set and are an error otherwise. '}' ends the table; anything
// else is lexed as a key, after which lexInlineTableValueEnd runs.
func lexInlineTableValue(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == '}':
		return lexInlineTableEnd
	case r == ',':
		return lx.errorf("unexpected comma")
	case r == '#':
		lx.push(lexInlineTableValue)
		return lexCommentStart
	case isWhitespace(r):
		return lexSkip(lx, lexInlineTableValue)
	case isNL(r):
		if !lx.tomlNext {
			return lx.errorPrevLine(errLexInlineTableNL{})
		}
		return lexSkip(lx, lexInlineTableValue)
	}

	lx.backup()
	lx.push(lexInlineTableValueEnd)
	return lexKeyStart
}

// lexInlineTableValueEnd runs after a key/value pair in an inline table. It
// skips whitespace and comments and then expects ',' (another pair may
// follow) or '}' (the table is finished).
//
// Newlines and a trailing comma before '}' are accepted only when tomlNext is
// set; otherwise both are errors.
func lexInlineTableValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '}':
		return lexInlineTableEnd
	case r == ',':
		lx.ignore()
		lx.skip(isWhitespace)
		if lx.peek() != '}' {
			return lexInlineTableValue
		}
		// A '}' directly after the comma means the comma was a trailing one.
		if !lx.tomlNext {
			return lx.errorf("trailing comma not allowed in inline tables")
		}
		return lexInlineTableValueEnd
	case r == '#':
		lx.push(lexInlineTableValueEnd)
		return lexCommentStart
	case isWhitespace(r):
		return lexSkip(lx, lexInlineTableValueEnd)
	case isNL(r):
		if !lx.tomlNext {
			return lx.errorPrevLine(errLexInlineTableNL{})
		}
		return lexSkip(lx, lexInlineTableValueEnd)
	}
	return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r))
}

// runeOrEOF renders r for use in an error message: the rune wrapped in single
// quotes, or the text "end of file" when r is eof.
func runeOrEOF(r rune) string {
	if r != eof {
		return "'" + string(r) + "'"
	}
	return "end of file"
}

// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed.
//
// The '}' itself is discarded, so itemInlineTableEnd is emitted with an empty
// value, and the state to continue with is popped from the stack.
func lexInlineTableEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemInlineTableEnd)
	return lx.pop()
}

// lexString consumes the contents of a basic (double-quoted) string. It
// assumes that the opening '"' has already been consumed and ignored.
//
// Escape sequences are handed to lexStringEscape, which returns here. The
// closing quote is left out of the emitted item and then discarded.
func lexString(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == '"':
		lx.backup() // don't include the closing quote in the item
		lx.emit(itemString)
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == '\\':
		lx.push(lexString)
		return lexStringEscape
	case isNL(r):
		return lx.errorPrevLine(errLexStringNL{})
	case r == eof:
		return lx.errorf(`unexpected EOF; expected '"'`)
	default:
		return lexString
	}
}

// lexMultilineString consumes the inner contents of a multi-line basic
// string. It assumes that the beginning '"""' has already been consumed and
// ignored.
//
// Escapes are handed to lexMultilineStringEscape. On a '"' it tries to read
// the closing '"""'; one or two extra quotes directly before the closing
// delimiter are kept as part of the string.
func lexMultilineString(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	default:
		return lexMultilineString
	case eof:
		return lx.errorf(`unexpected EOF; expected '"""'`)
	case '\\':
		return lexMultilineStringEscape
	case '"':
		/// Found " → try to read two more "".
		if lx.accept('"') {
			if lx.accept('"') {
				/// Peek ahead: the string can contain " and "", including at the
				/// end: """str"""""
				/// 6 or more at the end, however, is an error.
				if lx.peek() == '"' {
					/// Check if we already lexed 5 quotes; if so we have 6 now,
					/// and that is too many.
					///
					/// Second check is for the edge case:
					///
					///            two quotes allowed.
					///            vv
					///   """lol \""""""
					///          ^^  ^^^---- closing three
					///     escaped
					///
					/// A bit ugly, but it works.
					if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) {
						return lx.errorf(`unexpected '""""""'`)
					}
					/// Put two quotes back so the next pass slides the
					/// three-quote window forward by one.
					lx.backup()
					lx.backup()
					return lexMultilineString
				}

				lx.backup() /// backup: don't include the """ in the item.
				lx.backup()
				lx.backup()
				lx.emit(itemMultilineString)
				lx.next() /// Read over """ again and discard it.
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			/// Only two quotes: put the second one back and carry on.
			lx.backup()
		}
		return lexMultilineString
	}
}

// lexRawString consumes the contents of a literal (single-quoted) string, in
// which nothing can be escaped. It assumes that the opening "'" has already
// been consumed and ignored.
//
// The closing quote is left out of the emitted item and then discarded.
func lexRawString(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == '\'':
		lx.backup() // don't include the closing quote in the item
		lx.emit(itemRawString)
		lx.next()
		lx.ignore()
		return lx.pop()
	case isNL(r):
		return lx.errorPrevLine(errLexStringNL{})
	case r == eof:
		return lx.errorf(`unexpected EOF; expected "'"`)
	}
	return lexRawString
}

// lexMultilineRawString consumes a multi-line raw string. Nothing can be
// escaped in such a string. It assumes that the beginning triple-' has already
// been consumed and ignored.
//
// On a "'" it tries to read the closing triple-'; one or two extra quotes
// directly before the closing delimiter are kept as part of the string.
func lexMultilineRawString(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	default:
		return lexMultilineRawString
	case eof:
		return lx.errorf(`unexpected EOF; expected "'''"`)
	case '\'':
		/// Found ' → try to read two more ''.
		if lx.accept('\'') {
			if lx.accept('\'') {
				/// Peek ahead: the string can contain ' and '', including at the
				/// end: '''str'''''
				/// 6 or more at the end, however, is an error.
				if lx.peek() == '\'' {
					/// Check if we already lexed 5 quotes; if so we have 6 now,
					/// and that is too many.
					if strings.HasSuffix(lx.current(), "'''''") {
						return lx.errorf(`unexpected "''''''"`)
					}
					/// Put two quotes back so the next pass slides the
					/// three-quote window forward by one.
					lx.backup()
					lx.backup()
					return lexMultilineRawString
				}

				lx.backup() /// backup: don't include the ''' in the item.
				lx.backup()
				lx.backup()
				lx.emit(itemRawMultilineString)
				lx.next() /// Read over ''' again and discard it.
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			/// Only two quotes: put the second one back and carry on.
			lx.backup()
		}
		return lexMultilineRawString
	}
}

// lexMultilineStringEscape consumes an escaped character inside a multi-line
// basic string. It assumes that the preceding '\\' has already been consumed.
//
// A backslash directly followed by a newline is accepted here; every other
// escape is validated by lexStringEscape, which is called directly (not
// returned as the next state) with lexMultilineString pushed as the state to
// resume.
func lexMultilineStringEscape(lx *lexer) stateFn {
	if isNL(lx.next()) { /// \ escaping newline.
		return lexMultilineString
	}
	lx.backup()
	lx.push(lexMultilineString)
	return lexStringEscape(lx)
}

// lexStringEscape validates the character after a backslash in a basic
// string. It assumes the '\\' has been consumed and that the state to resume
// afterwards has been pushed on the stack.
//
// The \e and \x escapes are accepted only when tomlNext is set.
func lexStringEscape(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	case 'b', 't', 'n', 'f', 'r', '"', '\\':
		return lx.pop()
	case ' ', '\t':
		// Inside """ .. """ strings you can use \ to escape newlines, and any
		// amount of whitespace can be between the \ and \n.
		return lx.pop()
	case 'e':
		if !lx.tomlNext {
			return lx.error(errLexEscape{r})
		}
		return lx.pop()
	case 'x':
		if !lx.tomlNext {
			return lx.error(errLexEscape{r})
		}
		return lexHexEscape
	case 'u':
		return lexShortUnicodeEscape
	case 'U':
		return lexLongUnicodeEscape
	default:
		return lx.error(errLexEscape{r})
	}
}

// lexHexEscape consumes the two hexadecimal digits of a '\x' escape and then
// pops the state to resume.
func lexHexEscape(lx *lexer) stateFn {
	for remaining := 2; remaining > 0; remaining-- {
		if !isHexadecimal(lx.next()) {
			return lx.errorf(
				`expected two hexadecimal digits after '\x', but got %q instead`,
				lx.current())
		}
	}
	return lx.pop()
}

// lexShortUnicodeEscape consumes the four hexadecimal digits of a '\u' escape
// and then pops the state to resume.
func lexShortUnicodeEscape(lx *lexer) stateFn {
	for remaining := 4; remaining > 0; remaining-- {
		if !isHexadecimal(lx.next()) {
			return lx.errorf(
				`expected four hexadecimal digits after '\u', but got %q instead`,
				lx.current())
		}
	}
	return lx.pop()
}

// lexLongUnicodeEscape consumes the eight hexadecimal digits of a '\U' escape
// and then pops the state to resume.
func lexLongUnicodeEscape(lx *lexer) stateFn {
	for remaining := 8; remaining > 0; remaining-- {
		if !isHexadecimal(lx.next()) {
			return lx.errorf(
				`expected eight hexadecimal digits after '\U', but got %q instead`,
				lx.current())
		}
	}
	return lx.pop()
}

// lexNumberOrDateStart consumes the first character of a value that begins
// with a digit. A leading '0' gets its own state so that base-prefixed
// integers can be told apart from other numbers and dates.
func lexNumberOrDateStart(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == '0':
		return lexBaseNumberOrDate
	case isDigit(r):
		return lexNumberOrDate
	default:
		// This state is only entered on a digit, so anything else is an
		// error.
		return lx.errorf("expected a digit but got %q", r)
	}
}

// lexNumberOrDate consumes digits until the first character that reveals
// whether the value is a datetime, a decimal number with underscores or a
// float. If none appears, the digits are emitted as an integer.
func lexNumberOrDate(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isDigit(r):
		return lexNumberOrDate
	case r == '-' || r == ':':
		return lexDatetime
	case r == '_':
		return lexDecimalNumber
	case r == '.' || r == 'e' || r == 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexDatetime consumes a datetime, to a first approximation: any run of
// digits and datetime punctuation. The parser validates that it matches one
// of the accepted formats. The value is emitted with surrounding whitespace
// trimmed.
func lexDatetime(lx *lexer) stateFn {
	r := lx.next()
	more := isDigit(r)
	switch r {
	case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
		more = true
	}
	if more {
		return lexDatetime
	}

	lx.backup()
	lx.emitTrim(itemDatetime)
	return lx.pop()
}

// lexHexInteger consumes the hexadecimal digits and underscores that follow a
// '0x' prefix and emits the whole literal as itemInteger.
func lexHexInteger(lx *lexer) stateFn {
	if r := lx.next(); isHexadecimal(r) || r == '_' {
		return lexHexInteger
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexOctalInteger consumes the octal digits and underscores that follow a
// '0o' prefix and emits the whole literal as itemInteger.
func lexOctalInteger(lx *lexer) stateFn {
	if r := lx.next(); isOctal(r) || r == '_' {
		return lexOctalInteger
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexBinaryInteger consumes the binary digits and underscores that follow a
// '0b' prefix and emits the whole literal as itemInteger.
func lexBinaryInteger(lx *lexer) stateFn {
	if r := lx.next(); isBinary(r) || r == '_' {
		return lexBinaryInteger
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexDecimalNumber consumes a decimal integer or the integer part of a float.
// Digits and underscores stay in this state; '.', 'e' or 'E' switch to
// lexFloat; anything else ends the number, which is emitted as itemInteger.
func lexDecimalNumber(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isDigit(r), r == '_':
		return lexDecimalNumber
	case r == '.', r == 'e', r == 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexDecimalNumberStart consumes the first character of a number that begins
// with a sign. It assumes the sign has already been consumed. Values which
// start with a sign are only allowed to be decimal integers or floats.
//
// The special "nan" and "inf" values are also recognized.
func lexDecimalNumberStart(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == 'i':
		if lx.accept('n') && lx.accept('f') {
			lx.emit(itemFloat)
			return lx.pop()
		}
		return lx.errorf("invalid float: '%s'", lx.current())
	case r == 'n':
		if lx.accept('a') && lx.accept('n') {
			lx.emit(itemFloat)
			return lx.pop()
		}
		return lx.errorf("invalid float: '%s'", lx.current())
	case r == '.':
		// Special error case to give users a better message.
		return lx.errorf("floats must start with a digit, not '.'")
	case r == '0':
		// A sign in front of a base prefix gets its own error message.
		if p := lx.peek(); p == 'b' || p == 'o' || p == 'x' {
			return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
		}
		return lexDecimalNumber
	case isDigit(r):
		return lexDecimalNumber
	default:
		return lx.errorf("expected a digit but got %q", r)
	}
}

// lexBaseNumberOrDate differentiates between the possible values which
// start with '0'. It assumes that before reaching this state, the initial '0'
// has been consumed.
//
// A base designator ('b', 'o' or 'x') must be followed by at least one digit
// of that base. If it is not, an error is emitted and lexing stops, like every
// other lexer error.
func lexBaseNumberOrDate(lx *lexer) stateFn {
	r := lx.next()
	// Note: All datetimes start with at least two digits, so we don't
	// handle date characters (':', '-', etc.) here.
	if isDigit(r) {
		return lexNumberOrDate
	}
	switch r {
	case '_':
		// Can only be decimal, because there can't be an underscore
		// between the '0' and the base designator, and dates can't
		// contain underscores.
		return lexDecimalNumber
	case '.', 'e', 'E':
		return lexFloat
	case 'b':
		r = lx.peek()
		if !isBinary(r) {
			return lx.errorf("not a binary number: '%s%c'", lx.current(), r)
		}
		return lexBinaryInteger
	case 'o':
		r = lx.peek()
		if !isOctal(r) {
			return lx.errorf("not an octal number: '%s%c'", lx.current(), r)
		}
		return lexOctalInteger
	case 'x':
		r = lx.peek()
		if !isHexadecimal(r) {
			return lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
		}
		return lexHexInteger
	}

	// A lone "0" followed by something else: emit it as an integer.
	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexFloat consumes the rest of a float. It accepts any sequence of
// float-like characters, so floats emitted by the lexer are only a first
// approximation and must be validated by the parser.
func lexFloat(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isDigit(r):
		return lexFloat
	case r == '_', r == '.', r == '-', r == '+', r == 'e', r == 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemFloat)
	return lx.pop()
}

// lexBool consumes a run of letters and emits it as itemBool if it spells
// 'true' or 'false'. Any other word is reported as an error.
func lexBool(lx *lexer) stateFn {
	var letters []rune
	for r := lx.next(); unicode.IsLetter(r); r = lx.next() {
		letters = append(letters, r)
	}
	lx.backup() // put back the rune that ended the word

	word := string(letters)
	if word != "true" && word != "false" {
		return lx.errorf("expected value but found %q instead", word)
	}
	lx.emit(itemBool)
	return lx.pop()
}

// lexCommentStart begins the lexing of a comment. It discards the pending
// input, emits itemCommentStart with an empty value, and passes control to
// lexComment without consuming any further characters.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}

// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It consumes *up to* the first newline character or EOF, emits the comment
// text as itemText, and passes control back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
	if r := lx.next(); !isNL(r) && r != eof {
		return lexComment
	}
	lx.backup()
	lx.emit(itemText)
	return lx.pop()
}

// lexSkip ignores all slurped input and moves on to the next state.
//
// It is a helper called from within a state, not a state itself: it discards
// the pending input and returns nextState unchanged.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
	lx.ignore()
	return nextState
}

// String returns the name of the state function followed by "()", with any
// package qualifier removed. A nil state is rendered as "<nil>()". It is meant
// for debugging output.
func (s stateFn) String() string {
	if s == nil {
		return "<nil>()"
	}
	full := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
	if dot := strings.LastIndexByte(full, '.'); dot >= 0 {
		full = full[dot+1:]
	}
	return full + "()"
}

// String returns a short human-readable name for the item type. All four
// string kinds are reported as "String".
//
// Every declared itemType has a name, including the array-of-tables
// delimiters; an unknown value is a bug and panics.
func (itype itemType) String() string {
	switch itype {
	case itemError:
		return "Error"
	case itemNIL:
		return "NIL"
	case itemEOF:
		return "EOF"
	case itemText:
		return "Text"
	case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
		return "String"
	case itemBool:
		return "Bool"
	case itemInteger:
		return "Integer"
	case itemFloat:
		return "Float"
	case itemDatetime:
		return "DateTime"
	case itemTableStart:
		return "TableStart"
	case itemTableEnd:
		return "TableEnd"
	case itemArrayTableStart:
		return "ArrayTableStart"
	case itemArrayTableEnd:
		return "ArrayTableEnd"
	case itemKeyStart:
		return "KeyStart"
	case itemKeyEnd:
		return "KeyEnd"
	case itemArray:
		return "Array"
	case itemArrayEnd:
		return "ArrayEnd"
	case itemCommentStart:
		return "CommentStart"
	case itemInlineTableStart:
		return "InlineTableStart"
	case itemInlineTableEnd:
		return "InlineTableEnd"
	}
	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}

// String formats the item as "(Type, value)" using the name of its type and
// its raw value.
func (item item) String() string {
	return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}

// isWhitespace reports whether r is a tab or a space.
func isWhitespace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
// isNL reports whether r is a line feed or a carriage return.
func isNL(r rune) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
// isControl reports whether r is a control character (U+0000 through U+001F,
// or U+007F) other than tab, carriage return and line feed.
func isControl(r rune) bool {
	if r == '\t' || r == '\r' || r == '\n' {
		return false
	}
	return r == 0x7f || (r >= 0x00 && r <= 0x1f)
}
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}
// isBinary reports whether r is a binary digit ('0' or '1').
func isBinary(r rune) bool {
	switch r {
	case '0', '1':
		return true
	}
	return false
}
// isOctal reports whether r is an octal digit ('0' through '7').
func isOctal(r rune) bool {
	return '0' <= r && r <= '7'
}
// isHexadecimal reports whether r is an ASCII hexadecimal digit in either
// letter case.
func isHexadecimal(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}
	return false
}

// isBareKeyChar reports whether r may appear in a bare (unquoted) key.
//
// ASCII letters, digits, '_' and '-' are always allowed. When tomlNext is
// set, the additional non-ASCII ranges listed below are allowed as well.
func isBareKeyChar(r rune, tomlNext bool) bool {
	switch {
	case r >= 'A' && r <= 'Z',
		r >= 'a' && r <= 'z',
		r >= '0' && r <= '9',
		r == '_', r == '-':
		return true
	case !tomlNext:
		return false
	}

	switch {
	case r == 0xb2, r == 0xb3, r == 0xb9,
		r >= 0xbc && r <= 0xbe,
		r >= 0xc0 && r <= 0xd6,
		r >= 0xd8 && r <= 0xf6,
		r >= 0xf8 && r <= 0x037d,
		r >= 0x037f && r <= 0x1fff,
		r >= 0x200c && r <= 0x200d,
		r >= 0x203f && r <= 0x2040,
		r >= 0x2070 && r <= 0x218f,
		r >= 0x2460 && r <= 0x24ff,
		r >= 0x2c00 && r <= 0x2fef,
		r >= 0x3001 && r <= 0xd7ff,
		r >= 0xf900 && r <= 0xfdcf,
		r >= 0xfdf0 && r <= 0xfffd,
		r >= 0x10000 && r <= 0xeffff:
		return true
	}
	return false
}