lexer.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. package buffer
  2. import (
  3. "io"
  4. "io/ioutil"
  5. )
  6. var nullBuffer = []byte{0}
  7. // Lexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
  8. // It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
  9. type Lexer struct {
  10. buf []byte
  11. pos int // index in buf
  12. start int // index in buf
  13. err error
  14. restore func()
  15. }
  16. // NewLexer returns a new Lexer for a given io.Reader, and uses ioutil.ReadAll to read it into a byte slice.
  17. // If the io.Reader implements Bytes, that is used instead.
  18. // It will append a NULL at the end of the buffer.
  19. func NewLexer(r io.Reader) *Lexer {
  20. var b []byte
  21. if r != nil {
  22. if buffer, ok := r.(interface {
  23. Bytes() []byte
  24. }); ok {
  25. b = buffer.Bytes()
  26. } else {
  27. var err error
  28. b, err = ioutil.ReadAll(r)
  29. if err != nil {
  30. return &Lexer{
  31. buf: nullBuffer,
  32. err: err,
  33. }
  34. }
  35. }
  36. }
  37. return NewLexerBytes(b)
  38. }
  39. // NewLexerBytes returns a new Lexer for a given byte slice, and appends NULL at the end.
  40. // To avoid reallocation, make sure the capacity has room for one more byte.
  41. func NewLexerBytes(b []byte) *Lexer {
  42. z := &Lexer{
  43. buf: b,
  44. }
  45. n := len(b)
  46. if n == 0 {
  47. z.buf = nullBuffer
  48. } else {
  49. // Append NULL to buffer, but try to avoid reallocation
  50. if cap(b) > n {
  51. // Overwrite next byte but restore when done
  52. b = b[:n+1]
  53. c := b[n]
  54. b[n] = 0
  55. z.buf = b
  56. z.restore = func() {
  57. b[n] = c
  58. }
  59. } else {
  60. z.buf = append(b, 0)
  61. }
  62. }
  63. return z
  64. }
  65. // Restore restores the replaced byte past the end of the buffer by NULL.
  66. func (z *Lexer) Restore() {
  67. if z.restore != nil {
  68. z.restore()
  69. z.restore = nil
  70. }
  71. }
  72. // Err returns the error returned from io.Reader or io.EOF when the end has been reached.
  73. func (z *Lexer) Err() error {
  74. return z.PeekErr(0)
  75. }
  76. // PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
  77. func (z *Lexer) PeekErr(pos int) error {
  78. if z.err != nil {
  79. return z.err
  80. } else if z.pos+pos >= len(z.buf)-1 {
  81. return io.EOF
  82. }
  83. return nil
  84. }
  85. // Peek returns the ith byte relative to the end position.
  86. // Peek returns 0 when an error has occurred, Err returns the error.
  87. func (z *Lexer) Peek(pos int) byte {
  88. pos += z.pos
  89. return z.buf[pos]
  90. }
  91. // PeekRune returns the rune and rune length of the ith byte relative to the end position.
  92. func (z *Lexer) PeekRune(pos int) (rune, int) {
  93. // from unicode/utf8
  94. c := z.Peek(pos)
  95. if c < 0xC0 || z.Peek(pos+1) == 0 {
  96. return rune(c), 1
  97. } else if c < 0xE0 || z.Peek(pos+2) == 0 {
  98. return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
  99. } else if c < 0xF0 || z.Peek(pos+3) == 0 {
  100. return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
  101. }
  102. return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
  103. }
  104. // Move advances the position.
  105. func (z *Lexer) Move(n int) {
  106. z.pos += n
  107. }
  108. // Pos returns a mark to which can be rewinded.
  109. func (z *Lexer) Pos() int {
  110. return z.pos - z.start
  111. }
  112. // Rewind rewinds the position to the given position.
  113. func (z *Lexer) Rewind(pos int) {
  114. z.pos = z.start + pos
  115. }
  116. // Lexeme returns the bytes of the current selection.
  117. func (z *Lexer) Lexeme() []byte {
  118. return z.buf[z.start:z.pos:z.pos]
  119. }
  120. // Skip collapses the position to the end of the selection.
  121. func (z *Lexer) Skip() {
  122. z.start = z.pos
  123. }
  124. // Shift returns the bytes of the current selection and collapses the position to the end of the selection.
  125. func (z *Lexer) Shift() []byte {
  126. b := z.buf[z.start:z.pos:z.pos]
  127. z.start = z.pos
  128. return b
  129. }
  130. // Offset returns the character position in the buffer.
  131. func (z *Lexer) Offset() int {
  132. return z.pos
  133. }
  134. // Bytes returns the underlying buffer.
  135. func (z *Lexer) Bytes() []byte {
  136. return z.buf[: len(z.buf)-1 : len(z.buf)-1]
  137. }
  138. // Reset resets position to the underlying buffer.
  139. func (z *Lexer) Reset() {
  140. z.start = 0
  141. z.pos = 0
  142. }