input.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. package parse
  2. import (
  3. "io"
  4. "io/ioutil"
  5. )
  // nullBuffer is a one-byte buffer that holds only the terminating NULL.
  // It is shared by every Input that has no data of its own.
  var nullBuffer = []byte{0x00}
  // Input is an in-memory buffered reader over data taken from an io.Reader.
  // It allows peeking ahead of the current position and shifting off the bytes
  // selected so far. The constructors always terminate the buffer with a NULL byte.
  type Input struct {
  	buf        []byte // data followed by the terminating NULL
  	pos, start int    // indices in buf: current position and start of the selection
  	err        error  // error from reading the source, if any
  	restore    func() // puts back the byte that the NULL overwrote, if any
  }
  16. // NewInput returns a new Input for a given io.Input and uses ioutil.ReadAll to read it into a byte slice.
  17. // If the io.Input implements Bytes, that is used instead. It will append a NULL at the end of the buffer.
  18. func NewInput(r io.Reader) *Input {
  19. var b []byte
  20. if r != nil {
  21. if buffer, ok := r.(interface {
  22. Bytes() []byte
  23. }); ok {
  24. b = buffer.Bytes()
  25. } else {
  26. var err error
  27. b, err = ioutil.ReadAll(r)
  28. if err != nil {
  29. return &Input{
  30. buf: nullBuffer,
  31. err: err,
  32. }
  33. }
  34. }
  35. }
  36. return NewInputBytes(b)
  37. }
  38. // NewInputString returns a new Input for a given string and appends NULL at the end.
  39. func NewInputString(s string) *Input {
  40. return NewInputBytes([]byte(s))
  41. }
  42. // NewInputBytes returns a new Input for a given byte slice and appends NULL at the end.
  43. // To avoid reallocation, make sure the capacity has room for one more byte.
  44. func NewInputBytes(b []byte) *Input {
  45. z := &Input{
  46. buf: b,
  47. }
  48. n := len(b)
  49. if n == 0 {
  50. z.buf = nullBuffer
  51. } else {
  52. // Append NULL to buffer, but try to avoid reallocation
  53. if cap(b) > n {
  54. // Overwrite next byte but restore when done
  55. b = b[:n+1]
  56. c := b[n]
  57. b[n] = 0
  58. z.buf = b
  59. z.restore = func() {
  60. b[n] = c
  61. }
  62. } else {
  63. z.buf = append(b, 0)
  64. }
  65. }
  66. return z
  67. }
  68. // Restore restores the replaced byte past the end of the buffer by NULL.
  69. func (z *Input) Restore() {
  70. if z.restore != nil {
  71. z.restore()
  72. z.restore = nil
  73. }
  74. }
  75. // Err returns the error returned from io.Input or io.EOF when the end has been reached.
  76. func (z *Input) Err() error {
  77. return z.PeekErr(0)
  78. }
  79. // PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
  80. func (z *Input) PeekErr(pos int) error {
  81. if z.err != nil {
  82. return z.err
  83. } else if z.pos+pos >= len(z.buf)-1 {
  84. return io.EOF
  85. }
  86. return nil
  87. }
  88. // Peek returns the ith byte relative to the end position.
  89. // Peek returns 0 when an error has occurred, Err returns the erroz.
  90. func (z *Input) Peek(pos int) byte {
  91. pos += z.pos
  92. return z.buf[pos]
  93. }
  94. // PeekRune returns the rune and rune length of the ith byte relative to the end position.
  95. func (z *Input) PeekRune(pos int) (rune, int) {
  96. // from unicode/utf8
  97. c := z.Peek(pos)
  98. if c < 0xC0 || z.Peek(pos+1) == 0 {
  99. return rune(c), 1
  100. } else if c < 0xE0 || z.Peek(pos+2) == 0 {
  101. return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
  102. } else if c < 0xF0 || z.Peek(pos+3) == 0 {
  103. return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
  104. }
  105. return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
  106. }
  107. // Move advances the position.
  108. func (z *Input) Move(n int) {
  109. z.pos += n
  110. }
  111. // Pos returns a mark to which can be rewinded.
  112. func (z *Input) Pos() int {
  113. return z.pos - z.start
  114. }
  115. // Rewind rewinds the position to the given position.
  116. func (z *Input) Rewind(pos int) {
  117. z.pos = z.start + pos
  118. }
  119. // Lexeme returns the bytes of the current selection.
  120. func (z *Input) Lexeme() []byte {
  121. return z.buf[z.start:z.pos:z.pos]
  122. }
  123. // Skip collapses the position to the end of the selection.
  124. func (z *Input) Skip() {
  125. z.start = z.pos
  126. }
  127. // Shift returns the bytes of the current selection and collapses the position to the end of the selection.
  128. func (z *Input) Shift() []byte {
  129. b := z.buf[z.start:z.pos:z.pos]
  130. z.start = z.pos
  131. return b
  132. }
  133. // Offset returns the character position in the buffez.
  134. func (z *Input) Offset() int {
  135. return z.pos
  136. }
  137. // Bytes returns the underlying buffez.
  138. func (z *Input) Bytes() []byte {
  139. return z.buf[: len(z.buf)-1 : len(z.buf)-1]
  140. }
  141. // Len returns the length of the underlying buffez.
  142. func (z *Input) Len() int {
  143. return len(z.buf) - 1
  144. }
  145. // Reset resets position to the underlying buffez.
  146. func (z *Input) Reset() {
  147. z.start = 0
  148. z.pos = 0
  149. }