123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
- package buffer
- import (
- "io"
- )
- type block struct {
- buf []byte
- next int // index in pool plus one
- active bool
- }
- type bufferPool struct {
- pool []block
- head int // index in pool plus one
- tail int // index in pool plus one
- pos int // byte pos in tail
- }
- func (z *bufferPool) swap(oldBuf []byte, size int) []byte {
- // find new buffer that can be reused
- swap := -1
- for i := 0; i < len(z.pool); i++ {
- if !z.pool[i].active && size <= cap(z.pool[i].buf) {
- swap = i
- break
- }
- }
- if swap == -1 { // no free buffer found for reuse
- if z.tail == 0 && z.pos >= len(oldBuf) && size <= cap(oldBuf) { // but we can reuse the current buffer!
- z.pos -= len(oldBuf)
- return oldBuf[:0]
- }
- // allocate new
- z.pool = append(z.pool, block{make([]byte, 0, size), 0, true})
- swap = len(z.pool) - 1
- }
- newBuf := z.pool[swap].buf
- // put current buffer into pool
- z.pool[swap] = block{oldBuf, 0, true}
- if z.head != 0 {
- z.pool[z.head-1].next = swap + 1
- }
- z.head = swap + 1
- if z.tail == 0 {
- z.tail = swap + 1
- }
- return newBuf[:0]
- }
- func (z *bufferPool) free(n int) {
- z.pos += n
- // move the tail over to next buffers
- for z.tail != 0 && z.pos >= len(z.pool[z.tail-1].buf) {
- z.pos -= len(z.pool[z.tail-1].buf)
- newTail := z.pool[z.tail-1].next
- z.pool[z.tail-1].active = false // after this, any thread may pick up the inactive buffer, so it can't be used anymore
- z.tail = newTail
- }
- if z.tail == 0 {
- z.head = 0
- }
- }
- // StreamLexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
- // It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
- type StreamLexer struct {
- r io.Reader
- err error
- pool bufferPool
- buf []byte
- start int // index in buf
- pos int // index in buf
- prevStart int
- free int
- }
- // NewStreamLexer returns a new StreamLexer for a given io.Reader with a 4kB estimated buffer size.
- // If the io.Reader implements Bytes, that buffer is used instead.
- func NewStreamLexer(r io.Reader) *StreamLexer {
- return NewStreamLexerSize(r, defaultBufSize)
- }
- // NewStreamLexerSize returns a new StreamLexer for a given io.Reader and estimated required buffer size.
- // If the io.Reader implements Bytes, that buffer is used instead.
- func NewStreamLexerSize(r io.Reader, size int) *StreamLexer {
- // if reader has the bytes in memory already, use that instead
- if buffer, ok := r.(interface {
- Bytes() []byte
- }); ok {
- return &StreamLexer{
- err: io.EOF,
- buf: buffer.Bytes(),
- }
- }
- return &StreamLexer{
- r: r,
- buf: make([]byte, 0, size),
- }
- }
- func (z *StreamLexer) read(pos int) byte {
- if z.err != nil {
- return 0
- }
- // free unused bytes
- z.pool.free(z.free)
- z.free = 0
- // get new buffer
- c := cap(z.buf)
- p := pos - z.start + 1
- if 2*p > c { // if the token is larger than half the buffer, increase buffer size
- c = 2*c + p
- }
- d := len(z.buf) - z.start
- buf := z.pool.swap(z.buf[:z.start], c)
- copy(buf[:d], z.buf[z.start:]) // copy the left-overs (unfinished token) from the old buffer
- // read in new data for the rest of the buffer
- var n int
- for pos-z.start >= d && z.err == nil {
- n, z.err = z.r.Read(buf[d:cap(buf)])
- d += n
- }
- pos -= z.start
- z.pos -= z.start
- z.start, z.buf = 0, buf[:d]
- if pos >= d {
- return 0
- }
- return z.buf[pos]
- }
- // Err returns the error returned from io.Reader. It may still return valid bytes for a while though.
- func (z *StreamLexer) Err() error {
- if z.err == io.EOF && z.pos < len(z.buf) {
- return nil
- }
- return z.err
- }
- // Free frees up bytes of length n from previously shifted tokens.
- // Each call to Shift should at one point be followed by a call to Free with a length returned by ShiftLen.
- func (z *StreamLexer) Free(n int) {
- z.free += n
- }
- // Peek returns the ith byte relative to the end position and possibly does an allocation.
- // Peek returns zero when an error has occurred, Err returns the error.
- // TODO: inline function
- func (z *StreamLexer) Peek(pos int) byte {
- pos += z.pos
- if uint(pos) < uint(len(z.buf)) { // uint for BCE
- return z.buf[pos]
- }
- return z.read(pos)
- }
- // PeekRune returns the rune and rune length of the ith byte relative to the end position.
- func (z *StreamLexer) PeekRune(pos int) (rune, int) {
- // from unicode/utf8
- c := z.Peek(pos)
- if c < 0xC0 {
- return rune(c), 1
- } else if c < 0xE0 {
- return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
- } else if c < 0xF0 {
- return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
- }
- return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
- }
- // Move advances the position.
- func (z *StreamLexer) Move(n int) {
- z.pos += n
- }
- // Pos returns a mark to which can be rewinded.
- func (z *StreamLexer) Pos() int {
- return z.pos - z.start
- }
- // Rewind rewinds the position to the given position.
- func (z *StreamLexer) Rewind(pos int) {
- z.pos = z.start + pos
- }
- // Lexeme returns the bytes of the current selection.
- func (z *StreamLexer) Lexeme() []byte {
- return z.buf[z.start:z.pos]
- }
- // Skip collapses the position to the end of the selection.
- func (z *StreamLexer) Skip() {
- z.start = z.pos
- }
- // Shift returns the bytes of the current selection and collapses the position to the end of the selection.
- // It also returns the number of bytes we moved since the last call to Shift. This can be used in calls to Free.
- func (z *StreamLexer) Shift() []byte {
- if z.pos > len(z.buf) { // make sure we peeked at least as much as we shift
- z.read(z.pos - 1)
- }
- b := z.buf[z.start:z.pos]
- z.start = z.pos
- return b
- }
- // ShiftLen returns the number of bytes moved since the last call to ShiftLen. This can be used in calls to Free because it takes into account multiple Shifts or Skips.
- func (z *StreamLexer) ShiftLen() int {
- n := z.start - z.prevStart
- z.prevStart = z.start
- return n
- }
|