1package buffer
2
3import (
4 "io"
5)
6
7type block struct {
8 buf []byte
9 next int // index in pool plus one
10 active bool
11}
12
13type bufferPool struct {
14 pool []block
15 head int // index in pool plus one
16 tail int // index in pool plus one
17
18 pos int // byte pos in tail
19}
20
21func (z *bufferPool) swap(oldBuf []byte, size int) []byte {
22 // find new buffer that can be reused
23 swap := -1
24 for i := 0; i < len(z.pool); i++ {
25 if !z.pool[i].active && size <= cap(z.pool[i].buf) {
26 swap = i
27 break
28 }
29 }
30 if swap == -1 { // no free buffer found for reuse
31 if z.tail == 0 && z.pos >= len(oldBuf) && size <= cap(oldBuf) { // but we can reuse the current buffer!
32 z.pos -= len(oldBuf)
33 return oldBuf[:0]
34 }
35 // allocate new
36 z.pool = append(z.pool, block{make([]byte, 0, size), 0, true})
37 swap = len(z.pool) - 1
38 }
39
40 newBuf := z.pool[swap].buf
41
42 // put current buffer into pool
43 z.pool[swap] = block{oldBuf, 0, true}
44 if z.head != 0 {
45 z.pool[z.head-1].next = swap + 1
46 }
47 z.head = swap + 1
48 if z.tail == 0 {
49 z.tail = swap + 1
50 }
51
52 return newBuf[:0]
53}
54
55func (z *bufferPool) free(n int) {
56 z.pos += n
57 // move the tail over to next buffers
58 for z.tail != 0 && z.pos >= len(z.pool[z.tail-1].buf) {
59 z.pos -= len(z.pool[z.tail-1].buf)
60 newTail := z.pool[z.tail-1].next
61 z.pool[z.tail-1].active = false // after this, any thread may pick up the inactive buffer, so it can't be used anymore
62 z.tail = newTail
63 }
64 if z.tail == 0 {
65 z.head = 0
66 }
67}
68
69// StreamLexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
70// It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
71type StreamLexer struct {
72 r io.Reader
73 err error
74
75 pool bufferPool
76
77 buf []byte
78 start int // index in buf
79 pos int // index in buf
80 prevStart int
81
82 free int
83}
84
85// NewStreamLexer returns a new StreamLexer for a given io.Reader with a 4kB estimated buffer size.
86// If the io.Reader implements Bytes, that buffer is used instead.
87func NewStreamLexer(r io.Reader) *StreamLexer {
88 return NewStreamLexerSize(r, defaultBufSize)
89}
90
91// NewStreamLexerSize returns a new StreamLexer for a given io.Reader and estimated required buffer size.
92// If the io.Reader implements Bytes, that buffer is used instead.
93func NewStreamLexerSize(r io.Reader, size int) *StreamLexer {
94 // if reader has the bytes in memory already, use that instead
95 if buffer, ok := r.(interface {
96 Bytes() []byte
97 }); ok {
98 return &StreamLexer{
99 err: io.EOF,
100 buf: buffer.Bytes(),
101 }
102 }
103 return &StreamLexer{
104 r: r,
105 buf: make([]byte, 0, size),
106 }
107}
108
109func (z *StreamLexer) read(pos int) byte {
110 if z.err != nil {
111 return 0
112 }
113
114 // free unused bytes
115 z.pool.free(z.free)
116 z.free = 0
117
118 // get new buffer
119 c := cap(z.buf)
120 p := pos - z.start + 1
121 if 2*p > c { // if the token is larger than half the buffer, increase buffer size
122 c = 2*c + p
123 }
124 d := len(z.buf) - z.start
125 buf := z.pool.swap(z.buf[:z.start], c)
126 copy(buf[:d], z.buf[z.start:]) // copy the left-overs (unfinished token) from the old buffer
127
128 // read in new data for the rest of the buffer
129 var n int
130 for pos-z.start >= d && z.err == nil {
131 n, z.err = z.r.Read(buf[d:cap(buf)])
132 d += n
133 }
134 pos -= z.start
135 z.pos -= z.start
136 z.start, z.buf = 0, buf[:d]
137 if pos >= d {
138 return 0
139 }
140 return z.buf[pos]
141}
142
143// Err returns the error returned from io.Reader. It may still return valid bytes for a while though.
144func (z *StreamLexer) Err() error {
145 if z.err == io.EOF && z.pos < len(z.buf) {
146 return nil
147 }
148 return z.err
149}
150
151// Free frees up bytes of length n from previously shifted tokens.
152// Each call to Shift should at one point be followed by a call to Free with a length returned by ShiftLen.
153func (z *StreamLexer) Free(n int) {
154 z.free += n
155}
156
157// Peek returns the ith byte relative to the end position and possibly does an allocation.
158// Peek returns zero when an error has occurred, Err returns the error.
159// TODO: inline function
160func (z *StreamLexer) Peek(pos int) byte {
161 pos += z.pos
162 if uint(pos) < uint(len(z.buf)) { // uint for BCE
163 return z.buf[pos]
164 }
165 return z.read(pos)
166}
167
168// PeekRune returns the rune and rune length of the ith byte relative to the end position.
169func (z *StreamLexer) PeekRune(pos int) (rune, int) {
170 // from unicode/utf8
171 c := z.Peek(pos)
172 if c < 0xC0 {
173 return rune(c), 1
174 } else if c < 0xE0 {
175 return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
176 } else if c < 0xF0 {
177 return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
178 }
179 return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
180}
181
182// Move advances the position.
183func (z *StreamLexer) Move(n int) {
184 z.pos += n
185}
186
187// Pos returns a mark to which can be rewinded.
188func (z *StreamLexer) Pos() int {
189 return z.pos - z.start
190}
191
192// Rewind rewinds the position to the given position.
193func (z *StreamLexer) Rewind(pos int) {
194 z.pos = z.start + pos
195}
196
197// Lexeme returns the bytes of the current selection.
198func (z *StreamLexer) Lexeme() []byte {
199 return z.buf[z.start:z.pos]
200}
201
202// Skip collapses the position to the end of the selection.
203func (z *StreamLexer) Skip() {
204 z.start = z.pos
205}
206
207// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
208// It also returns the number of bytes we moved since the last call to Shift. This can be used in calls to Free.
209func (z *StreamLexer) Shift() []byte {
210 if z.pos > len(z.buf) { // make sure we peeked at least as much as we shift
211 z.read(z.pos - 1)
212 }
213 b := z.buf[z.start:z.pos]
214 z.start = z.pos
215 return b
216}
217
218// ShiftLen returns the number of bytes moved since the last call to ShiftLen. This can be used in calls to Free because it takes into account multiple Shifts or Skips.
219func (z *StreamLexer) ShiftLen() int {
220 n := z.start - z.prevStart
221 z.prevStart = z.start
222 return n
223}