1package buffer
2
3import (
4 "io"
5 "io/ioutil"
6)
7
// nullBuffer is a buffer holding only the terminating NULL byte. It backs a
// Lexer that has no input to offer (empty input or a failed read).
var nullBuffer = []byte{0x00}
9
// Lexer allows peeking forward and shifting over an in-memory byte slice that
// is terminated by a NULL byte. The current selection spans buf[start:pos].
type Lexer struct {
	buf        []byte // input data followed by the terminating NULL byte
	pos, start int    // pos: current index in buf; start: index in buf where the selection begins
	err        error  // read error recorded while constructing the Lexer, if any

	restore func() // puts back the byte that was overwritten by NULL; nil when there is nothing to undo
}
20
21// NewLexer returns a new Lexer for a given io.Reader, and uses ioutil.ReadAll to read it into a byte slice.
22// If the io.Reader implements Bytes, that is used instead.
23// It will append a NULL at the end of the buffer.
24func NewLexer(r io.Reader) *Lexer {
25 var b []byte
26 if r != nil {
27 if buffer, ok := r.(interface {
28 Bytes() []byte
29 }); ok {
30 b = buffer.Bytes()
31 } else {
32 var err error
33 b, err = ioutil.ReadAll(r)
34 if err != nil {
35 return &Lexer{
36 buf: nullBuffer,
37 err: err,
38 }
39 }
40 }
41 }
42 return NewLexerBytes(b)
43}
44
45// NewLexerBytes returns a new Lexer for a given byte slice, and appends NULL at the end.
46// To avoid reallocation, make sure the capacity has room for one more byte.
47func NewLexerBytes(b []byte) *Lexer {
48 z := &Lexer{
49 buf: b,
50 }
51
52 n := len(b)
53 if n == 0 {
54 z.buf = nullBuffer
55 } else {
56 // Append NULL to buffer, but try to avoid reallocation
57 if cap(b) > n {
58 // Overwrite next byte but restore when done
59 b = b[:n+1]
60 c := b[n]
61 b[n] = 0
62
63 z.buf = b
64 z.restore = func() {
65 b[n] = c
66 }
67 } else {
68 z.buf = append(b, 0)
69 }
70 }
71 return z
72}
73
74// Restore restores the replaced byte past the end of the buffer by NULL.
75func (z *Lexer) Restore() {
76 if z.restore != nil {
77 z.restore()
78 z.restore = nil
79 }
80}
81
82// Err returns the error returned from io.Reader or io.EOF when the end has been reached.
83func (z *Lexer) Err() error {
84 return z.PeekErr(0)
85}
86
87// PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
88func (z *Lexer) PeekErr(pos int) error {
89 if z.err != nil {
90 return z.err
91 } else if z.pos+pos >= len(z.buf)-1 {
92 return io.EOF
93 }
94 return nil
95}
96
97// Peek returns the ith byte relative to the end position.
98// Peek returns 0 when an error has occurred, Err returns the error.
99func (z *Lexer) Peek(pos int) byte {
100 pos += z.pos
101 return z.buf[pos]
102}
103
104// PeekRune returns the rune and rune length of the ith byte relative to the end position.
105func (z *Lexer) PeekRune(pos int) (rune, int) {
106 // from unicode/utf8
107 c := z.Peek(pos)
108 if c < 0xC0 || z.Peek(pos+1) == 0 {
109 return rune(c), 1
110 } else if c < 0xE0 || z.Peek(pos+2) == 0 {
111 return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
112 } else if c < 0xF0 || z.Peek(pos+3) == 0 {
113 return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
114 }
115 return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
116}
117
118// Move advances the position.
119func (z *Lexer) Move(n int) {
120 z.pos += n
121}
122
123// Pos returns a mark to which can be rewinded.
124func (z *Lexer) Pos() int {
125 return z.pos - z.start
126}
127
128// Rewind rewinds the position to the given position.
129func (z *Lexer) Rewind(pos int) {
130 z.pos = z.start + pos
131}
132
133// Lexeme returns the bytes of the current selection.
134func (z *Lexer) Lexeme() []byte {
135 return z.buf[z.start:z.pos:z.pos]
136}
137
138// Skip collapses the position to the end of the selection.
139func (z *Lexer) Skip() {
140 z.start = z.pos
141}
142
143// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
144func (z *Lexer) Shift() []byte {
145 b := z.buf[z.start:z.pos:z.pos]
146 z.start = z.pos
147 return b
148}
149
150// Offset returns the character position in the buffer.
151func (z *Lexer) Offset() int {
152 return z.pos
153}
154
155// Bytes returns the underlying buffer.
156func (z *Lexer) Bytes() []byte {
157 return z.buf[: len(z.buf)-1 : len(z.buf)-1]
158}
159
160// Reset resets position to the underlying buffer.
161func (z *Lexer) Reset() {
162 z.start = 0
163 z.pos = 0
164}