1package buffer
  2
  3import (
  4	"io"
  5	"io/ioutil"
  6)
  7
  8var nullBuffer = []byte{0}
  9
 10// Lexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
 11// It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
 12type Lexer struct {
 13	buf   []byte
 14	pos   int // index in buf
 15	start int // index in buf
 16	err   error
 17
 18	restore func()
 19}
 20
 21// NewLexer returns a new Lexer for a given io.Reader, and uses ioutil.ReadAll to read it into a byte slice.
 22// If the io.Reader implements Bytes, that is used instead.
 23// It will append a NULL at the end of the buffer.
 24func NewLexer(r io.Reader) *Lexer {
 25	var b []byte
 26	if r != nil {
 27		if buffer, ok := r.(interface {
 28			Bytes() []byte
 29		}); ok {
 30			b = buffer.Bytes()
 31		} else {
 32			var err error
 33			b, err = ioutil.ReadAll(r)
 34			if err != nil {
 35				return &Lexer{
 36					buf: nullBuffer,
 37					err: err,
 38				}
 39			}
 40		}
 41	}
 42	return NewLexerBytes(b)
 43}
 44
 45// NewLexerBytes returns a new Lexer for a given byte slice, and appends NULL at the end.
 46// To avoid reallocation, make sure the capacity has room for one more byte.
 47func NewLexerBytes(b []byte) *Lexer {
 48	z := &Lexer{
 49		buf: b,
 50	}
 51
 52	n := len(b)
 53	if n == 0 {
 54		z.buf = nullBuffer
 55	} else {
 56		// Append NULL to buffer, but try to avoid reallocation
 57		if cap(b) > n {
 58			// Overwrite next byte but restore when done
 59			b = b[:n+1]
 60			c := b[n]
 61			b[n] = 0
 62
 63			z.buf = b
 64			z.restore = func() {
 65				b[n] = c
 66			}
 67		} else {
 68			z.buf = append(b, 0)
 69		}
 70	}
 71	return z
 72}
 73
 74// Restore restores the replaced byte past the end of the buffer by NULL.
 75func (z *Lexer) Restore() {
 76	if z.restore != nil {
 77		z.restore()
 78		z.restore = nil
 79	}
 80}
 81
 82// Err returns the error returned from io.Reader or io.EOF when the end has been reached.
 83func (z *Lexer) Err() error {
 84	return z.PeekErr(0)
 85}
 86
 87// PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
 88func (z *Lexer) PeekErr(pos int) error {
 89	if z.err != nil {
 90		return z.err
 91	} else if z.pos+pos >= len(z.buf)-1 {
 92		return io.EOF
 93	}
 94	return nil
 95}
 96
 97// Peek returns the ith byte relative to the end position.
 98// Peek returns 0 when an error has occurred, Err returns the error.
 99func (z *Lexer) Peek(pos int) byte {
100	pos += z.pos
101	return z.buf[pos]
102}
103
104// PeekRune returns the rune and rune length of the ith byte relative to the end position.
105func (z *Lexer) PeekRune(pos int) (rune, int) {
106	// from unicode/utf8
107	c := z.Peek(pos)
108	if c < 0xC0 || z.Peek(pos+1) == 0 {
109		return rune(c), 1
110	} else if c < 0xE0 || z.Peek(pos+2) == 0 {
111		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
112	} else if c < 0xF0 || z.Peek(pos+3) == 0 {
113		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
114	}
115	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
116}
117
118// Move advances the position.
119func (z *Lexer) Move(n int) {
120	z.pos += n
121}
122
123// Pos returns a mark to which can be rewinded.
124func (z *Lexer) Pos() int {
125	return z.pos - z.start
126}
127
128// Rewind rewinds the position to the given position.
129func (z *Lexer) Rewind(pos int) {
130	z.pos = z.start + pos
131}
132
133// Lexeme returns the bytes of the current selection.
134func (z *Lexer) Lexeme() []byte {
135	return z.buf[z.start:z.pos:z.pos]
136}
137
138// Skip collapses the position to the end of the selection.
139func (z *Lexer) Skip() {
140	z.start = z.pos
141}
142
143// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
144func (z *Lexer) Shift() []byte {
145	b := z.buf[z.start:z.pos:z.pos]
146	z.start = z.pos
147	return b
148}
149
150// Offset returns the character position in the buffer.
151func (z *Lexer) Offset() int {
152	return z.pos
153}
154
155// Bytes returns the underlying buffer.
156func (z *Lexer) Bytes() []byte {
157	return z.buf[: len(z.buf)-1 : len(z.buf)-1]
158}
159
160// Reset resets position to the underlying buffer.
161func (z *Lexer) Reset() {
162	z.start = 0
163	z.pos = 0
164}