1package jsoncolor
  2
// Tokenizer is an iterator-style type which can be used to progressively parse
// through a json input.
//
// Tokenizing json is useful to build highly efficient parsing operations, for
// example when doing transformations on-the-fly as the program reads the
// input and produces the transformed json to an output buffer.
//
// Here is a common pattern to use a tokenizer:
//
//	for t := jsoncolor.NewTokenizer(b); t.Next(); {
//		switch t.Delim {
//		case '{':
//			...
//		case '}':
//			...
//		case '[':
//			...
//		case ']':
//			...
//		case ':':
//			...
//		case ',':
//			...
//		}
//
//		switch {
//		case t.Value.String():
//			...
//		case t.Value.Null():
//			...
//		case t.Value.True():
//			...
//		case t.Value.False():
//			...
//		case t.Value.Number():
//			...
//		}
//	}
//
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys. The
	// buffer is used as pre-allocated space for the stack so that tokenizing
	// shallow json documents does not require a heap allocation.
	stack  []state
	buffer [8]state
}
 80
// state is one frame of the tokenizer's stack, recording an array or object
// that has been entered but not yet left.
type state struct {
	// typ tells whether this frame is an array or an object.
	typ scope
	// len counts the values seen so far at this nesting level; it starts at 1
	// on push and is incremented on each ',' (Index is derived as len-1).
	len int
}

// scope identifies the kind of composite json value a stack frame refers to.
type scope int

const (
	inArray scope = iota
	inObject
)
 92
 93// NewTokenizer constructs a new Tokenizer which reads its json input from b.
 94func NewTokenizer(b []byte) *Tokenizer { return &Tokenizer{json: b} }
 95
// Reset erases the state of t and re-initializes it with the json input from b.
func (t *Tokenizer) Reset(b []byte) {
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy, which
	// ends up being slower and prevents the code from being inlined.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	// Dropping the stack is enough: the next push re-adopts t.buffer as the
	// backing storage, so no per-frame cleanup is needed.
	t.stack = nil
}
114
115// Next returns a new tokenizer pointing at the next token, or the zero-value of
116// Tokenizer if the end of the json input has been reached.
117//
118// If the tokenizer encounters malformed json while reading the input the method
119// sets t.Err to an error describing the issue, and returns false. Once an error
120// has been encountered, the tokenizer will always fail until its input is
121// cleared by a call to its Reset method.
122func (t *Tokenizer) Next() bool {
123	if t.Err != nil {
124		return false
125	}
126
127	// Inlined code of the skipSpaces function, this give a ~15% speed boost.
128	i := 0
129skipLoop:
130	for _, c := range t.json {
131		switch c {
132		case sp, ht, nl, cr:
133			i++
134		default:
135			break skipLoop
136		}
137	}
138
139	if t.json = t.json[i:]; len(t.json) == 0 {
140		t.Reset(nil)
141		return false
142	}
143
144	var d Delim
145	var v []byte
146	var b []byte
147	var err error
148
149	switch t.json[0] {
150	case '"':
151		v, b, err = parseString(t.json)
152	case 'n':
153		v, b, err = parseNull(t.json)
154	case 't':
155		v, b, err = parseTrue(t.json)
156	case 'f':
157		v, b, err = parseFalse(t.json)
158	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
159		v, b, err = parseNumber(t.json)
160	case '{', '}', '[', ']', ':', ',':
161		d, v, b = Delim(t.json[0]), t.json[:1], t.json[1:]
162	default:
163		v, b, err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
164	}
165
166	t.Delim = d
167	t.Value = RawValue(v)
168	t.Err = err
169	t.Depth = t.depth()
170	t.Index = t.index()
171	t.IsKey = d == 0 && t.isKey
172	t.json = b
173
174	if d != 0 {
175		switch d {
176		case '{':
177			t.isKey = true
178			t.push(inObject)
179		case '[':
180			t.push(inArray)
181		case '}':
182			err = t.pop(inObject)
183			t.Depth--
184			t.Index = t.index()
185		case ']':
186			err = t.pop(inArray)
187			t.Depth--
188			t.Index = t.index()
189		case ':':
190			t.isKey = false
191		case ',':
192			if t.is(inObject) {
193				t.isKey = true
194			}
195			t.stack[len(t.stack)-1].len++
196		}
197	}
198
199	return (d != 0 || len(v) != 0) && err == nil
200}
201
202func (t *Tokenizer) push(typ scope) {
203	if t.stack == nil {
204		t.stack = t.buffer[:0]
205	}
206	t.stack = append(t.stack, state{typ: typ, len: 1})
207}
208
209func (t *Tokenizer) pop(expect scope) error {
210	i := len(t.stack) - 1
211
212	if i < 0 {
213		return syntaxError(t.json, "found unexpected character while tokenizing json input")
214	}
215
216	if found := t.stack[i]; expect != found.typ {
217		return syntaxError(t.json, "found unexpected character while tokenizing json input")
218	}
219
220	t.stack = t.stack[:i]
221	return nil
222}
223
224func (t *Tokenizer) is(typ scope) bool {
225	return len(t.stack) != 0 && t.stack[len(t.stack)-1].typ == typ
226}
227
228func (t *Tokenizer) depth() int {
229	return len(t.stack)
230}
231
232func (t *Tokenizer) index() int {
233	if len(t.stack) == 0 {
234		return 0
235	}
236	return t.stack[len(t.stack)-1].len - 1
237}
238
// RawValue represents a raw json value, it is intended to carry null, true,
// false, number, and string values only.
type RawValue []byte

// String returns true if v contains a string value.
func (v RawValue) String() bool {
	return len(v) > 0 && v[0] == '"'
}

// Null returns true if v contains a null value.
func (v RawValue) Null() bool {
	return len(v) > 0 && v[0] == 'n'
}

// True returns true if v contains a true value.
func (v RawValue) True() bool {
	return len(v) > 0 && v[0] == 't'
}

// False returns true if v contains a false value.
func (v RawValue) False() bool {
	return len(v) > 0 && v[0] == 'f'
}

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	if len(v) == 0 {
		return false
	}
	// json numbers start with a minus sign or a digit.
	c := v[0]
	return c == '-' || ('0' <= c && c <= '9')
}
265
266// AppendUnquote writes the unquoted version of the string value in v into b.
267func (v RawValue) AppendUnquote(b []byte) []byte {
268	s, r, new, err := parseStringUnquote([]byte(v), b)
269	if err != nil {
270		panic(err)
271	}
272	if len(r) != 0 {
273		panic(syntaxError(r, "unexpected trailing tokens after json value"))
274	}
275	if new {
276		b = s
277	} else {
278		b = append(b, s...)
279	}
280	return b
281}
282
283// Unquote returns the unquoted version of the string value in v.
284func (v RawValue) Unquote() []byte {
285	return v.AppendUnquote(nil)
286}