1package jsoncolor
2
3// Tokenizer is an iterator-style type which can be used to progressively parse
4// through a json input.
5//
// Tokenizing json is useful to build highly efficient parsing operations, for
// example when doing transformations on-the-fly as the program reads the
// input and produces the transformed json to an output buffer.
9//
10// Here is a common pattern to use a tokenizer:
11//
// for t := jsoncolor.NewTokenizer(b); t.Next(); {
13// switch t.Delim {
14// case '{':
15// ...
16// case '}':
17// ...
18// case '[':
19// ...
20// case ']':
21// ...
22// case ':':
23// ...
24// case ',':
25// ...
26// }
27//
28// switch {
29// case t.Value.String():
30// ...
31// case t.Value.Null():
32// ...
33// case t.Value.True():
34// ...
35// case t.Value.False():
36// ...
37// case t.Value.Number():
38// ...
39// }
40// }
41//
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys. The
	// buffer provides pre-allocated space for the stack so that tokenizing
	// shallowly nested documents does not require a heap allocation.
	stack  []state
	buffer [8]state
}
80
// state is one entry of the tokenizer's stack, recording a level of array or
// object nesting.
type state struct {
	// typ tells whether this level is an array or an object.
	typ scope
	// len counts the values seen so far at this level: it starts at 1 when the
	// scope is pushed and is incremented on each ','.
	len int
}
85
// scope identifies the kind of structure that the tokenizer is currently
// positioned in.
type scope int

const (
	// inArray marks a stack entry created when entering a '['.
	inArray scope = iota
	// inObject marks a stack entry created when entering a '{'.
	inObject
)
92
93// NewTokenizer constructs a new Tokenizer which reads its json input from b.
94func NewTokenizer(b []byte) *Tokenizer { return &Tokenizer{json: b} }
95
// Reset erases the state of t and re-initializes it with the json input from b.
//
// Reset makes it possible to reuse a Tokenizer (and the stack space it may
// have accumulated) across multiple inputs.
func (t *Tokenizer) Reset(b []byte) {
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy, which
	// ends up being slower and prevents the code from being inlined.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	t.stack = nil
}
114
115// Next returns a new tokenizer pointing at the next token, or the zero-value of
116// Tokenizer if the end of the json input has been reached.
117//
118// If the tokenizer encounters malformed json while reading the input the method
119// sets t.Err to an error describing the issue, and returns false. Once an error
120// has been encountered, the tokenizer will always fail until its input is
121// cleared by a call to its Reset method.
122func (t *Tokenizer) Next() bool {
123 if t.Err != nil {
124 return false
125 }
126
127 // Inlined code of the skipSpaces function, this give a ~15% speed boost.
128 i := 0
129skipLoop:
130 for _, c := range t.json {
131 switch c {
132 case sp, ht, nl, cr:
133 i++
134 default:
135 break skipLoop
136 }
137 }
138
139 if t.json = t.json[i:]; len(t.json) == 0 {
140 t.Reset(nil)
141 return false
142 }
143
144 var d Delim
145 var v []byte
146 var b []byte
147 var err error
148
149 switch t.json[0] {
150 case '"':
151 v, b, err = parseString(t.json)
152 case 'n':
153 v, b, err = parseNull(t.json)
154 case 't':
155 v, b, err = parseTrue(t.json)
156 case 'f':
157 v, b, err = parseFalse(t.json)
158 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
159 v, b, err = parseNumber(t.json)
160 case '{', '}', '[', ']', ':', ',':
161 d, v, b = Delim(t.json[0]), t.json[:1], t.json[1:]
162 default:
163 v, b, err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
164 }
165
166 t.Delim = d
167 t.Value = RawValue(v)
168 t.Err = err
169 t.Depth = t.depth()
170 t.Index = t.index()
171 t.IsKey = d == 0 && t.isKey
172 t.json = b
173
174 if d != 0 {
175 switch d {
176 case '{':
177 t.isKey = true
178 t.push(inObject)
179 case '[':
180 t.push(inArray)
181 case '}':
182 err = t.pop(inObject)
183 t.Depth--
184 t.Index = t.index()
185 case ']':
186 err = t.pop(inArray)
187 t.Depth--
188 t.Index = t.index()
189 case ':':
190 t.isKey = false
191 case ',':
192 if t.is(inObject) {
193 t.isKey = true
194 }
195 t.stack[len(t.stack)-1].len++
196 }
197 }
198
199 return (d != 0 || len(v) != 0) && err == nil
200}
201
202func (t *Tokenizer) push(typ scope) {
203 if t.stack == nil {
204 t.stack = t.buffer[:0]
205 }
206 t.stack = append(t.stack, state{typ: typ, len: 1})
207}
208
209func (t *Tokenizer) pop(expect scope) error {
210 i := len(t.stack) - 1
211
212 if i < 0 {
213 return syntaxError(t.json, "found unexpected character while tokenizing json input")
214 }
215
216 if found := t.stack[i]; expect != found.typ {
217 return syntaxError(t.json, "found unexpected character while tokenizing json input")
218 }
219
220 t.stack = t.stack[:i]
221 return nil
222}
223
224func (t *Tokenizer) is(typ scope) bool {
225 return len(t.stack) != 0 && t.stack[len(t.stack)-1].typ == typ
226}
227
228func (t *Tokenizer) depth() int {
229 return len(t.stack)
230}
231
232func (t *Tokenizer) index() int {
233 if len(t.stack) == 0 {
234 return 0
235 }
236 return t.stack[len(t.stack)-1].len - 1
237}
238
// RawValue represents a raw json value, it is intended to carry null, true,
// false, number, and string values only.
type RawValue []byte

// head returns the first byte of v, or zero when v is empty. The leading byte
// of a json value is sufficient to classify its type.
func (v RawValue) head() byte {
	if len(v) == 0 {
		return 0
	}
	return v[0]
}

// String returns true if v contains a string value. Note that despite its
// name this method is a predicate; it does not implement fmt.Stringer.
func (v RawValue) String() bool { return v.head() == '"' }

// Null returns true if v contains a null value.
func (v RawValue) Null() bool { return v.head() == 'n' }

// True returns true if v contains a true value.
func (v RawValue) True() bool { return v.head() == 't' }

// False returns true if v contains a false value.
func (v RawValue) False() bool { return v.head() == 'f' }

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	switch v.head() {
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return true
	}
	return false
}
265
266// AppendUnquote writes the unquoted version of the string value in v into b.
267func (v RawValue) AppendUnquote(b []byte) []byte {
268 s, r, new, err := parseStringUnquote([]byte(v), b)
269 if err != nil {
270 panic(err)
271 }
272 if len(r) != 0 {
273 panic(syntaxError(r, "unexpected trailing tokens after json value"))
274 }
275 if new {
276 b = s
277 } else {
278 b = append(b, s...)
279 }
280 return b
281}
282
283// Unquote returns the unquoted version of the string value in v.
284func (v RawValue) Unquote() []byte {
285 return v.AppendUnquote(nil)
286}