1package jsoncolor
2
3import (
4 "bytes"
5 "math"
6 "reflect"
7 "unicode"
8 "unicode/utf16"
9 "unicode/utf8"
10)
11
// Whitespace characters recognized by skipSpacesN: the characters the JSON
// specification allows as insignificant whitespace.
const (
	sp = ' '  // space
	ht = '\t' // horizontal tab
	nl = '\n' // line feed (newline)
	cr = '\r' // carriage return
)
19
// Named constants for the backslash and double-quote characters.
const (
	escape = '\\' // backslash
	quote  = '"'  //nolint:varcheck // from original code
)
24
25func skipSpaces(b []byte) []byte {
26 b, _ = skipSpacesN(b)
27 return b
28}
29
30func skipSpacesN(b []byte) ([]byte, int) {
31 for i := range b {
32 switch b[i] {
33 case sp, ht, nl, cr:
34 default:
35 return b[i:], i
36 }
37 }
38 return nil, 0
39}
40
41// parseInt parses a decimanl representation of an int64 from b.
42//
43// The function is equivalent to calling strconv.ParseInt(string(b), 10, 64) but
44// it prevents Go from making a memory allocation for converting a byte slice to
45// a string (escape analysis fails due to the error returned by strconv.ParseInt).
46//
47// Because it only works with base 10 the function is also significantly faster
48// than strconv.ParseInt.
49func parseInt(b []byte, t reflect.Type) (int64, []byte, error) {
50 var value int64
51 var count int
52
53 if len(b) == 0 {
54 return 0, b, syntaxError(b, "cannot decode integer from an empty input")
55 }
56
57 if b[0] == '-' {
58 const max = math.MinInt64
59 const lim = max / 10
60
61 if len(b) == 1 {
62 return 0, b, syntaxError(b, "cannot decode integer from '-'")
63 }
64
65 if len(b) > 2 && b[1] == '0' && '0' <= b[2] && b[2] <= '9' {
66 return 0, b, syntaxError(b, "invalid leading character '0' in integer")
67 }
68
69 for _, d := range b[1:] {
70 if !(d >= '0' && d <= '9') {
71 if count == 0 {
72 b, err := inputError(b, t)
73 return 0, b, err
74 }
75 break
76 }
77
78 if value < lim {
79 return 0, b, unmarshalOverflow(b, t)
80 }
81
82 value *= 10
83 x := int64(d - '0')
84
85 if value < (max + x) {
86 return 0, b, unmarshalOverflow(b, t)
87 }
88
89 value -= x
90 count++
91 }
92
93 count++
94 } else {
95 const max = math.MaxInt64
96 const lim = max / 10
97
98 if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' {
99 return 0, b, syntaxError(b, "invalid leading character '0' in integer")
100 }
101
102 for _, d := range b {
103 if !(d >= '0' && d <= '9') {
104 if count == 0 {
105 b, err := inputError(b, t)
106 return 0, b, err
107 }
108 break
109 }
110 x := int64(d - '0')
111
112 if value > lim {
113 return 0, b, unmarshalOverflow(b, t)
114 }
115
116 if value *= 10; value > (max - x) {
117 return 0, b, unmarshalOverflow(b, t)
118 }
119
120 value += x
121 count++
122 }
123 }
124
125 if count < len(b) {
126 switch b[count] {
127 case '.', 'e', 'E': // was this actually a float?
128 v, r, err := parseNumber(b)
129 if err != nil {
130 v, r = b[:count+1], b[count+1:]
131 }
132 return 0, r, unmarshalTypeError(v, t)
133 }
134 }
135
136 return value, b[count:], nil
137}
138
139// parseUint is like parseInt but for unsigned integers.
140func parseUint(b []byte, t reflect.Type) (uint64, []byte, error) {
141 const max = math.MaxUint64
142 const lim = max / 10
143
144 var value uint64
145 var count int
146
147 if len(b) == 0 {
148 return 0, b, syntaxError(b, "cannot decode integer value from an empty input")
149 }
150
151 if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' {
152 return 0, b, syntaxError(b, "invalid leading character '0' in integer")
153 }
154
155 for _, d := range b {
156 if !(d >= '0' && d <= '9') {
157 if count == 0 {
158 b, err := inputError(b, t)
159 return 0, b, err
160 }
161 break
162 }
163 x := uint64(d - '0')
164
165 if value > lim {
166 return 0, b, unmarshalOverflow(b, t)
167 }
168
169 if value *= 10; value > (max - x) {
170 return 0, b, unmarshalOverflow(b, t)
171 }
172
173 value += x
174 count++
175 }
176
177 if count < len(b) {
178 switch b[count] {
179 case '.', 'e', 'E': // was this actually a float?
180 v, r, err := parseNumber(b)
181 if err != nil {
182 v, r = b[:count+1], b[count+1:]
183 }
184 return 0, r, unmarshalTypeError(v, t)
185 }
186 }
187
188 return value, b[count:], nil
189}
190
191// parseUintHex parses a hexadecimanl representation of a uint64 from b.
192//
193// The function is equivalent to calling strconv.ParseUint(string(b), 16, 64) but
194// it prevents Go from making a memory allocation for converting a byte slice to
195// a string (escape analysis fails due to the error returned by strconv.ParseUint).
196//
197// Because it only works with base 16 the function is also significantly faster
198// than strconv.ParseUint.
199func parseUintHex(b []byte) (uint64, []byte, error) {
200 const max = math.MaxUint64
201 const lim = max / 0x10
202
203 var value uint64
204 var count int
205
206 if len(b) == 0 {
207 return 0, b, syntaxError(b, "cannot decode hexadecimal value from an empty input")
208 }
209
210parseLoop:
211 for i, d := range b {
212 var x uint64
213
214 switch {
215 case d >= '0' && d <= '9':
216 x = uint64(d - '0')
217
218 case d >= 'A' && d <= 'F':
219 x = uint64(d-'A') + 0xA
220
221 case d >= 'a' && d <= 'f':
222 x = uint64(d-'a') + 0xA
223
224 default:
225 if i == 0 {
226 return 0, b, syntaxError(b, "expected hexadecimal digit but found '%c'", d)
227 }
228 break parseLoop
229 }
230
231 if value > lim {
232 return 0, b, syntaxError(b, "hexadecimal value out of range")
233 }
234
235 if value *= 0x10; value > (max - x) {
236 return 0, b, syntaxError(b, "hexadecimal value out of range")
237 }
238
239 value += x
240 count++
241 }
242
243 return value, b[count:], nil
244}
245
246func parseNull(b []byte) ([]byte, []byte, error) {
247 if hasNullPrefix(b) {
248 return b[:4], b[4:], nil
249 }
250 if len(b) < 4 {
251 return nil, b[len(b):], unexpectedEOF(b)
252 }
253 return nil, b, syntaxError(b, "expected 'null' but found invalid token")
254}
255
256func parseTrue(b []byte) ([]byte, []byte, error) {
257 if hasTruePrefix(b) {
258 return b[:4], b[4:], nil
259 }
260 if len(b) < 4 {
261 return nil, b[len(b):], unexpectedEOF(b)
262 }
263 return nil, b, syntaxError(b, "expected 'true' but found invalid token")
264}
265
266func parseFalse(b []byte) ([]byte, []byte, error) {
267 if hasFalsePrefix(b) {
268 return b[:5], b[5:], nil
269 }
270 if len(b) < 5 {
271 return nil, b[len(b):], unexpectedEOF(b)
272 }
273 return nil, b, syntaxError(b, "expected 'false' but found invalid token")
274}
275
276func parseNumber(b []byte) (v, r []byte, err error) {
277 if len(b) == 0 {
278 r, err = b, unexpectedEOF(b)
279 return
280 }
281
282 i := 0
283 // sign
284 if b[i] == '-' {
285 i++
286 }
287
288 if i == len(b) {
289 r, err = b[i:], syntaxError(b, "missing number value after sign")
290 return
291 }
292
293 if b[i] < '0' || b[i] > '9' {
294 r, err = b[i:], syntaxError(b, "expected digit but got '%c'", b[i])
295 return
296 }
297
298 // integer part
299 if b[i] == '0' {
300 i++
301 if i == len(b) || (b[i] != '.' && b[i] != 'e' && b[i] != 'E') {
302 v, r = b[:i], b[i:]
303 return
304 }
305 if '0' <= b[i] && b[i] <= '9' {
306 r, err = b[i:], syntaxError(b, "cannot decode number with leading '0' character")
307 return
308 }
309 }
310
311 for i < len(b) && '0' <= b[i] && b[i] <= '9' {
312 i++
313 }
314
315 // decimal part
316 if i < len(b) && b[i] == '.' {
317 i++
318 decimalStart := i
319
320 for i < len(b) {
321 if c := b[i]; !('0' <= c && c <= '9') {
322 if i == decimalStart {
323 r, err = b[i:], syntaxError(b, "expected digit but found '%c'", c)
324 return
325 }
326 break
327 }
328 i++
329 }
330
331 if i == decimalStart {
332 r, err = b[i:], syntaxError(b, "expected decimal part after '.'")
333 return
334 }
335 }
336
337 // exponent part
338 if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
339 i++
340
341 if i < len(b) {
342 if c := b[i]; c == '+' || c == '-' {
343 i++
344 }
345 }
346
347 if i == len(b) {
348 r, err = b[i:], syntaxError(b, "missing exponent in number")
349 return
350 }
351
352 exponentStart := i
353
354 for i < len(b) {
355 if c := b[i]; !('0' <= c && c <= '9') {
356 if i == exponentStart {
357 err = syntaxError(b, "expected digit but found '%c'", c)
358 return
359 }
360 break
361 }
362 i++
363 }
364 }
365
366 v, r = b[:i], b[i:]
367 return
368}
369
370func parseUnicode(b []byte) (rune, int, error) {
371 if len(b) < 4 {
372 return 0, 0, syntaxError(b, "unicode code point must have at least 4 characters")
373 }
374
375 u, r, err := parseUintHex(b[:4])
376 if err != nil {
377 return 0, 0, syntaxError(b, "parsing unicode code point: %s", err)
378 }
379
380 if len(r) != 0 {
381 return 0, 0, syntaxError(b, "invalid unicode code point")
382 }
383
384 return rune(u), 4, nil
385}
386
387func parseStringFast(b []byte) ([]byte, []byte, bool, error) {
388 if len(b) < 2 {
389 return nil, b[len(b):], false, unexpectedEOF(b)
390 }
391 if b[0] != '"' {
392 return nil, b, false, syntaxError(b, "expected '\"' at the beginning of a string value")
393 }
394
395 n := bytes.IndexByte(b[1:], '"') + 2
396 if n <= 1 {
397 return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value")
398 }
399 if bytes.IndexByte(b[1:n], '\\') < 0 && asciiValidPrint(b[1:n]) {
400 return b[:n], b[n:], false, nil
401 }
402
403 for i := 1; i < len(b); i++ {
404 switch b[i] {
405 case '\\':
406 if i++; i < len(b) {
407 switch b[i] {
408 case '"', '\\', '/', 'n', 'r', 't', 'f', 'b':
409 case 'u':
410 _, n, err := parseUnicode(b[i+1:])
411 if err != nil {
412 return nil, b, false, err
413 }
414 i += n
415 default:
416 return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i])
417 }
418 }
419
420 case '"':
421 return b[:i+1], b[i+1:], true, nil
422
423 default:
424 if b[i] < 0x20 {
425 return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i])
426 }
427 }
428 }
429
430 return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value")
431}
432
433func parseString(b []byte) ([]byte, []byte, error) {
434 s, b, _, err := parseStringFast(b)
435 return s, b, err
436}
437
438func parseStringUnquote(b []byte, r []byte) ([]byte, []byte, bool, error) {
439 s, b, escaped, err := parseStringFast(b)
440 if err != nil {
441 return s, b, false, err
442 }
443
444 s = s[1 : len(s)-1] // trim the quotes
445
446 if !escaped {
447 return s, b, false, nil
448 }
449
450 if r == nil {
451 r = make([]byte, 0, len(s))
452 }
453
454 for len(s) != 0 {
455 i := bytes.IndexByte(s, '\\')
456
457 if i < 0 {
458 r = appendCoerceInvalidUTF8(r, s)
459 break
460 }
461
462 r = appendCoerceInvalidUTF8(r, s[:i])
463 s = s[i+1:]
464
465 c := s[0]
466 switch c {
467 case '"', '\\', '/':
468 // simple escaped character
469 case 'n':
470 c = '\n'
471
472 case 'r':
473 c = '\r'
474
475 case 't':
476 c = '\t'
477
478 case 'b':
479 c = '\b'
480
481 case 'f':
482 c = '\f'
483
484 case 'u':
485 s = s[1:]
486
487 r1, n1, err := parseUnicode(s)
488 if err != nil {
489 return r, b, true, err
490 }
491 s = s[n1:]
492
493 if utf16.IsSurrogate(r1) {
494 if !hasPrefix(s, `\u`) {
495 r1 = unicode.ReplacementChar
496 } else {
497 r2, n2, err := parseUnicode(s[2:])
498 if err != nil {
499 return r, b, true, err
500 }
501 if r1 = utf16.DecodeRune(r1, r2); r1 != unicode.ReplacementChar {
502 s = s[2+n2:]
503 }
504 }
505 }
506
507 r = appendRune(r, r1)
508 continue
509
510 default: // not sure what this escape sequence is
511 return r, b, false, syntaxError(s, "invalid character '%c' in string escape code", c)
512 }
513
514 r = append(r, c)
515 s = s[1:]
516 }
517
518 return r, b, true, nil
519}
520
// appendRune appends the UTF-8 encoding of r to b and returns the extended
// slice.
func appendRune(b []byte, r rune) []byte {
	var enc [utf8.UTFMax]byte
	size := utf8.EncodeRune(enc[:], r)
	return append(b, enc[:size]...)
}
526
// appendCoerceInvalidUTF8 appends s to b one rune at a time, re-encoding each
// rune as UTF-8. Bytes of s that are not valid UTF-8 each come out as the
// encoding of utf8.RuneError (U+FFFD).
func appendCoerceInvalidUTF8(b []byte, s []byte) []byte {
	var enc [utf8.UTFMax]byte

	for len(s) > 0 {
		r, size := utf8.DecodeRune(s)
		b = append(b, enc[:utf8.EncodeRune(enc[:], r)]...)
		s = s[size:]
	}

	return b
}
536
537func parseObject(b []byte) ([]byte, []byte, error) {
538 if len(b) < 2 {
539 return nil, b[len(b):], unexpectedEOF(b)
540 }
541
542 if b[0] != '{' {
543 return nil, b, syntaxError(b, "expected '{' at the beginning of an object value")
544 }
545
546 var err error
547 var a = b
548 var n = len(b)
549 var i = 0
550
551 b = b[1:]
552 for {
553 b = skipSpaces(b)
554
555 if len(b) == 0 {
556 return nil, b, syntaxError(b, "cannot decode object from empty input")
557 }
558
559 if b[0] == '}' {
560 j := (n - len(b)) + 1
561 return a[:j], a[j:], nil
562 }
563
564 if i != 0 {
565 if len(b) == 0 {
566 return nil, b, syntaxError(b, "unexpected EOF after object field value")
567 }
568 if b[0] != ',' {
569 return nil, b, syntaxError(b, "expected ',' after object field value but found '%c'", b[0])
570 }
571 b = skipSpaces(b[1:])
572 if len(b) == 0 {
573 return nil, b, unexpectedEOF(b)
574 }
575 if b[0] == '}' {
576 return nil, b, syntaxError(b, "unexpected trailing comma after object field")
577 }
578 }
579
580 _, b, err = parseString(b)
581 if err != nil {
582 return nil, b, err
583 }
584 b = skipSpaces(b)
585
586 if len(b) == 0 {
587 return nil, b, syntaxError(b, "unexpected EOF after object field key")
588 }
589 if b[0] != ':' {
590 return nil, b, syntaxError(b, "expected ':' after object field key but found '%c'", b[0])
591 }
592 b = skipSpaces(b[1:])
593
594 _, b, err = parseValue(b)
595 if err != nil {
596 return nil, b, err
597 }
598
599 i++
600 }
601}
602
603func parseArray(b []byte) ([]byte, []byte, error) {
604 if len(b) < 2 {
605 return nil, b[len(b):], unexpectedEOF(b)
606 }
607
608 if b[0] != '[' {
609 return nil, b, syntaxError(b, "expected '[' at the beginning of array value")
610 }
611
612 var err error
613 var a = b
614 var n = len(b)
615 var i = 0
616
617 b = b[1:]
618 for {
619 b = skipSpaces(b)
620
621 if len(b) == 0 {
622 return nil, b, syntaxError(b, "missing closing ']' after array value")
623 }
624
625 if b[0] == ']' {
626 j := (n - len(b)) + 1
627 return a[:j], a[j:], nil
628 }
629
630 if i != 0 {
631 if len(b) == 0 {
632 return nil, b, syntaxError(b, "unexpected EOF after array element")
633 }
634 if b[0] != ',' {
635 return nil, b, syntaxError(b, "expected ',' after array element but found '%c'", b[0])
636 }
637 b = skipSpaces(b[1:])
638 if len(b) == 0 {
639 return nil, b, unexpectedEOF(b)
640 }
641 if b[0] == ']' {
642 return nil, b, syntaxError(b, "unexpected trailing comma after object field")
643 }
644 }
645
646 _, b, err = parseValue(b)
647 if err != nil {
648 return nil, b, err
649 }
650
651 i++
652 }
653}
654
655func parseValue(b []byte) ([]byte, []byte, error) {
656 if len(b) != 0 {
657 switch b[0] {
658 case '{':
659 return parseObject(b)
660 case '[':
661 return parseArray(b)
662 case '"':
663 return parseString(b)
664 case 'n':
665 return parseNull(b)
666 case 't':
667 return parseTrue(b)
668 case 'f':
669 return parseFalse(b)
670 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
671 return parseNumber(b)
672 default:
673 return nil, b, syntaxError(b, "invalid character '%c' looking for beginning of value", b[0])
674 }
675 }
676 return nil, b, syntaxError(b, "unexpected end of JSON input")
677}
678
// hasNullPrefix reports whether b starts with the four bytes "null".
func hasNullPrefix(b []byte) bool {
	if len(b) < 4 {
		return false
	}
	return b[0] == 'n' && b[1] == 'u' && b[2] == 'l' && b[3] == 'l'
}
682
// hasTruePrefix reports whether b starts with the four bytes "true".
func hasTruePrefix(b []byte) bool {
	if len(b) < 4 {
		return false
	}
	return b[0] == 't' && b[1] == 'r' && b[2] == 'u' && b[3] == 'e'
}
686
// hasFalsePrefix reports whether b starts with the five bytes "false".
func hasFalsePrefix(b []byte) bool {
	if len(b) < 5 {
		return false
	}
	return b[0] == 'f' && b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e'
}
690
// hasPrefix reports whether the byte slice b starts with the bytes of the
// string s. An empty s is a prefix of every b.
func hasPrefix(b []byte, s string) bool {
	if len(b) < len(s) {
		return false
	}
	for i := 0; i < len(s); i++ {
		if b[i] != s[i] {
			return false
		}
	}
	return true
}
694
// hasLeadingSign reports whether the first byte of b is '+' or '-'.
func hasLeadingSign(b []byte) bool {
	if len(b) == 0 {
		return false
	}
	switch b[0] {
	case '+', '-':
		return true
	}
	return false
}
698
699func hasLeadingZeroes(b []byte) bool {
700 if hasLeadingSign(b) {
701 b = b[1:]
702 }
703 return len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9'
704}
705
706func appendToLower(b, s []byte) []byte {
707 if asciiValid(s) { // fast path for ascii strings
708 i := 0
709
710 for j := range s {
711 c := s[j]
712
713 if 'A' <= c && c <= 'Z' {
714 b = append(b, s[i:j]...)
715 b = append(b, c+('a'-'A'))
716 i = j + 1
717 }
718 }
719
720 return append(b, s[i:]...)
721 }
722
723 for _, r := range string(s) {
724 b = appendRune(b, foldRune(r))
725 }
726
727 return b
728}
729
// foldRune returns unicode.SimpleFold(r), except that a result in the range
// 'A' through 'Z' is converted to the matching lowercase ASCII letter.
func foldRune(r rune) rune {
	folded := unicode.SimpleFold(r)
	if folded < 'A' || folded > 'Z' {
		return folded
	}
	return folded + ('a' - 'A')
}