hepi - vendor/github.com/neilotoole/jsoncolor/parse.go

Path: hepi / vendor / github.com / neilotoole / jsoncolor / parse.go (raw)
  1package jsoncolor
  2
  3import (
  4	"bytes"
  5	"math"
  6	"reflect"
  7	"unicode"
  8	"unicode/utf16"
  9	"unicode/utf8"
 10)
 11
 12// All spaces characters defined in the json specification.
 13const (
 14	sp = ' '
 15	ht = '\t'
 16	nl = '\n'
 17	cr = '\r'
 18)
 19
 20const (
 21	escape = '\\'
 22	quote  = '"' //nolint:varcheck // from original code
 23)
 24
 25func skipSpaces(b []byte) []byte {
 26	b, _ = skipSpacesN(b)
 27	return b
 28}
 29
 30func skipSpacesN(b []byte) ([]byte, int) {
 31	for i := range b {
 32		switch b[i] {
 33		case sp, ht, nl, cr:
 34		default:
 35			return b[i:], i
 36		}
 37	}
 38	return nil, 0
 39}
 40
 41// parseInt parses a decimanl representation of an int64 from b.
 42//
 43// The function is equivalent to calling strconv.ParseInt(string(b), 10, 64) but
 44// it prevents Go from making a memory allocation for converting a byte slice to
 45// a string (escape analysis fails due to the error returned by strconv.ParseInt).
 46//
 47// Because it only works with base 10 the function is also significantly faster
 48// than strconv.ParseInt.
 49func parseInt(b []byte, t reflect.Type) (int64, []byte, error) {
 50	var value int64
 51	var count int
 52
 53	if len(b) == 0 {
 54		return 0, b, syntaxError(b, "cannot decode integer from an empty input")
 55	}
 56
 57	if b[0] == '-' {
 58		const max = math.MinInt64
 59		const lim = max / 10
 60
 61		if len(b) == 1 {
 62			return 0, b, syntaxError(b, "cannot decode integer from '-'")
 63		}
 64
 65		if len(b) > 2 && b[1] == '0' && '0' <= b[2] && b[2] <= '9' {
 66			return 0, b, syntaxError(b, "invalid leading character '0' in integer")
 67		}
 68
 69		for _, d := range b[1:] {
 70			if !(d >= '0' && d <= '9') {
 71				if count == 0 {
 72					b, err := inputError(b, t)
 73					return 0, b, err
 74				}
 75				break
 76			}
 77
 78			if value < lim {
 79				return 0, b, unmarshalOverflow(b, t)
 80			}
 81
 82			value *= 10
 83			x := int64(d - '0')
 84
 85			if value < (max + x) {
 86				return 0, b, unmarshalOverflow(b, t)
 87			}
 88
 89			value -= x
 90			count++
 91		}
 92
 93		count++
 94	} else {
 95		const max = math.MaxInt64
 96		const lim = max / 10
 97
 98		if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' {
 99			return 0, b, syntaxError(b, "invalid leading character '0' in integer")
100		}
101
102		for _, d := range b {
103			if !(d >= '0' && d <= '9') {
104				if count == 0 {
105					b, err := inputError(b, t)
106					return 0, b, err
107				}
108				break
109			}
110			x := int64(d - '0')
111
112			if value > lim {
113				return 0, b, unmarshalOverflow(b, t)
114			}
115
116			if value *= 10; value > (max - x) {
117				return 0, b, unmarshalOverflow(b, t)
118			}
119
120			value += x
121			count++
122		}
123	}
124
125	if count < len(b) {
126		switch b[count] {
127		case '.', 'e', 'E': // was this actually a float?
128			v, r, err := parseNumber(b)
129			if err != nil {
130				v, r = b[:count+1], b[count+1:]
131			}
132			return 0, r, unmarshalTypeError(v, t)
133		}
134	}
135
136	return value, b[count:], nil
137}
138
139// parseUint is like parseInt but for unsigned integers.
140func parseUint(b []byte, t reflect.Type) (uint64, []byte, error) {
141	const max = math.MaxUint64
142	const lim = max / 10
143
144	var value uint64
145	var count int
146
147	if len(b) == 0 {
148		return 0, b, syntaxError(b, "cannot decode integer value from an empty input")
149	}
150
151	if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' {
152		return 0, b, syntaxError(b, "invalid leading character '0' in integer")
153	}
154
155	for _, d := range b {
156		if !(d >= '0' && d <= '9') {
157			if count == 0 {
158				b, err := inputError(b, t)
159				return 0, b, err
160			}
161			break
162		}
163		x := uint64(d - '0')
164
165		if value > lim {
166			return 0, b, unmarshalOverflow(b, t)
167		}
168
169		if value *= 10; value > (max - x) {
170			return 0, b, unmarshalOverflow(b, t)
171		}
172
173		value += x
174		count++
175	}
176
177	if count < len(b) {
178		switch b[count] {
179		case '.', 'e', 'E': // was this actually a float?
180			v, r, err := parseNumber(b)
181			if err != nil {
182				v, r = b[:count+1], b[count+1:]
183			}
184			return 0, r, unmarshalTypeError(v, t)
185		}
186	}
187
188	return value, b[count:], nil
189}
190
191// parseUintHex parses a hexadecimanl representation of a uint64 from b.
192//
193// The function is equivalent to calling strconv.ParseUint(string(b), 16, 64) but
194// it prevents Go from making a memory allocation for converting a byte slice to
195// a string (escape analysis fails due to the error returned by strconv.ParseUint).
196//
197// Because it only works with base 16 the function is also significantly faster
198// than strconv.ParseUint.
199func parseUintHex(b []byte) (uint64, []byte, error) {
200	const max = math.MaxUint64
201	const lim = max / 0x10
202
203	var value uint64
204	var count int
205
206	if len(b) == 0 {
207		return 0, b, syntaxError(b, "cannot decode hexadecimal value from an empty input")
208	}
209
210parseLoop:
211	for i, d := range b {
212		var x uint64
213
214		switch {
215		case d >= '0' && d <= '9':
216			x = uint64(d - '0')
217
218		case d >= 'A' && d <= 'F':
219			x = uint64(d-'A') + 0xA
220
221		case d >= 'a' && d <= 'f':
222			x = uint64(d-'a') + 0xA
223
224		default:
225			if i == 0 {
226				return 0, b, syntaxError(b, "expected hexadecimal digit but found '%c'", d)
227			}
228			break parseLoop
229		}
230
231		if value > lim {
232			return 0, b, syntaxError(b, "hexadecimal value out of range")
233		}
234
235		if value *= 0x10; value > (max - x) {
236			return 0, b, syntaxError(b, "hexadecimal value out of range")
237		}
238
239		value += x
240		count++
241	}
242
243	return value, b[count:], nil
244}
245
246func parseNull(b []byte) ([]byte, []byte, error) {
247	if hasNullPrefix(b) {
248		return b[:4], b[4:], nil
249	}
250	if len(b) < 4 {
251		return nil, b[len(b):], unexpectedEOF(b)
252	}
253	return nil, b, syntaxError(b, "expected 'null' but found invalid token")
254}
255
256func parseTrue(b []byte) ([]byte, []byte, error) {
257	if hasTruePrefix(b) {
258		return b[:4], b[4:], nil
259	}
260	if len(b) < 4 {
261		return nil, b[len(b):], unexpectedEOF(b)
262	}
263	return nil, b, syntaxError(b, "expected 'true' but found invalid token")
264}
265
266func parseFalse(b []byte) ([]byte, []byte, error) {
267	if hasFalsePrefix(b) {
268		return b[:5], b[5:], nil
269	}
270	if len(b) < 5 {
271		return nil, b[len(b):], unexpectedEOF(b)
272	}
273	return nil, b, syntaxError(b, "expected 'false' but found invalid token")
274}
275
276func parseNumber(b []byte) (v, r []byte, err error) {
277	if len(b) == 0 {
278		r, err = b, unexpectedEOF(b)
279		return
280	}
281
282	i := 0
283	// sign
284	if b[i] == '-' {
285		i++
286	}
287
288	if i == len(b) {
289		r, err = b[i:], syntaxError(b, "missing number value after sign")
290		return
291	}
292
293	if b[i] < '0' || b[i] > '9' {
294		r, err = b[i:], syntaxError(b, "expected digit but got '%c'", b[i])
295		return
296	}
297
298	// integer part
299	if b[i] == '0' {
300		i++
301		if i == len(b) || (b[i] != '.' && b[i] != 'e' && b[i] != 'E') {
302			v, r = b[:i], b[i:]
303			return
304		}
305		if '0' <= b[i] && b[i] <= '9' {
306			r, err = b[i:], syntaxError(b, "cannot decode number with leading '0' character")
307			return
308		}
309	}
310
311	for i < len(b) && '0' <= b[i] && b[i] <= '9' {
312		i++
313	}
314
315	// decimal part
316	if i < len(b) && b[i] == '.' {
317		i++
318		decimalStart := i
319
320		for i < len(b) {
321			if c := b[i]; !('0' <= c && c <= '9') {
322				if i == decimalStart {
323					r, err = b[i:], syntaxError(b, "expected digit but found '%c'", c)
324					return
325				}
326				break
327			}
328			i++
329		}
330
331		if i == decimalStart {
332			r, err = b[i:], syntaxError(b, "expected decimal part after '.'")
333			return
334		}
335	}
336
337	// exponent part
338	if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
339		i++
340
341		if i < len(b) {
342			if c := b[i]; c == '+' || c == '-' {
343				i++
344			}
345		}
346
347		if i == len(b) {
348			r, err = b[i:], syntaxError(b, "missing exponent in number")
349			return
350		}
351
352		exponentStart := i
353
354		for i < len(b) {
355			if c := b[i]; !('0' <= c && c <= '9') {
356				if i == exponentStart {
357					err = syntaxError(b, "expected digit but found '%c'", c)
358					return
359				}
360				break
361			}
362			i++
363		}
364	}
365
366	v, r = b[:i], b[i:]
367	return
368}
369
370func parseUnicode(b []byte) (rune, int, error) {
371	if len(b) < 4 {
372		return 0, 0, syntaxError(b, "unicode code point must have at least 4 characters")
373	}
374
375	u, r, err := parseUintHex(b[:4])
376	if err != nil {
377		return 0, 0, syntaxError(b, "parsing unicode code point: %s", err)
378	}
379
380	if len(r) != 0 {
381		return 0, 0, syntaxError(b, "invalid unicode code point")
382	}
383
384	return rune(u), 4, nil
385}
386
387func parseStringFast(b []byte) ([]byte, []byte, bool, error) {
388	if len(b) < 2 {
389		return nil, b[len(b):], false, unexpectedEOF(b)
390	}
391	if b[0] != '"' {
392		return nil, b, false, syntaxError(b, "expected '\"' at the beginning of a string value")
393	}
394
395	n := bytes.IndexByte(b[1:], '"') + 2
396	if n <= 1 {
397		return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value")
398	}
399	if bytes.IndexByte(b[1:n], '\\') < 0 && asciiValidPrint(b[1:n]) {
400		return b[:n], b[n:], false, nil
401	}
402
403	for i := 1; i < len(b); i++ {
404		switch b[i] {
405		case '\\':
406			if i++; i < len(b) {
407				switch b[i] {
408				case '"', '\\', '/', 'n', 'r', 't', 'f', 'b':
409				case 'u':
410					_, n, err := parseUnicode(b[i+1:])
411					if err != nil {
412						return nil, b, false, err
413					}
414					i += n
415				default:
416					return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i])
417				}
418			}
419
420		case '"':
421			return b[:i+1], b[i+1:], true, nil
422
423		default:
424			if b[i] < 0x20 {
425				return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i])
426			}
427		}
428	}
429
430	return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value")
431}
432
433func parseString(b []byte) ([]byte, []byte, error) {
434	s, b, _, err := parseStringFast(b)
435	return s, b, err
436}
437
438func parseStringUnquote(b []byte, r []byte) ([]byte, []byte, bool, error) {
439	s, b, escaped, err := parseStringFast(b)
440	if err != nil {
441		return s, b, false, err
442	}
443
444	s = s[1 : len(s)-1] // trim the quotes
445
446	if !escaped {
447		return s, b, false, nil
448	}
449
450	if r == nil {
451		r = make([]byte, 0, len(s))
452	}
453
454	for len(s) != 0 {
455		i := bytes.IndexByte(s, '\\')
456
457		if i < 0 {
458			r = appendCoerceInvalidUTF8(r, s)
459			break
460		}
461
462		r = appendCoerceInvalidUTF8(r, s[:i])
463		s = s[i+1:]
464
465		c := s[0]
466		switch c {
467		case '"', '\\', '/':
468			// simple escaped character
469		case 'n':
470			c = '\n'
471
472		case 'r':
473			c = '\r'
474
475		case 't':
476			c = '\t'
477
478		case 'b':
479			c = '\b'
480
481		case 'f':
482			c = '\f'
483
484		case 'u':
485			s = s[1:]
486
487			r1, n1, err := parseUnicode(s)
488			if err != nil {
489				return r, b, true, err
490			}
491			s = s[n1:]
492
493			if utf16.IsSurrogate(r1) {
494				if !hasPrefix(s, `\u`) {
495					r1 = unicode.ReplacementChar
496				} else {
497					r2, n2, err := parseUnicode(s[2:])
498					if err != nil {
499						return r, b, true, err
500					}
501					if r1 = utf16.DecodeRune(r1, r2); r1 != unicode.ReplacementChar {
502						s = s[2+n2:]
503					}
504				}
505			}
506
507			r = appendRune(r, r1)
508			continue
509
510		default: // not sure what this escape sequence is
511			return r, b, false, syntaxError(s, "invalid character '%c' in string escape code", c)
512		}
513
514		r = append(r, c)
515		s = s[1:]
516	}
517
518	return r, b, true, nil
519}
520
// appendRune appends the UTF-8 encoding of r to b and returns the extended
// slice.
func appendRune(b []byte, r rune) []byte {
	start := len(b)
	// Reserve room for the longest possible encoding, then trim the slice down
	// to the bytes utf8.EncodeRune actually wrote.
	grown := append(b, 0, 0, 0, 0)
	size := utf8.EncodeRune(grown[start:], r)
	return grown[:start+size]
}
526
// appendCoerceInvalidUTF8 appends s to b rune by rune, re-encoding each one.
// Bytes of s that are not valid UTF-8 decode to utf8.RuneError and are
// therefore written out as the encoding of U+FFFD.
func appendCoerceInvalidUTF8(b []byte, s []byte) []byte {
	var enc [utf8.UTFMax]byte

	for len(s) > 0 {
		r, size := utf8.DecodeRune(s)
		b = append(b, enc[:utf8.EncodeRune(enc[:], r)]...)
		s = s[size:]
	}

	return b
}
536
537func parseObject(b []byte) ([]byte, []byte, error) {
538	if len(b) < 2 {
539		return nil, b[len(b):], unexpectedEOF(b)
540	}
541
542	if b[0] != '{' {
543		return nil, b, syntaxError(b, "expected '{' at the beginning of an object value")
544	}
545
546	var err error
547	var a = b
548	var n = len(b)
549	var i = 0
550
551	b = b[1:]
552	for {
553		b = skipSpaces(b)
554
555		if len(b) == 0 {
556			return nil, b, syntaxError(b, "cannot decode object from empty input")
557		}
558
559		if b[0] == '}' {
560			j := (n - len(b)) + 1
561			return a[:j], a[j:], nil
562		}
563
564		if i != 0 {
565			if len(b) == 0 {
566				return nil, b, syntaxError(b, "unexpected EOF after object field value")
567			}
568			if b[0] != ',' {
569				return nil, b, syntaxError(b, "expected ',' after object field value but found '%c'", b[0])
570			}
571			b = skipSpaces(b[1:])
572			if len(b) == 0 {
573				return nil, b, unexpectedEOF(b)
574			}
575			if b[0] == '}' {
576				return nil, b, syntaxError(b, "unexpected trailing comma after object field")
577			}
578		}
579
580		_, b, err = parseString(b)
581		if err != nil {
582			return nil, b, err
583		}
584		b = skipSpaces(b)
585
586		if len(b) == 0 {
587			return nil, b, syntaxError(b, "unexpected EOF after object field key")
588		}
589		if b[0] != ':' {
590			return nil, b, syntaxError(b, "expected ':' after object field key but found '%c'", b[0])
591		}
592		b = skipSpaces(b[1:])
593
594		_, b, err = parseValue(b)
595		if err != nil {
596			return nil, b, err
597		}
598
599		i++
600	}
601}
602
603func parseArray(b []byte) ([]byte, []byte, error) {
604	if len(b) < 2 {
605		return nil, b[len(b):], unexpectedEOF(b)
606	}
607
608	if b[0] != '[' {
609		return nil, b, syntaxError(b, "expected '[' at the beginning of array value")
610	}
611
612	var err error
613	var a = b
614	var n = len(b)
615	var i = 0
616
617	b = b[1:]
618	for {
619		b = skipSpaces(b)
620
621		if len(b) == 0 {
622			return nil, b, syntaxError(b, "missing closing ']' after array value")
623		}
624
625		if b[0] == ']' {
626			j := (n - len(b)) + 1
627			return a[:j], a[j:], nil
628		}
629
630		if i != 0 {
631			if len(b) == 0 {
632				return nil, b, syntaxError(b, "unexpected EOF after array element")
633			}
634			if b[0] != ',' {
635				return nil, b, syntaxError(b, "expected ',' after array element but found '%c'", b[0])
636			}
637			b = skipSpaces(b[1:])
638			if len(b) == 0 {
639				return nil, b, unexpectedEOF(b)
640			}
641			if b[0] == ']' {
642				return nil, b, syntaxError(b, "unexpected trailing comma after object field")
643			}
644		}
645
646		_, b, err = parseValue(b)
647		if err != nil {
648			return nil, b, err
649		}
650
651		i++
652	}
653}
654
655func parseValue(b []byte) ([]byte, []byte, error) {
656	if len(b) != 0 {
657		switch b[0] {
658		case '{':
659			return parseObject(b)
660		case '[':
661			return parseArray(b)
662		case '"':
663			return parseString(b)
664		case 'n':
665			return parseNull(b)
666		case 't':
667			return parseTrue(b)
668		case 'f':
669			return parseFalse(b)
670		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
671			return parseNumber(b)
672		default:
673			return nil, b, syntaxError(b, "invalid character '%c' looking for beginning of value", b[0])
674		}
675	}
676	return nil, b, syntaxError(b, "unexpected end of JSON input")
677}
678
// hasNullPrefix reports whether b starts with the literal "null".
func hasNullPrefix(b []byte) bool {
	const lit = "null"
	if len(b) < len(lit) {
		return false
	}
	return string(b[:len(lit)]) == lit
}
682
// hasTruePrefix reports whether b starts with the literal "true".
func hasTruePrefix(b []byte) bool {
	const lit = "true"
	if len(b) < len(lit) {
		return false
	}
	return string(b[:len(lit)]) == lit
}
686
// hasFalsePrefix reports whether b starts with the literal "false".
func hasFalsePrefix(b []byte) bool {
	const lit = "false"
	if len(b) < len(lit) {
		return false
	}
	return string(b[:len(lit)]) == lit
}
690
// hasPrefix reports whether the bytes of b start with the string s.
func hasPrefix(b []byte, s string) bool {
	if n := len(s); len(b) >= n {
		return string(b[:n]) == s
	}
	return false
}
694
// hasLeadingSign reports whether the first byte of b is '+' or '-'.
func hasLeadingSign(b []byte) bool {
	if len(b) == 0 {
		return false
	}
	switch b[0] {
	case '+', '-':
		return true
	}
	return false
}
698
699func hasLeadingZeroes(b []byte) bool {
700	if hasLeadingSign(b) {
701		b = b[1:]
702	}
703	return len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9'
704}
705
706func appendToLower(b, s []byte) []byte {
707	if asciiValid(s) { // fast path for ascii strings
708		i := 0
709
710		for j := range s {
711			c := s[j]
712
713			if 'A' <= c && c <= 'Z' {
714				b = append(b, s[i:j]...)
715				b = append(b, c+('a'-'A'))
716				i = j + 1
717			}
718		}
719
720		return append(b, s[i:]...)
721	}
722
723	for _, r := range string(s) {
724		b = appendRune(b, foldRune(r))
725	}
726
727	return b
728}
729
// foldRune maps r to a canonical representative of its simple case-folding
// class, so that runes differing only by case map to the same value.
//
// unicode.SimpleFold only steps to the next rune of the class, so a single
// call is not canonical: it swaps the members of a two-rune class ('é' and
// 'É') and maps 'k' to the Kelvin sign U+212A while 'K' maps to 'k'. The loop
// therefore walks the class until SimpleFold wraps around to its smallest
// member. ASCII upper-case letters are then lowered, which keeps the result
// consistent with the ASCII lowering the rest of this function always did.
//
// Runes without case variants, and invalid runes, are returned unchanged.
func foldRune(r rune) rune {
	for {
		next := unicode.SimpleFold(r)
		if next <= r {
			// Wrapped around (or r has no other variant): next is the
			// smallest rune of the class.
			r = next
			break
		}
		r = next
	}

	if 'A' <= r && r <= 'Z' {
		r += 'a' - 'A'
	}
	return r
}
735}