Engage!

author: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-05 00:37:32 +0100
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-05 00:37:32 +0100
commit: 6960aecc25400320adee1b8802a86839326e15b6 (patch)
tree: 334f7ca9491080a5e6f9a9747da77281c4958ba2 /vendor/golang.org/x/text/internal/language/parse.go
download: hepi-6960aecc25400320adee1b8802a86839326e15b6.tar.gz
1 files changed, 608 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/internal/language/parse.go b/vendor/golang.org/x/text/internal/language/parse.go
new file mode 100644
index 0000000..aad1e0a
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/language/parse.go
@@ -0,0 +1,608 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package language
+import (
+        "bytes"
+        "errors"
+        "fmt"
+        "sort"
+        "golang.org/x/text/internal/tag"
+)
+// isAlpha returns true if the byte is not a digit.
+// b must be an ASCII letter or digit.
+func isAlpha(b byte) bool {
+        return b > '9'
+}
+// isAlphaNum returns true if the string contains only ASCII letters or digits.
+func isAlphaNum(s []byte) bool {
+        for _, c := range s {
+                if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
+                        return false
+                }
+        }
+        return true
+}
+// ErrSyntax is returned by any of the parsing functions when the
+// input is not well-formed, according to BCP 47.
+// TODO: return the position at which the syntax error occurred?
+var ErrSyntax = errors.New("language: tag is not well-formed")
+// ErrDuplicateKey is returned when a tag contains the same key twice with
+// different values in the -u section.
+var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
+// ValueError is returned by any of the parsing functions when the
+// input is well-formed but the respective subtag is not recognized
+// as a valid value.
+type ValueError struct {
+        v [8]byte
+}
+// NewValueError creates a new ValueError.
+func NewValueError(tag []byte) ValueError {
+        var e ValueError
+        copy(e.v[:], tag)
+        return e
+}
+func (e ValueError) tag() []byte {
+        n := bytes.IndexByte(e.v[:], 0)
+        if n == -1 {
+                n = 8
+        }
+        return e.v[:n]
+}
+// Error implements the error interface.
+func (e ValueError) Error() string {
+        return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
+}
+// Subtag returns the subtag for which the error occurred.
+func (e ValueError) Subtag() string {
+        return string(e.tag())
+}
+// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
+type scanner struct {
+        b     []byte
+        bytes [max99thPercentileSize]byte
+        token []byte
+        start int // start position of the current token
+        end   int // end position of the current token
+        next  int // next point for scan
+        err   error
+        done  bool
+}
+func makeScannerString(s string) scanner {
+        scan := scanner{}
+        if len(s) <= len(scan.bytes) {
+                scan.b = scan.bytes[:copy(scan.bytes[:], s)]
+        } else {
+                scan.b = []byte(s)
+        }
+        scan.init()
+        return scan
+}
+// makeScanner returns a scanner using b as the input buffer.
+// b is not copied and may be modified by the scanner routines.
+func makeScanner(b []byte) scanner {
+        scan := scanner{b: b}
+        scan.init()
+        return scan
+}
+func (s *scanner) init() {
+        for i, c := range s.b {
+                if c == '_' {
+                        s.b[i] = '-'
+                }
+        }
+        s.scan()
+}
+// toLower converts the ASCII upper-case letters in s.b[start:end] to lower case in place.
+func (s *scanner) toLower(start, end int) {
+        for i := start; i < end; i++ {
+                c := s.b[i]
+                if 'A' <= c && c <= 'Z' {
+                        s.b[i] += 'a' - 'A'
+                }
+        }
+}
+func (s *scanner) setError(e error) {
+        if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
+                s.err = e
+        }
+}
+// resizeRange shrinks or grows the array at position oldStart such that
+// a new string of size newSize can fit between oldStart and oldEnd.
+// Sets the scan point to after the resized range.
+func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
+        s.start = oldStart
+        if end := oldStart + newSize; end != oldEnd {
+                diff := end - oldEnd
+                var b []byte
+                if n := len(s.b) + diff; n > cap(s.b) {
+                        b = make([]byte, n)
+                        copy(b, s.b[:oldStart])
+                } else {
+                        b = s.b[:n]
+                }
+                copy(b[end:], s.b[oldEnd:])
+                s.b = b
+                s.next = end + (s.next - s.end)
+                s.end = end
+        }
+}
+// replace replaces the current token with repl.
+func (s *scanner) replace(repl string) {
+        s.resizeRange(s.start, s.end, len(repl))
+        copy(s.b[s.start:], repl)
+}
+// gobble removes the current token from the input.
+// Caller must call scan after calling gobble.
+func (s *scanner) gobble(e error) {
+        s.setError(e)
+        if s.start == 0 {
+                s.b = s.b[:+copy(s.b, s.b[s.next:])]
+                s.end = 0
+        } else {
+                s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
+                s.end = s.start - 1
+        }
+        s.next = s.start
+}
+// deleteRange removes the given range from s.b before the current token.
+func (s *scanner) deleteRange(start, end int) {
+        s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
+        diff := end - start
+        s.next -= diff
+        s.start -= diff
+        s.end -= diff
+}
+// scan parses the next token of a BCP 47 string.  Tokens that are larger
+// than 8 characters or include non-alphanumeric characters result in an error
+// and are gobbled and removed from the output.
+// It returns the end position of the last token consumed.
+func (s *scanner) scan() (end int) {
+        end = s.end
+        s.token = nil
+        for s.start = s.next; s.next < len(s.b); {
+                i := bytes.IndexByte(s.b[s.next:], '-')
+                if i == -1 {
+                        s.end = len(s.b)
+                        s.next = len(s.b)
+                        i = s.end - s.start
+                } else {
+                        s.end = s.next + i
+                        s.next = s.end + 1
+                }
+                token := s.b[s.start:s.end]
+                if i < 1 || i > 8 || !isAlphaNum(token) {
+                        s.gobble(ErrSyntax)
+                        continue
+                }
+                s.token = token
+                return end
+        }
+        if n := len(s.b); n > 0 && s.b[n-1] == '-' {
+                s.setError(ErrSyntax)
+                s.b = s.b[:len(s.b)-1]
+        }
+        s.done = true
+        return end
+}
+// acceptMinSize parses multiple tokens of the given size or greater.
+// It returns the end position of the last token consumed.
+func (s *scanner) acceptMinSize(min int) (end int) {
+        end = s.end
+        s.scan()
+        for ; len(s.token) >= min; s.scan() {
+                end = s.end
+        }
+        return end
+}
+// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
+// failed it returns an error and any part of the tag that could be parsed.
+// If parsing succeeded but an unknown value was found, it returns
+// ValueError. The Tag returned in this case is just stripped of the unknown
+// value. All other values are preserved. It accepts tags in the BCP 47 format
+// and extensions to this standard defined in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+func Parse(s string) (t Tag, err error) {
+        // TODO: consider supporting old-style locale key-value pairs.
+        if s == "" {
+                return Und, ErrSyntax
+        }
+        defer func() {
+                if recover() != nil {
+                        t = Und
+                        err = ErrSyntax
+                        return
+                }
+        }()
+        if len(s) <= maxAltTaglen {
+                b := [maxAltTaglen]byte{}
+                for i, c := range s {
+                        // Generating invalid UTF-8 is okay as it won't match.
+                        if 'A' <= c && c <= 'Z' {
+                                c += 'a' - 'A'
+                        } else if c == '_' {
+                                c = '-'
+                        }
+                        b[i] = byte(c)
+                }
+                if t, ok := grandfathered(b); ok {
+                        return t, nil
+                }
+        }
+        scan := makeScannerString(s)
+        return parse(&scan, s)
+}
+func parse(scan *scanner, s string) (t Tag, err error) {
+        t = Und
+        var end int
+        if n := len(scan.token); n <= 1 {
+                scan.toLower(0, len(scan.b))
+                if n == 0 || scan.token[0] != 'x' {
+                        return t, ErrSyntax
+                }
+                end = parseExtensions(scan)
+        } else if n >= 4 {
+                return Und, ErrSyntax
+        } else { // the usual case
+                t, end = parseTag(scan, true)
+                if n := len(scan.token); n == 1 {
+                        t.pExt = uint16(end)
+                        end = parseExtensions(scan)
+                } else if end < len(scan.b) {
+                        scan.setError(ErrSyntax)
+                        scan.b = scan.b[:end]
+                }
+        }
+        if int(t.pVariant) < len(scan.b) {
+                if end < len(s) {
+                        s = s[:end]
+                }
+                if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
+                        t.str = s
+                } else {
+                        t.str = string(scan.b)
+                }
+        } else {
+                t.pVariant, t.pExt = 0, 0
+        }
+        return t, scan.err
+}
+// parseTag parses language, script, region and variants.
+// It returns a Tag and the end position in the input that was parsed.
+// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
+func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
+        var e error
+        // TODO: set an error if an unknown lang, script or region is encountered.
+        t.LangID, e = getLangID(scan.token)
+        scan.setError(e)
+        scan.replace(t.LangID.String())
+        langStart := scan.start
+        end = scan.scan()
+        for len(scan.token) == 3 && isAlpha(scan.token[0]) {
+                // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
+                // to a tag of the form <extlang>.
+                if doNorm {
+                        lang, e := getLangID(scan.token)
+                        if lang != 0 {
+                                t.LangID = lang
+                                langStr := lang.String()
+                                copy(scan.b[langStart:], langStr)
+                                scan.b[langStart+len(langStr)] = '-'
+                                scan.start = langStart + len(langStr) + 1
+                        }
+                        scan.gobble(e)
+                }
+                end = scan.scan()
+        }
+        if len(scan.token) == 4 && isAlpha(scan.token[0]) {
+                t.ScriptID, e = getScriptID(script, scan.token)
+                if t.ScriptID == 0 {
+                        scan.gobble(e)
+                }
+                end = scan.scan()
+        }
+        if n := len(scan.token); n >= 2 && n <= 3 {
+                t.RegionID, e = getRegionID(scan.token)
+                if t.RegionID == 0 {
+                        scan.gobble(e)
+                } else {
+                        scan.replace(t.RegionID.String())
+                }
+                end = scan.scan()
+        }
+        scan.toLower(scan.start, len(scan.b))
+        t.pVariant = byte(end)
+        end = parseVariants(scan, end, t)
+        t.pExt = uint16(end)
+        return t, end
+}
+var separator = []byte{'-'}
+// parseVariants scans tokens as long as each token is a valid variant string.
+// Duplicate variants are removed.
+func parseVariants(scan *scanner, end int, t Tag) int {
+        start := scan.start
+        varIDBuf := [4]uint8{}
+        variantBuf := [4][]byte{}
+        varID := varIDBuf[:0]
+        variant := variantBuf[:0]
+        last := -1
+        needSort := false
+        for ; len(scan.token) >= 4; scan.scan() {
+                // TODO: measure the impact of needing this conversion and redesign
+                // the data structure if there is an issue.
+                v, ok := variantIndex[string(scan.token)]
+                if !ok {
+                        // unknown variant
+                        // TODO: allow user-defined variants?
+                        scan.gobble(NewValueError(scan.token))
+                        continue
+                }
+                varID = append(varID, v)
+                variant = append(variant, scan.token)
+                if !needSort {
+                        if last < int(v) {
+                                last = int(v)
+                        } else {
+                                needSort = true
+                                // There are no legal combinations of more than 7 variants
+                                // (and this is by no means a useful sequence).
+                                const maxVariants = 8
+                                if len(varID) > maxVariants {
+                                        break
+                                }
+                        }
+                }
+                end = scan.end
+        }
+        if needSort {
+                sort.Sort(variantsSort{varID, variant})
+                k, l := 0, -1
+                for i, v := range varID {
+                        w := int(v)
+                        if l == w {
+                                // Remove duplicates.
+                                continue
+                        }
+                        varID[k] = varID[i]
+                        variant[k] = variant[i]
+                        k++
+                        l = w
+                }
+                if str := bytes.Join(variant[:k], separator); len(str) == 0 {
+                        end = start - 1
+                } else {
+                        scan.resizeRange(start, end, len(str))
+                        copy(scan.b[scan.start:], str)
+                        end = scan.end
+                }
+        }
+        return end
+}
+type variantsSort struct {
+        i []uint8
+        v [][]byte
+}
+func (s variantsSort) Len() int {
+        return len(s.i)
+}
+func (s variantsSort) Swap(i, j int) {
+        s.i[i], s.i[j] = s.i[j], s.i[i]
+        s.v[i], s.v[j] = s.v[j], s.v[i]
+}
+func (s variantsSort) Less(i, j int) bool {
+        return s.i[i] < s.i[j]
+}
+type bytesSort struct {
+        b [][]byte
+        n int // first n bytes to compare
+}
+func (b bytesSort) Len() int {
+        return len(b.b)
+}
+func (b bytesSort) Swap(i, j int) {
+        b.b[i], b.b[j] = b.b[j], b.b[i]
+}
+func (b bytesSort) Less(i, j int) bool {
+        for k := 0; k < b.n; k++ {
+                if b.b[i][k] == b.b[j][k] {
+                        continue
+                }
+                return b.b[i][k] < b.b[j][k]
+        }
+        return false
+}
+// parseExtensions parses and normalizes the extensions in the buffer.
+// It returns the last position of scan.b that is part of any extension.
+// It also trims scan.b to remove excess parts accordingly.
+func parseExtensions(scan *scanner) int {
+        start := scan.start
+        exts := [][]byte{}
+        private := []byte{}
+        end := scan.end
+        for len(scan.token) == 1 {
+                extStart := scan.start
+                ext := scan.token[0]
+                end = parseExtension(scan)
+                extension := scan.b[extStart:end]
+                if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
+                        scan.setError(ErrSyntax)
+                        end = extStart
+                        continue
+                } else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
+                        scan.b = scan.b[:end]
+                        return end
+                } else if ext == 'x' {
+                        private = extension
+                        break
+                }
+                exts = append(exts, extension)
+        }
+        sort.Sort(bytesSort{exts, 1})
+        if len(private) > 0 {
+                exts = append(exts, private)
+        }
+        scan.b = scan.b[:start]
+        if len(exts) > 0 {
+                scan.b = append(scan.b, bytes.Join(exts, separator)...)
+        } else if start > 0 {
+                // Strip trailing '-'.
+                scan.b = scan.b[:start-1]
+        }
+        return end
+}
+// parseExtension parses a single extension and returns the position of
+// the extension end.
+func parseExtension(scan *scanner) int {
+        start, end := scan.start, scan.end
+        switch scan.token[0] {
+        case 'u': // https://www.ietf.org/rfc/rfc6067.txt
+                attrStart := end
+                scan.scan()
+                for last := []byte{}; len(scan.token) > 2; scan.scan() {
+                        if bytes.Compare(scan.token, last) != -1 {
+                                // Attributes are unsorted. Start over from scratch.
+                                p := attrStart + 1
+                                scan.next = p
+                                attrs := [][]byte{}
+                                for scan.scan(); len(scan.token) > 2; scan.scan() {
+                                        attrs = append(attrs, scan.token)
+                                        end = scan.end
+                                }
+                                sort.Sort(bytesSort{attrs, 3})
+                                copy(scan.b[p:], bytes.Join(attrs, separator))
+                                break
+                        }
+                        last = scan.token
+                        end = scan.end
+                }
+                // Scan key-type sequences. A key is of length 2 and may be followed
+                // by 0 or more "type" subtags from 3 to the maximum of 8 letters.
+                var last, key []byte
+                for attrEnd := end; len(scan.token) == 2; last = key {
+                        key = scan.token
+                        end = scan.end
+                        for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
+                                end = scan.end
+                        }
+                        // TODO: check key value validity
+                        if bytes.Compare(key, last) != 1 || scan.err != nil {
+                                // We have an invalid key or the keys are not sorted.
+                                // Start scanning keys from scratch and reorder.
+                                p := attrEnd + 1
+                                scan.next = p
+                                keys := [][]byte{}
+                                for scan.scan(); len(scan.token) == 2; {
+                                        keyStart := scan.start
+                                        end = scan.end
+                                        for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
+                                                end = scan.end
+                                        }
+                                        keys = append(keys, scan.b[keyStart:end])
+                                }
+                                sort.Stable(bytesSort{keys, 2})
+                                if n := len(keys); n > 0 {
+                                        k := 0
+                                        for i := 1; i < n; i++ {
+                                                if !bytes.Equal(keys[k][:2], keys[i][:2]) {
+                                                        k++
+                                                        keys[k] = keys[i]
+                                                } else if !bytes.Equal(keys[k], keys[i]) {
+                                                        scan.setError(ErrDuplicateKey)
+                                                }
+                                        }
+                                        keys = keys[:k+1]
+                                }
+                                reordered := bytes.Join(keys, separator)
+                                if e := p + len(reordered); e < end {
+                                        scan.deleteRange(e, end)
+                                        end = e
+                                }
+                                copy(scan.b[p:], reordered)
+                                break
+                        }
+                }
+        case 't': // https://www.ietf.org/rfc/rfc6497.txt
+                scan.scan()
+                if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
+                        _, end = parseTag(scan, false)
+                        scan.toLower(start, end)
+                }
+                for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
+                        end = scan.acceptMinSize(3)
+                }
+        case 'x':
+                end = scan.acceptMinSize(1)
+        default:
+                end = scan.acceptMinSize(2)
+        }
+        return end
+}
+// getExtension returns the end position and the text of the extension of s starting at p, skipping a leading '-'.
+func getExtension(s string, p int) (end int, ext string) {
+        if s[p] == '-' {
+                p++
+        }
+        if s[p] == 'x' {
+                return len(s), s[p:]
+        }
+        end = nextExtension(s, p)
+        return end, s[p:end]
+}
+// nextExtension finds the next extension within the string, searching
+// for the -<char>- pattern from position p.
+// In the vast majority of cases, language tags will have at most
+// one extension and extensions tend to be small.
+func nextExtension(s string, p int) int {
+        for n := len(s) - 3; p < n; {
+                if s[p] == '-' {
+                        if s[p+2] == '-' {
+                                return p
+                        }
+                        p += 3
+                } else {
+                        p++
+                }
+        }
+        return len(s)
+}
author	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-02-05 00:37:32 +0100
committer	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-02-05 00:37:32 +0100
commit	6960aecc25400320adee1b8802a86839326e15b6 (patch)
tree	334f7ca9491080a5e6f9a9747da77281c4958ba2 /vendor/golang.org/x/text/internal/language/parse.go
download	hepi-6960aecc25400320adee1b8802a86839326e15b6.tar.gz

diff --git a/vendor/golang.org/x/text/internal/language/parse.go b/vendor/golang.org/x/text/internal/language/parse.go
new file mode 100644
index 0000000..aad1e0a
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/language/parse.go
@@ -0,0 +1,608 @@
	1	// Copyright 2013 The Go Authors. All rights reserved.
	2	// Use of this source code is governed by a BSD-style
	3	// license that can be found in the LICENSE file.
	4
	5	package language
	6
	7	import (
	8	"bytes"
	9	"errors"
	10	"fmt"
	11	"sort"
	12
	13	"golang.org/x/text/internal/tag"
	14	)
	15
	16	// isAlpha returns true if the byte is not a digit.
	17	// b must be an ASCII letter or digit.
	18	func isAlpha(b byte) bool {
	19	return b > '9'
	20	}
	21
	22	// isAlphaNum returns true if the string contains only ASCII letters or digits.
	23	func isAlphaNum(s []byte) bool {
	24	for _, c := range s {
	25	if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
	26	return false
	27	}
	28	}
	29	return true
	30	}
	31
	32	// ErrSyntax is returned by any of the parsing functions when the
	33	// input is not well-formed, according to BCP 47.
	34	// TODO: return the position at which the syntax error occurred?
	35	var ErrSyntax = errors.New("language: tag is not well-formed")
	36
	37	// ErrDuplicateKey is returned when a tag contains the same key twice with
	38	// different values in the -u section.
	39	var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
	40
	41	// ValueError is returned by any of the parsing functions when the
	42	// input is well-formed but the respective subtag is not recognized
	43	// as a valid value.
	44	type ValueError struct {
	45	v [8]byte
	46	}
	47
	48	// NewValueError creates a new ValueError.
	49	func NewValueError(tag []byte) ValueError {
	50	var e ValueError
	51	copy(e.v[:], tag)
	52	return e
	53	}
	54
	55	func (e ValueError) tag() []byte {
	56	n := bytes.IndexByte(e.v[:], 0)
	57	if n == -1 {
	58	n = 8
	59	}
	60	return e.v[:n]
	61	}
	62
	63	// Error implements the error interface.
	64	func (e ValueError) Error() string {
	65	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
	66	}
	67
	68	// Subtag returns the subtag for which the error occurred.
	69	func (e ValueError) Subtag() string {
	70	return string(e.tag())
	71	}
	72
	73	// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
	74	type scanner struct {
	75	b []byte
	76	bytes [max99thPercentileSize]byte
	77	token []byte
	78	start int // start position of the current token
	79	end int // end position of the current token
	80	next int // next point for scan
	81	err error
	82	done bool
	83	}
	84
	85	func makeScannerString(s string) scanner {
	86	scan := scanner{}
	87	if len(s) <= len(scan.bytes) {
	88	scan.b = scan.bytes[:copy(scan.bytes[:], s)]
	89	} else {
	90	scan.b = []byte(s)
	91	}
	92	scan.init()
	93	return scan
	94	}
	95
	96	// makeScanner returns a scanner using b as the input buffer.
	97	// b is not copied and may be modified by the scanner routines.
	98	func makeScanner(b []byte) scanner {
	99	scan := scanner{b: b}
	100	scan.init()
	101	return scan
	102	}
	103
	104	func (s *scanner) init() {
	105	for i, c := range s.b {
	106	if c == '_' {
	107	s.b[i] = '-'
	108	}
	109	}
	110	s.scan()
	111	}
	112
	113	// toLower converts the ASCII upper-case letters in s.b[start:end] to lower case in place.
	114	func (s *scanner) toLower(start, end int) {
	115	for i := start; i < end; i++ {
	116	c := s.b[i]
	117	if 'A' <= c && c <= 'Z' {
	118	s.b[i] += 'a' - 'A'
	119	}
	120	}
	121	}
	122
	123	func (s *scanner) setError(e error) {
	124	if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
	125	s.err = e
	126	}
	127	}
	128
	129	// resizeRange shrinks or grows the array at position oldStart such that
	130	// a new string of size newSize can fit between oldStart and oldEnd.
	131	// Sets the scan point to after the resized range.
	132	func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
	133	s.start = oldStart
	134	if end := oldStart + newSize; end != oldEnd {
	135	diff := end - oldEnd
	136	var b []byte
	137	if n := len(s.b) + diff; n > cap(s.b) {
	138	b = make([]byte, n)
	139	copy(b, s.b[:oldStart])
	140	} else {
	141	b = s.b[:n]
	142	}
	143	copy(b[end:], s.b[oldEnd:])
	144	s.b = b
	145	s.next = end + (s.next - s.end)
	146	s.end = end
	147	}
	148	}
	149
	150	// replace replaces the current token with repl.
	151	func (s *scanner) replace(repl string) {
	152	s.resizeRange(s.start, s.end, len(repl))
	153	copy(s.b[s.start:], repl)
	154	}
	155
	156	// gobble removes the current token from the input.
	157	// Caller must call scan after calling gobble.
	158	func (s *scanner) gobble(e error) {
	159	s.setError(e)
	160	if s.start == 0 {
	161	s.b = s.b[:+copy(s.b, s.b[s.next:])]
	162	s.end = 0
	163	} else {
	164	s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
	165	s.end = s.start - 1
	166	}
	167	s.next = s.start
	168	}
	169
	170	// deleteRange removes the given range from s.b before the current token.
	171	func (s *scanner) deleteRange(start, end int) {
	172	s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
	173	diff := end - start
	174	s.next -= diff
	175	s.start -= diff
	176	s.end -= diff
	177	}
	178
	179	// scan parses the next token of a BCP 47 string. Tokens that are larger
	180	// than 8 characters or include non-alphanumeric characters result in an error
	181	// and are gobbled and removed from the output.
	182	// It returns the end position of the last token consumed.
	183	func (s *scanner) scan() (end int) {
	184	end = s.end
	185	s.token = nil
	186	for s.start = s.next; s.next < len(s.b); {
	187	i := bytes.IndexByte(s.b[s.next:], '-')
	188	if i == -1 {
	189	s.end = len(s.b)
	190	s.next = len(s.b)
	191	i = s.end - s.start
	192	} else {
	193	s.end = s.next + i
	194	s.next = s.end + 1
	195	}
	196	token := s.b[s.start:s.end]
	197	if i < 1 || i > 8 || !isAlphaNum(token) {
	198	s.gobble(ErrSyntax)
	199	continue
	200	}
	201	s.token = token
	202	return end
	203	}
	204	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
	205	s.setError(ErrSyntax)
	206	s.b = s.b[:len(s.b)-1]
	207	}
	208	s.done = true
	209	return end
	210	}
	211
	212	// acceptMinSize parses multiple tokens of the given size or greater.
	213	// It returns the end position of the last token consumed.
	214	func (s *scanner) acceptMinSize(min int) (end int) {
	215	end = s.end
	216	s.scan()
	217	for ; len(s.token) >= min; s.scan() {
	218	end = s.end
	219	}
	220	return end
	221	}
	222
	223	// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
	224	// failed it returns an error and any part of the tag that could be parsed.
	225	// If parsing succeeded but an unknown value was found, it returns
	226	// ValueError. The Tag returned in this case is just stripped of the unknown
	227	// value. All other values are preserved. It accepts tags in the BCP 47 format
	228	// and extensions to this standard defined in
	229	// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
	230	func Parse(s string) (t Tag, err error) {
	231	// TODO: consider supporting old-style locale key-value pairs.
	232	if s == "" {
	233	return Und, ErrSyntax
	234	}
	235	defer func() {
	236	if recover() != nil {
	237	t = Und
	238	err = ErrSyntax
	239	return
	240	}
	241	}()
	242	if len(s) <= maxAltTaglen {
	243	b := [maxAltTaglen]byte{}
	244	for i, c := range s {
	245	// Generating invalid UTF-8 is okay as it won't match.
	246	if 'A' <= c && c <= 'Z' {
	247	c += 'a' - 'A'
	248	} else if c == '_' {
	249	c = '-'
	250	}
	251	b[i] = byte(c)
	252	}
	253	if t, ok := grandfathered(b); ok {
	254	return t, nil
	255	}
	256	}
	257	scan := makeScannerString(s)
	258	return parse(&scan, s)
	259	}
	260
	261	func parse(scan *scanner, s string) (t Tag, err error) {
	262	t = Und
	263	var end int
	264	if n := len(scan.token); n <= 1 {
	265	scan.toLower(0, len(scan.b))
	266	if n == 0 || scan.token[0] != 'x' {
	267	return t, ErrSyntax
	268	}
	269	end = parseExtensions(scan)
	270	} else if n >= 4 {
	271	return Und, ErrSyntax
	272	} else { // the usual case
	273	t, end = parseTag(scan, true)
	274	if n := len(scan.token); n == 1 {
	275	t.pExt = uint16(end)
	276	end = parseExtensions(scan)
	277	} else if end < len(scan.b) {
	278	scan.setError(ErrSyntax)
	279	scan.b = scan.b[:end]
	280	}
	281	}
	282	if int(t.pVariant) < len(scan.b) {
	283	if end < len(s) {
	284	s = s[:end]
	285	}
	286	if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
	287	t.str = s
	288	} else {
	289	t.str = string(scan.b)
	290	}
	291	} else {
	292	t.pVariant, t.pExt = 0, 0
	293	}
	294	return t, scan.err
	295	}
	296
	297	// parseTag parses language, script, region and variants.
	298	// It returns a Tag and the end position in the input that was parsed.
	299	// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
	300	func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
	301	var e error
	302	// TODO: set an error if an unknown lang, script or region is encountered.
	303	t.LangID, e = getLangID(scan.token)
	304	scan.setError(e)
	305	scan.replace(t.LangID.String())
	306	langStart := scan.start
	307	end = scan.scan()
	308	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
	309	// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
	310	// to a tag of the form <extlang>.
	311	if doNorm {
	312	lang, e := getLangID(scan.token)
	313	if lang != 0 {
	314	t.LangID = lang
	315	langStr := lang.String()
	316	copy(scan.b[langStart:], langStr)
	317	scan.b[langStart+len(langStr)] = '-'
	318	scan.start = langStart + len(langStr) + 1
	319	}
	320	scan.gobble(e)
	321	}
	322	end = scan.scan()
	323	}
	324	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
	325	t.ScriptID, e = getScriptID(script, scan.token)
	326	if t.ScriptID == 0 {
	327	scan.gobble(e)
	328	}
	329	end = scan.scan()
	330	}
	331	if n := len(scan.token); n >= 2 && n <= 3 {
	332	t.RegionID, e = getRegionID(scan.token)
	333	if t.RegionID == 0 {
	334	scan.gobble(e)
	335	} else {
	336	scan.replace(t.RegionID.String())
	337	}
	338	end = scan.scan()
	339	}
	340	scan.toLower(scan.start, len(scan.b))
	341	t.pVariant = byte(end)
	342	end = parseVariants(scan, end, t)
	343	t.pExt = uint16(end)
	344	return t, end
	345	}
	346
	347	var separator = []byte{'-'}
	348
	349	// parseVariants scans tokens as long as each token is a valid variant string.
	350	// Duplicate variants are removed.
	351	func parseVariants(scan *scanner, end int, t Tag) int {
	352	start := scan.start
	353	varIDBuf := [4]uint8{}
	354	variantBuf := [4][]byte{}
	355	varID := varIDBuf[:0]
	356	variant := variantBuf[:0]
	357	last := -1
	358	needSort := false
	359	for ; len(scan.token) >= 4; scan.scan() {
	360	// TODO: measure the impact of needing this conversion and redesign
	361	// the data structure if there is an issue.
	362	v, ok := variantIndex[string(scan.token)]
	363	if !ok {
	364	// unknown variant
	365	// TODO: allow user-defined variants?
	366	scan.gobble(NewValueError(scan.token))
	367	continue
	368	}
	369	varID = append(varID, v)
	370	variant = append(variant, scan.token)
	371	if !needSort {
	372	if last < int(v) {
	373	last = int(v)
	374	} else {
	375	needSort = true
	376	// There is no legal combinations of more than 7 variants
	377	// (and this is by no means a useful sequence).
	378	const maxVariants = 8
	379	if len(varID) > maxVariants {
	380	break
	381	}
	382	}
	383	}
	384	end = scan.end
	385	}
	386	if needSort {
	387	sort.Sort(variantsSort{varID, variant})
	388	k, l := 0, -1
	389	for i, v := range varID {
	390	w := int(v)
	391	if l == w {
	392	// Remove duplicates.
	393	continue
	394	}
	395	varID[k] = varID[i]
	396	variant[k] = variant[i]
	397	k++
	398	l = w
	399	}
	400	if str := bytes.Join(variant[:k], separator); len(str) == 0 {
	401	end = start - 1
	402	} else {
	403	scan.resizeRange(start, end, len(str))
	404	copy(scan.b[scan.start:], str)
	405	end = scan.end
	406	}
	407	}
	408	return end
	409	}
	410
	411	type variantsSort struct {
	412	i []uint8
	413	v [][]byte
	414	}
	415
	416	func (s variantsSort) Len() int {
	417	return len(s.i)
	418	}
	419
	420	func (s variantsSort) Swap(i, j int) {
	421	s.i[i], s.i[j] = s.i[j], s.i[i]
	422	s.v[i], s.v[j] = s.v[j], s.v[i]
	423	}
	424
	425	func (s variantsSort) Less(i, j int) bool {
	426	return s.i[i] < s.i[j]
	427	}
	428
	429	type bytesSort struct {
	430	b [][]byte
	431	n int // first n bytes to compare
	432	}
	433
	434	func (b bytesSort) Len() int {
	435	return len(b.b)
	436	}
	437
	438	func (b bytesSort) Swap(i, j int) {
	439	b.b[i], b.b[j] = b.b[j], b.b[i]
	440	}
	441
	442	func (b bytesSort) Less(i, j int) bool {
	443	for k := 0; k < b.n; k++ {
	444	if b.b[i][k] == b.b[j][k] {
	445	continue
	446	}
	447	return b.b[i][k] < b.b[j][k]
	448	}
	449	return false
	450	}
	451
	452	// parseExtensions parses and normalizes the extensions in the buffer.
	453	// It returns the last position of scan.b that is part of any extension.
	454	// It also trims scan.b to remove excess parts accordingly.
	455	func parseExtensions(scan *scanner) int {
	456	start := scan.start
	457	exts := [][]byte{}
	458	private := []byte{}
	459	end := scan.end
	460	for len(scan.token) == 1 {
	461	extStart := scan.start
	462	ext := scan.token[0]
	463	end = parseExtension(scan)
	464	extension := scan.b[extStart:end]
	465	if len(extension) < 3 \|\| (ext != 'x' && len(extension) < 4) {
	466	scan.setError(ErrSyntax)
	467	end = extStart
	468	continue
	469	} else if start == extStart && (ext == 'x' \|\| scan.start == len(scan.b)) {
	470	scan.b = scan.b[:end]
	471	return end
	472	} else if ext == 'x' {
	473	private = extension
	474	break
	475	}
	476	exts = append(exts, extension)
	477	}
	478	sort.Sort(bytesSort{exts, 1})
	479	if len(private) > 0 {
	480	exts = append(exts, private)
	481	}
	482	scan.b = scan.b[:start]
	483	if len(exts) > 0 {
	484	scan.b = append(scan.b, bytes.Join(exts, separator)...)
	485	} else if start > 0 {
	486	// Strip trailing '-'.
	487	scan.b = scan.b[:start-1]
	488	}
	489	return end
	490	}
	491
	492	// parseExtension parses a single extension and returns the position of
	493	// the extension end.
	494	func parseExtension(scan *scanner) int {
	495	start, end := scan.start, scan.end
	496	switch scan.token[0] {
	497	case 'u': // https://www.ietf.org/rfc/rfc6067.txt
	498	attrStart := end
	499	scan.scan()
	500	for last := []byte{}; len(scan.token) > 2; scan.scan() {
	501	if bytes.Compare(scan.token, last) != -1 {
	502	// Attributes are unsorted. Start over from scratch.
	503	p := attrStart + 1
	504	scan.next = p
	505	attrs := [][]byte{}
	506	for scan.scan(); len(scan.token) > 2; scan.scan() {
	507	attrs = append(attrs, scan.token)
	508	end = scan.end
	509	}
	510	sort.Sort(bytesSort{attrs, 3})
	511	copy(scan.b[p:], bytes.Join(attrs, separator))
	512	break
	513	}
	514	last = scan.token
	515	end = scan.end
	516	}
	517	// Scan key-type sequences. A key is of length 2 and may be followed
	518	// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
	519	var last, key []byte
	520	for attrEnd := end; len(scan.token) == 2; last = key {
	521	key = scan.token
	522	end = scan.end
	523	for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
	524	end = scan.end
	525	}
	526	// TODO: check key value validity
	527	if bytes.Compare(key, last) != 1 \|\| scan.err != nil {
	528	// We have an invalid key or the keys are not sorted.
	529	// Start scanning keys from scratch and reorder.
	530	p := attrEnd + 1
	531	scan.next = p
	532	keys := [][]byte{}
	533	for scan.scan(); len(scan.token) == 2; {
	534	keyStart := scan.start
	535	end = scan.end
	536	for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
	537	end = scan.end
	538	}
	539	keys = append(keys, scan.b[keyStart:end])
	540	}
	541	sort.Stable(bytesSort{keys, 2})
	542	if n := len(keys); n > 0 {
	543	k := 0
	544	for i := 1; i < n; i++ {
	545	if !bytes.Equal(keys[k][:2], keys[i][:2]) {
	546	k++
	547	keys[k] = keys[i]
	548	} else if !bytes.Equal(keys[k], keys[i]) {
	549	scan.setError(ErrDuplicateKey)
	550	}
	551	}
	552	keys = keys[:k+1]
	553	}
	554	reordered := bytes.Join(keys, separator)
	555	if e := p + len(reordered); e < end {
	556	scan.deleteRange(e, end)
	557	end = e
	558	}
	559	copy(scan.b[p:], reordered)
	560	break
	561	}
	562	}
	563	case 't': // https://www.ietf.org/rfc/rfc6497.txt
	564	scan.scan()
	565	if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
	566	_, end = parseTag(scan, false)
	567	scan.toLower(start, end)
	568	}
	569	for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
	570	end = scan.acceptMinSize(3)
	571	}
	572	case 'x':
	573	end = scan.acceptMinSize(1)
	574	default:
	575	end = scan.acceptMinSize(2)
	576	}
	577	return end
	578	}
	579
	580	// getExtension returns the name, body and end position of the extension.
	581	func getExtension(s string, p int) (end int, ext string) {
	582	if s[p] == '-' {
	583	p++
	584	}
	585	if s[p] == 'x' {
	586	return len(s), s[p:]
	587	}
	588	end = nextExtension(s, p)
	589	return end, s[p:end]
	590	}
	591
	592	// nextExtension finds the next extension within the string, searching
	593	// for the -<char>- pattern from position p.
	594	// In the fast majority of cases, language tags will have at most
	595	// one extension and extensions tend to be small.
	596	func nextExtension(s string, p int) int {
	597	for n := len(s) - 3; p < n; {
	598	if s[p] == '-' {
	599	if s[p+2] == '-' {
	600	return p
	601	}
	602	p += 3
	603	} else {
	604	p++
	605	}
	606	}
	607	return len(s)
	608	}