diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-05 00:37:32 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-05 00:37:32 +0100 |
| commit | 6960aecc25400320adee1b8802a86839326e15b6 (patch) | |
| tree | 334f7ca9491080a5e6f9a9747da77281c4958ba2 /vendor/golang.org/x/text/internal/language/parse.go | |
| download | hepi-6960aecc25400320adee1b8802a86839326e15b6.tar.gz | |
Engage!
Diffstat (limited to 'vendor/golang.org/x/text/internal/language/parse.go')
| -rw-r--r-- | vendor/golang.org/x/text/internal/language/parse.go | 608 |
1 files changed, 608 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/internal/language/parse.go b/vendor/golang.org/x/text/internal/language/parse.go new file mode 100644 index 0000000..aad1e0a --- /dev/null +++ b/vendor/golang.org/x/text/internal/language/parse.go | |||
| @@ -0,0 +1,608 @@ | |||
| 1 | // Copyright 2013 The Go Authors. All rights reserved. | ||
| 2 | // Use of this source code is governed by a BSD-style | ||
| 3 | // license that can be found in the LICENSE file. | ||
| 4 | |||
| 5 | package language | ||
| 6 | |||
| 7 | import ( | ||
| 8 | "bytes" | ||
| 9 | "errors" | ||
| 10 | "fmt" | ||
| 11 | "sort" | ||
| 12 | |||
| 13 | "golang.org/x/text/internal/tag" | ||
| 14 | ) | ||
| 15 | |||
| 16 | // isAlpha returns true if the byte is not a digit. | ||
| 17 | // b must be an ASCII letter or digit. | ||
| 18 | func isAlpha(b byte) bool { | ||
| 19 | return b > '9' | ||
| 20 | } | ||
| 21 | |||
| 22 | // isAlphaNum returns true if the string contains only ASCII letters or digits. | ||
| 23 | func isAlphaNum(s []byte) bool { | ||
| 24 | for _, c := range s { | ||
| 25 | if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') { | ||
| 26 | return false | ||
| 27 | } | ||
| 28 | } | ||
| 29 | return true | ||
| 30 | } | ||
| 31 | |||
| 32 | // ErrSyntax is returned by any of the parsing functions when the | ||
| 33 | // input is not well-formed, according to BCP 47. | ||
| 34 | // TODO: return the position at which the syntax error occurred? | ||
| 35 | var ErrSyntax = errors.New("language: tag is not well-formed") | ||
| 36 | |||
| 37 | // ErrDuplicateKey is returned when a tag contains the same key twice with | ||
| 38 | // different values in the -u section. | ||
| 39 | var ErrDuplicateKey = errors.New("language: different values for same key in -u extension") | ||
| 40 | |||
| 41 | // ValueError is returned by any of the parsing functions when the | ||
| 42 | // input is well-formed but the respective subtag is not recognized | ||
| 43 | // as a valid value. | ||
| 44 | type ValueError struct { | ||
| 45 | v [8]byte | ||
| 46 | } | ||
| 47 | |||
| 48 | // NewValueError creates a new ValueError. | ||
| 49 | func NewValueError(tag []byte) ValueError { | ||
| 50 | var e ValueError | ||
| 51 | copy(e.v[:], tag) | ||
| 52 | return e | ||
| 53 | } | ||
| 54 | |||
| 55 | func (e ValueError) tag() []byte { | ||
| 56 | n := bytes.IndexByte(e.v[:], 0) | ||
| 57 | if n == -1 { | ||
| 58 | n = 8 | ||
| 59 | } | ||
| 60 | return e.v[:n] | ||
| 61 | } | ||
| 62 | |||
| 63 | // Error implements the error interface. | ||
| 64 | func (e ValueError) Error() string { | ||
| 65 | return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag()) | ||
| 66 | } | ||
| 67 | |||
| 68 | // Subtag returns the subtag for which the error occurred. | ||
| 69 | func (e ValueError) Subtag() string { | ||
| 70 | return string(e.tag()) | ||
| 71 | } | ||
| 72 | |||
| 73 | // scanner is used to scan BCP 47 tokens, which are separated by _ or -. | ||
| 74 | type scanner struct { | ||
| 75 | b []byte | ||
| 76 | bytes [max99thPercentileSize]byte | ||
| 77 | token []byte | ||
| 78 | start int // start position of the current token | ||
| 79 | end int // end position of the current token | ||
| 80 | next int // next point for scan | ||
| 81 | err error | ||
| 82 | done bool | ||
| 83 | } | ||
| 84 | |||
| 85 | func makeScannerString(s string) scanner { | ||
| 86 | scan := scanner{} | ||
| 87 | if len(s) <= len(scan.bytes) { | ||
| 88 | scan.b = scan.bytes[:copy(scan.bytes[:], s)] | ||
| 89 | } else { | ||
| 90 | scan.b = []byte(s) | ||
| 91 | } | ||
| 92 | scan.init() | ||
| 93 | return scan | ||
| 94 | } | ||
| 95 | |||
| 96 | // makeScanner returns a scanner using b as the input buffer. | ||
| 97 | // b is not copied and may be modified by the scanner routines. | ||
| 98 | func makeScanner(b []byte) scanner { | ||
| 99 | scan := scanner{b: b} | ||
| 100 | scan.init() | ||
| 101 | return scan | ||
| 102 | } | ||
| 103 | |||
| 104 | func (s *scanner) init() { | ||
| 105 | for i, c := range s.b { | ||
| 106 | if c == '_' { | ||
| 107 | s.b[i] = '-' | ||
| 108 | } | ||
| 109 | } | ||
| 110 | s.scan() | ||
| 111 | } | ||
| 112 | |||
| 113 | // restToLower converts the string between start and end to lower case. | ||
| 114 | func (s *scanner) toLower(start, end int) { | ||
| 115 | for i := start; i < end; i++ { | ||
| 116 | c := s.b[i] | ||
| 117 | if 'A' <= c && c <= 'Z' { | ||
| 118 | s.b[i] += 'a' - 'A' | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | func (s *scanner) setError(e error) { | ||
| 124 | if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) { | ||
| 125 | s.err = e | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | // resizeRange shrinks or grows the array at position oldStart such that | ||
| 130 | // a new string of size newSize can fit between oldStart and oldEnd. | ||
| 131 | // Sets the scan point to after the resized range. | ||
| 132 | func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) { | ||
| 133 | s.start = oldStart | ||
| 134 | if end := oldStart + newSize; end != oldEnd { | ||
| 135 | diff := end - oldEnd | ||
| 136 | var b []byte | ||
| 137 | if n := len(s.b) + diff; n > cap(s.b) { | ||
| 138 | b = make([]byte, n) | ||
| 139 | copy(b, s.b[:oldStart]) | ||
| 140 | } else { | ||
| 141 | b = s.b[:n] | ||
| 142 | } | ||
| 143 | copy(b[end:], s.b[oldEnd:]) | ||
| 144 | s.b = b | ||
| 145 | s.next = end + (s.next - s.end) | ||
| 146 | s.end = end | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | // replace replaces the current token with repl. | ||
| 151 | func (s *scanner) replace(repl string) { | ||
| 152 | s.resizeRange(s.start, s.end, len(repl)) | ||
| 153 | copy(s.b[s.start:], repl) | ||
| 154 | } | ||
| 155 | |||
| 156 | // gobble removes the current token from the input. | ||
| 157 | // Caller must call scan after calling gobble. | ||
| 158 | func (s *scanner) gobble(e error) { | ||
| 159 | s.setError(e) | ||
| 160 | if s.start == 0 { | ||
| 161 | s.b = s.b[:+copy(s.b, s.b[s.next:])] | ||
| 162 | s.end = 0 | ||
| 163 | } else { | ||
| 164 | s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])] | ||
| 165 | s.end = s.start - 1 | ||
| 166 | } | ||
| 167 | s.next = s.start | ||
| 168 | } | ||
| 169 | |||
| 170 | // deleteRange removes the given range from s.b before the current token. | ||
| 171 | func (s *scanner) deleteRange(start, end int) { | ||
| 172 | s.b = s.b[:start+copy(s.b[start:], s.b[end:])] | ||
| 173 | diff := end - start | ||
| 174 | s.next -= diff | ||
| 175 | s.start -= diff | ||
| 176 | s.end -= diff | ||
| 177 | } | ||
| 178 | |||
| 179 | // scan parses the next token of a BCP 47 string. Tokens that are larger | ||
| 180 | // than 8 characters or include non-alphanumeric characters result in an error | ||
| 181 | // and are gobbled and removed from the output. | ||
| 182 | // It returns the end position of the last token consumed. | ||
| 183 | func (s *scanner) scan() (end int) { | ||
| 184 | end = s.end | ||
| 185 | s.token = nil | ||
| 186 | for s.start = s.next; s.next < len(s.b); { | ||
| 187 | i := bytes.IndexByte(s.b[s.next:], '-') | ||
| 188 | if i == -1 { | ||
| 189 | s.end = len(s.b) | ||
| 190 | s.next = len(s.b) | ||
| 191 | i = s.end - s.start | ||
| 192 | } else { | ||
| 193 | s.end = s.next + i | ||
| 194 | s.next = s.end + 1 | ||
| 195 | } | ||
| 196 | token := s.b[s.start:s.end] | ||
| 197 | if i < 1 || i > 8 || !isAlphaNum(token) { | ||
| 198 | s.gobble(ErrSyntax) | ||
| 199 | continue | ||
| 200 | } | ||
| 201 | s.token = token | ||
| 202 | return end | ||
| 203 | } | ||
| 204 | if n := len(s.b); n > 0 && s.b[n-1] == '-' { | ||
| 205 | s.setError(ErrSyntax) | ||
| 206 | s.b = s.b[:len(s.b)-1] | ||
| 207 | } | ||
| 208 | s.done = true | ||
| 209 | return end | ||
| 210 | } | ||
| 211 | |||
| 212 | // acceptMinSize parses multiple tokens of the given size or greater. | ||
| 213 | // It returns the end position of the last token consumed. | ||
| 214 | func (s *scanner) acceptMinSize(min int) (end int) { | ||
| 215 | end = s.end | ||
| 216 | s.scan() | ||
| 217 | for ; len(s.token) >= min; s.scan() { | ||
| 218 | end = s.end | ||
| 219 | } | ||
| 220 | return end | ||
| 221 | } | ||
| 222 | |||
| 223 | // Parse parses the given BCP 47 string and returns a valid Tag. If parsing | ||
| 224 | // failed it returns an error and any part of the tag that could be parsed. | ||
| 225 | // If parsing succeeded but an unknown value was found, it returns | ||
| 226 | // ValueError. The Tag returned in this case is just stripped of the unknown | ||
| 227 | // value. All other values are preserved. It accepts tags in the BCP 47 format | ||
| 228 | // and extensions to this standard defined in | ||
| 229 | // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. | ||
| 230 | func Parse(s string) (t Tag, err error) { | ||
| 231 | // TODO: consider supporting old-style locale key-value pairs. | ||
| 232 | if s == "" { | ||
| 233 | return Und, ErrSyntax | ||
| 234 | } | ||
| 235 | defer func() { | ||
| 236 | if recover() != nil { | ||
| 237 | t = Und | ||
| 238 | err = ErrSyntax | ||
| 239 | return | ||
| 240 | } | ||
| 241 | }() | ||
| 242 | if len(s) <= maxAltTaglen { | ||
| 243 | b := [maxAltTaglen]byte{} | ||
| 244 | for i, c := range s { | ||
| 245 | // Generating invalid UTF-8 is okay as it won't match. | ||
| 246 | if 'A' <= c && c <= 'Z' { | ||
| 247 | c += 'a' - 'A' | ||
| 248 | } else if c == '_' { | ||
| 249 | c = '-' | ||
| 250 | } | ||
| 251 | b[i] = byte(c) | ||
| 252 | } | ||
| 253 | if t, ok := grandfathered(b); ok { | ||
| 254 | return t, nil | ||
| 255 | } | ||
| 256 | } | ||
| 257 | scan := makeScannerString(s) | ||
| 258 | return parse(&scan, s) | ||
| 259 | } | ||
| 260 | |||
| 261 | func parse(scan *scanner, s string) (t Tag, err error) { | ||
| 262 | t = Und | ||
| 263 | var end int | ||
| 264 | if n := len(scan.token); n <= 1 { | ||
| 265 | scan.toLower(0, len(scan.b)) | ||
| 266 | if n == 0 || scan.token[0] != 'x' { | ||
| 267 | return t, ErrSyntax | ||
| 268 | } | ||
| 269 | end = parseExtensions(scan) | ||
| 270 | } else if n >= 4 { | ||
| 271 | return Und, ErrSyntax | ||
| 272 | } else { // the usual case | ||
| 273 | t, end = parseTag(scan, true) | ||
| 274 | if n := len(scan.token); n == 1 { | ||
| 275 | t.pExt = uint16(end) | ||
| 276 | end = parseExtensions(scan) | ||
| 277 | } else if end < len(scan.b) { | ||
| 278 | scan.setError(ErrSyntax) | ||
| 279 | scan.b = scan.b[:end] | ||
| 280 | } | ||
| 281 | } | ||
| 282 | if int(t.pVariant) < len(scan.b) { | ||
| 283 | if end < len(s) { | ||
| 284 | s = s[:end] | ||
| 285 | } | ||
| 286 | if len(s) > 0 && tag.Compare(s, scan.b) == 0 { | ||
| 287 | t.str = s | ||
| 288 | } else { | ||
| 289 | t.str = string(scan.b) | ||
| 290 | } | ||
| 291 | } else { | ||
| 292 | t.pVariant, t.pExt = 0, 0 | ||
| 293 | } | ||
| 294 | return t, scan.err | ||
| 295 | } | ||
| 296 | |||
| 297 | // parseTag parses language, script, region and variants. | ||
| 298 | // It returns a Tag and the end position in the input that was parsed. | ||
| 299 | // If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>. | ||
| 300 | func parseTag(scan *scanner, doNorm bool) (t Tag, end int) { | ||
| 301 | var e error | ||
| 302 | // TODO: set an error if an unknown lang, script or region is encountered. | ||
| 303 | t.LangID, e = getLangID(scan.token) | ||
| 304 | scan.setError(e) | ||
| 305 | scan.replace(t.LangID.String()) | ||
| 306 | langStart := scan.start | ||
| 307 | end = scan.scan() | ||
| 308 | for len(scan.token) == 3 && isAlpha(scan.token[0]) { | ||
| 309 | // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent | ||
| 310 | // to a tag of the form <extlang>. | ||
| 311 | if doNorm { | ||
| 312 | lang, e := getLangID(scan.token) | ||
| 313 | if lang != 0 { | ||
| 314 | t.LangID = lang | ||
| 315 | langStr := lang.String() | ||
| 316 | copy(scan.b[langStart:], langStr) | ||
| 317 | scan.b[langStart+len(langStr)] = '-' | ||
| 318 | scan.start = langStart + len(langStr) + 1 | ||
| 319 | } | ||
| 320 | scan.gobble(e) | ||
| 321 | } | ||
| 322 | end = scan.scan() | ||
| 323 | } | ||
| 324 | if len(scan.token) == 4 && isAlpha(scan.token[0]) { | ||
| 325 | t.ScriptID, e = getScriptID(script, scan.token) | ||
| 326 | if t.ScriptID == 0 { | ||
| 327 | scan.gobble(e) | ||
| 328 | } | ||
| 329 | end = scan.scan() | ||
| 330 | } | ||
| 331 | if n := len(scan.token); n >= 2 && n <= 3 { | ||
| 332 | t.RegionID, e = getRegionID(scan.token) | ||
| 333 | if t.RegionID == 0 { | ||
| 334 | scan.gobble(e) | ||
| 335 | } else { | ||
| 336 | scan.replace(t.RegionID.String()) | ||
| 337 | } | ||
| 338 | end = scan.scan() | ||
| 339 | } | ||
| 340 | scan.toLower(scan.start, len(scan.b)) | ||
| 341 | t.pVariant = byte(end) | ||
| 342 | end = parseVariants(scan, end, t) | ||
| 343 | t.pExt = uint16(end) | ||
| 344 | return t, end | ||
| 345 | } | ||
| 346 | |||
| 347 | var separator = []byte{'-'} | ||
| 348 | |||
| 349 | // parseVariants scans tokens as long as each token is a valid variant string. | ||
| 350 | // Duplicate variants are removed. | ||
| 351 | func parseVariants(scan *scanner, end int, t Tag) int { | ||
| 352 | start := scan.start | ||
| 353 | varIDBuf := [4]uint8{} | ||
| 354 | variantBuf := [4][]byte{} | ||
| 355 | varID := varIDBuf[:0] | ||
| 356 | variant := variantBuf[:0] | ||
| 357 | last := -1 | ||
| 358 | needSort := false | ||
| 359 | for ; len(scan.token) >= 4; scan.scan() { | ||
| 360 | // TODO: measure the impact of needing this conversion and redesign | ||
| 361 | // the data structure if there is an issue. | ||
| 362 | v, ok := variantIndex[string(scan.token)] | ||
| 363 | if !ok { | ||
| 364 | // unknown variant | ||
| 365 | // TODO: allow user-defined variants? | ||
| 366 | scan.gobble(NewValueError(scan.token)) | ||
| 367 | continue | ||
| 368 | } | ||
| 369 | varID = append(varID, v) | ||
| 370 | variant = append(variant, scan.token) | ||
| 371 | if !needSort { | ||
| 372 | if last < int(v) { | ||
| 373 | last = int(v) | ||
| 374 | } else { | ||
| 375 | needSort = true | ||
| 376 | // There is no legal combinations of more than 7 variants | ||
| 377 | // (and this is by no means a useful sequence). | ||
| 378 | const maxVariants = 8 | ||
| 379 | if len(varID) > maxVariants { | ||
| 380 | break | ||
| 381 | } | ||
| 382 | } | ||
| 383 | } | ||
| 384 | end = scan.end | ||
| 385 | } | ||
| 386 | if needSort { | ||
| 387 | sort.Sort(variantsSort{varID, variant}) | ||
| 388 | k, l := 0, -1 | ||
| 389 | for i, v := range varID { | ||
| 390 | w := int(v) | ||
| 391 | if l == w { | ||
| 392 | // Remove duplicates. | ||
| 393 | continue | ||
| 394 | } | ||
| 395 | varID[k] = varID[i] | ||
| 396 | variant[k] = variant[i] | ||
| 397 | k++ | ||
| 398 | l = w | ||
| 399 | } | ||
| 400 | if str := bytes.Join(variant[:k], separator); len(str) == 0 { | ||
| 401 | end = start - 1 | ||
| 402 | } else { | ||
| 403 | scan.resizeRange(start, end, len(str)) | ||
| 404 | copy(scan.b[scan.start:], str) | ||
| 405 | end = scan.end | ||
| 406 | } | ||
| 407 | } | ||
| 408 | return end | ||
| 409 | } | ||
| 410 | |||
| 411 | type variantsSort struct { | ||
| 412 | i []uint8 | ||
| 413 | v [][]byte | ||
| 414 | } | ||
| 415 | |||
| 416 | func (s variantsSort) Len() int { | ||
| 417 | return len(s.i) | ||
| 418 | } | ||
| 419 | |||
| 420 | func (s variantsSort) Swap(i, j int) { | ||
| 421 | s.i[i], s.i[j] = s.i[j], s.i[i] | ||
| 422 | s.v[i], s.v[j] = s.v[j], s.v[i] | ||
| 423 | } | ||
| 424 | |||
| 425 | func (s variantsSort) Less(i, j int) bool { | ||
| 426 | return s.i[i] < s.i[j] | ||
| 427 | } | ||
| 428 | |||
| 429 | type bytesSort struct { | ||
| 430 | b [][]byte | ||
| 431 | n int // first n bytes to compare | ||
| 432 | } | ||
| 433 | |||
| 434 | func (b bytesSort) Len() int { | ||
| 435 | return len(b.b) | ||
| 436 | } | ||
| 437 | |||
| 438 | func (b bytesSort) Swap(i, j int) { | ||
| 439 | b.b[i], b.b[j] = b.b[j], b.b[i] | ||
| 440 | } | ||
| 441 | |||
| 442 | func (b bytesSort) Less(i, j int) bool { | ||
| 443 | for k := 0; k < b.n; k++ { | ||
| 444 | if b.b[i][k] == b.b[j][k] { | ||
| 445 | continue | ||
| 446 | } | ||
| 447 | return b.b[i][k] < b.b[j][k] | ||
| 448 | } | ||
| 449 | return false | ||
| 450 | } | ||
| 451 | |||
| 452 | // parseExtensions parses and normalizes the extensions in the buffer. | ||
| 453 | // It returns the last position of scan.b that is part of any extension. | ||
| 454 | // It also trims scan.b to remove excess parts accordingly. | ||
| 455 | func parseExtensions(scan *scanner) int { | ||
| 456 | start := scan.start | ||
| 457 | exts := [][]byte{} | ||
| 458 | private := []byte{} | ||
| 459 | end := scan.end | ||
| 460 | for len(scan.token) == 1 { | ||
| 461 | extStart := scan.start | ||
| 462 | ext := scan.token[0] | ||
| 463 | end = parseExtension(scan) | ||
| 464 | extension := scan.b[extStart:end] | ||
| 465 | if len(extension) < 3 || (ext != 'x' && len(extension) < 4) { | ||
| 466 | scan.setError(ErrSyntax) | ||
| 467 | end = extStart | ||
| 468 | continue | ||
| 469 | } else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) { | ||
| 470 | scan.b = scan.b[:end] | ||
| 471 | return end | ||
| 472 | } else if ext == 'x' { | ||
| 473 | private = extension | ||
| 474 | break | ||
| 475 | } | ||
| 476 | exts = append(exts, extension) | ||
| 477 | } | ||
| 478 | sort.Sort(bytesSort{exts, 1}) | ||
| 479 | if len(private) > 0 { | ||
| 480 | exts = append(exts, private) | ||
| 481 | } | ||
| 482 | scan.b = scan.b[:start] | ||
| 483 | if len(exts) > 0 { | ||
| 484 | scan.b = append(scan.b, bytes.Join(exts, separator)...) | ||
| 485 | } else if start > 0 { | ||
| 486 | // Strip trailing '-'. | ||
| 487 | scan.b = scan.b[:start-1] | ||
| 488 | } | ||
| 489 | return end | ||
| 490 | } | ||
| 491 | |||
| 492 | // parseExtension parses a single extension and returns the position of | ||
| 493 | // the extension end. | ||
| 494 | func parseExtension(scan *scanner) int { | ||
| 495 | start, end := scan.start, scan.end | ||
| 496 | switch scan.token[0] { | ||
| 497 | case 'u': // https://www.ietf.org/rfc/rfc6067.txt | ||
| 498 | attrStart := end | ||
| 499 | scan.scan() | ||
| 500 | for last := []byte{}; len(scan.token) > 2; scan.scan() { | ||
| 501 | if bytes.Compare(scan.token, last) != -1 { | ||
| 502 | // Attributes are unsorted. Start over from scratch. | ||
| 503 | p := attrStart + 1 | ||
| 504 | scan.next = p | ||
| 505 | attrs := [][]byte{} | ||
| 506 | for scan.scan(); len(scan.token) > 2; scan.scan() { | ||
| 507 | attrs = append(attrs, scan.token) | ||
| 508 | end = scan.end | ||
| 509 | } | ||
| 510 | sort.Sort(bytesSort{attrs, 3}) | ||
| 511 | copy(scan.b[p:], bytes.Join(attrs, separator)) | ||
| 512 | break | ||
| 513 | } | ||
| 514 | last = scan.token | ||
| 515 | end = scan.end | ||
| 516 | } | ||
| 517 | // Scan key-type sequences. A key is of length 2 and may be followed | ||
| 518 | // by 0 or more "type" subtags from 3 to the maximum of 8 letters. | ||
| 519 | var last, key []byte | ||
| 520 | for attrEnd := end; len(scan.token) == 2; last = key { | ||
| 521 | key = scan.token | ||
| 522 | end = scan.end | ||
| 523 | for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() { | ||
| 524 | end = scan.end | ||
| 525 | } | ||
| 526 | // TODO: check key value validity | ||
| 527 | if bytes.Compare(key, last) != 1 || scan.err != nil { | ||
| 528 | // We have an invalid key or the keys are not sorted. | ||
| 529 | // Start scanning keys from scratch and reorder. | ||
| 530 | p := attrEnd + 1 | ||
| 531 | scan.next = p | ||
| 532 | keys := [][]byte{} | ||
| 533 | for scan.scan(); len(scan.token) == 2; { | ||
| 534 | keyStart := scan.start | ||
| 535 | end = scan.end | ||
| 536 | for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() { | ||
| 537 | end = scan.end | ||
| 538 | } | ||
| 539 | keys = append(keys, scan.b[keyStart:end]) | ||
| 540 | } | ||
| 541 | sort.Stable(bytesSort{keys, 2}) | ||
| 542 | if n := len(keys); n > 0 { | ||
| 543 | k := 0 | ||
| 544 | for i := 1; i < n; i++ { | ||
| 545 | if !bytes.Equal(keys[k][:2], keys[i][:2]) { | ||
| 546 | k++ | ||
| 547 | keys[k] = keys[i] | ||
| 548 | } else if !bytes.Equal(keys[k], keys[i]) { | ||
| 549 | scan.setError(ErrDuplicateKey) | ||
| 550 | } | ||
| 551 | } | ||
| 552 | keys = keys[:k+1] | ||
| 553 | } | ||
| 554 | reordered := bytes.Join(keys, separator) | ||
| 555 | if e := p + len(reordered); e < end { | ||
| 556 | scan.deleteRange(e, end) | ||
| 557 | end = e | ||
| 558 | } | ||
| 559 | copy(scan.b[p:], reordered) | ||
| 560 | break | ||
| 561 | } | ||
| 562 | } | ||
| 563 | case 't': // https://www.ietf.org/rfc/rfc6497.txt | ||
| 564 | scan.scan() | ||
| 565 | if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { | ||
| 566 | _, end = parseTag(scan, false) | ||
| 567 | scan.toLower(start, end) | ||
| 568 | } | ||
| 569 | for len(scan.token) == 2 && !isAlpha(scan.token[1]) { | ||
| 570 | end = scan.acceptMinSize(3) | ||
| 571 | } | ||
| 572 | case 'x': | ||
| 573 | end = scan.acceptMinSize(1) | ||
| 574 | default: | ||
| 575 | end = scan.acceptMinSize(2) | ||
| 576 | } | ||
| 577 | return end | ||
| 578 | } | ||
| 579 | |||
| 580 | // getExtension returns the name, body and end position of the extension. | ||
| 581 | func getExtension(s string, p int) (end int, ext string) { | ||
| 582 | if s[p] == '-' { | ||
| 583 | p++ | ||
| 584 | } | ||
| 585 | if s[p] == 'x' { | ||
| 586 | return len(s), s[p:] | ||
| 587 | } | ||
| 588 | end = nextExtension(s, p) | ||
| 589 | return end, s[p:end] | ||
| 590 | } | ||
| 591 | |||
| 592 | // nextExtension finds the next extension within the string, searching | ||
| 593 | // for the -<char>- pattern from position p. | ||
| 594 | // In the fast majority of cases, language tags will have at most | ||
| 595 | // one extension and extensions tend to be small. | ||
| 596 | func nextExtension(s string, p int) int { | ||
| 597 | for n := len(s) - 3; p < n; { | ||
| 598 | if s[p] == '-' { | ||
| 599 | if s[p+2] == '-' { | ||
| 600 | return p | ||
| 601 | } | ||
| 602 | p += 3 | ||
| 603 | } else { | ||
| 604 | p++ | ||
| 605 | } | ||
| 606 | } | ||
| 607 | return len(s) | ||
| 608 | } | ||
