diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2024-10-25 00:47:47 +0200 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2024-10-25 00:47:47 +0200 |
| commit | c6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch) | |
| tree | 36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/alecthomas/chroma/v2/serialise.go | |
| parent | 0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff) | |
| download | jbmafp-master.tar.gz | |
Diffstat (limited to 'vendor/github.com/alecthomas/chroma/v2/serialise.go')
| -rw-r--r-- | vendor/github.com/alecthomas/chroma/v2/serialise.go | 433 |
1 files changed, 433 insertions, 0 deletions
diff --git a/vendor/github.com/alecthomas/chroma/v2/serialise.go b/vendor/github.com/alecthomas/chroma/v2/serialise.go new file mode 100644 index 0000000..0369639 --- /dev/null +++ b/vendor/github.com/alecthomas/chroma/v2/serialise.go | |||
| @@ -0,0 +1,433 @@ | |||
| 1 | package chroma | ||
| 2 | |||
| 3 | import ( | ||
| 4 | "compress/gzip" | ||
| 5 | "encoding/xml" | ||
| 6 | "errors" | ||
| 7 | "fmt" | ||
| 8 | "io" | ||
| 9 | "io/fs" | ||
| 10 | "path/filepath" | ||
| 11 | "reflect" | ||
| 12 | "regexp" | ||
| 13 | "strings" | ||
| 14 | ) | ||
| 15 | |||
| 16 | // Serialisation of Chroma rules to XML. The format is: | ||
| 17 | // | ||
| 18 | // <rules> | ||
| 19 | // <state name="$STATE"> | ||
| 20 | // <rule [pattern="$PATTERN"]> | ||
| 21 | // [<$EMITTER ...>] | ||
| 22 | // [<$MUTATOR ...>] | ||
| 23 | // </rule> | ||
| 24 | // </state> | ||
| 25 | // </rules> | ||
| 26 | // | ||
| 27 | // eg. Include("String") would become: | ||
| 28 | // | ||
| 29 | // <rule> | ||
| 30 | // <include state="String" /> | ||
| 31 | // </rule> | ||
| 32 | // | ||
| 33 | // [null, null, {"kind": "include", "state": "String"}] | ||
| 34 | // | ||
| 35 | // eg. Rule{`\d+`, Text, nil} would become: | ||
| 36 | // | ||
| 37 | // <rule pattern="\\d+"> | ||
| 38 | // <token type="Text"/> | ||
| 39 | // </rule> | ||
| 40 | // | ||
| 41 | // eg. Rule{`"`, String, Push("String")} | ||
| 42 | // | ||
| 43 | // <rule pattern="\""> | ||
| 44 | // <token type="String" /> | ||
| 45 | // <push state="String" /> | ||
| 46 | // </rule> | ||
| 47 | // | ||
| 48 | // eg. Rule{`(\w+)(\n)`, ByGroups(Keyword, Whitespace), nil}, | ||
| 49 | // | ||
| 50 | // <rule pattern="(\\w+)(\\n)"> | ||
| 51 | // <bygroups token="Keyword" token="Whitespace" /> | ||
| 52 | // <push state="String" /> | ||
| 53 | // </rule> | ||
| 54 | var ( | ||
| 55 | // ErrNotSerialisable is returned if a lexer contains Rules that cannot be serialised. | ||
| 56 | ErrNotSerialisable = fmt.Errorf("not serialisable") | ||
| 57 | emitterTemplates = func() map[string]SerialisableEmitter { | ||
| 58 | out := map[string]SerialisableEmitter{} | ||
| 59 | for _, emitter := range []SerialisableEmitter{ | ||
| 60 | &byGroupsEmitter{}, | ||
| 61 | &usingSelfEmitter{}, | ||
| 62 | TokenType(0), | ||
| 63 | &usingEmitter{}, | ||
| 64 | &usingByGroup{}, | ||
| 65 | } { | ||
| 66 | out[emitter.EmitterKind()] = emitter | ||
| 67 | } | ||
| 68 | return out | ||
| 69 | }() | ||
| 70 | mutatorTemplates = func() map[string]SerialisableMutator { | ||
| 71 | out := map[string]SerialisableMutator{} | ||
| 72 | for _, mutator := range []SerialisableMutator{ | ||
| 73 | &includeMutator{}, | ||
| 74 | &combinedMutator{}, | ||
| 75 | &multiMutator{}, | ||
| 76 | &pushMutator{}, | ||
| 77 | &popMutator{}, | ||
| 78 | } { | ||
| 79 | out[mutator.MutatorKind()] = mutator | ||
| 80 | } | ||
| 81 | return out | ||
| 82 | }() | ||
| 83 | ) | ||
| 84 | |||
| 85 | // fastUnmarshalConfig unmarshals only the Config from a serialised lexer. | ||
| 86 | func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) { | ||
| 87 | r, err := from.Open(path) | ||
| 88 | if err != nil { | ||
| 89 | return nil, err | ||
| 90 | } | ||
| 91 | defer r.Close() | ||
| 92 | dec := xml.NewDecoder(r) | ||
| 93 | for { | ||
| 94 | token, err := dec.Token() | ||
| 95 | if err != nil { | ||
| 96 | if errors.Is(err, io.EOF) { | ||
| 97 | return nil, fmt.Errorf("could not find <config> element") | ||
| 98 | } | ||
| 99 | return nil, err | ||
| 100 | } | ||
| 101 | switch se := token.(type) { | ||
| 102 | case xml.StartElement: | ||
| 103 | if se.Name.Local != "config" { | ||
| 104 | break | ||
| 105 | } | ||
| 106 | |||
| 107 | var config Config | ||
| 108 | err = dec.DecodeElement(&config, &se) | ||
| 109 | if err != nil { | ||
| 110 | panic(err) | ||
| 111 | } | ||
| 112 | return &config, nil | ||
| 113 | } | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | // MustNewXMLLexer constructs a new RegexLexer from an XML file or panics. | ||
| 118 | func MustNewXMLLexer(from fs.FS, path string) *RegexLexer { | ||
| 119 | lex, err := NewXMLLexer(from, path) | ||
| 120 | if err != nil { | ||
| 121 | panic(err) | ||
| 122 | } | ||
| 123 | return lex | ||
| 124 | } | ||
| 125 | |||
| 126 | // NewXMLLexer creates a new RegexLexer from a serialised RegexLexer. | ||
| 127 | func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) { | ||
| 128 | config, err := fastUnmarshalConfig(from, path) | ||
| 129 | if err != nil { | ||
| 130 | return nil, err | ||
| 131 | } | ||
| 132 | for _, glob := range append(config.Filenames, config.AliasFilenames...) { | ||
| 133 | _, err := filepath.Match(glob, "") | ||
| 134 | if err != nil { | ||
| 135 | return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err) | ||
| 136 | } | ||
| 137 | } | ||
| 138 | return &RegexLexer{ | ||
| 139 | config: config, | ||
| 140 | fetchRulesFunc: func() (Rules, error) { | ||
| 141 | var lexer struct { | ||
| 142 | Config | ||
| 143 | Rules Rules `xml:"rules"` | ||
| 144 | } | ||
| 145 | // Try to open .xml fallback to .xml.gz | ||
| 146 | fr, err := from.Open(path) | ||
| 147 | if err != nil { | ||
| 148 | if errors.Is(err, fs.ErrNotExist) { | ||
| 149 | path += ".gz" | ||
| 150 | fr, err = from.Open(path) | ||
| 151 | if err != nil { | ||
| 152 | return nil, err | ||
| 153 | } | ||
| 154 | } else { | ||
| 155 | return nil, err | ||
| 156 | } | ||
| 157 | } | ||
| 158 | defer fr.Close() | ||
| 159 | var r io.Reader = fr | ||
| 160 | if strings.HasSuffix(path, ".gz") { | ||
| 161 | r, err = gzip.NewReader(r) | ||
| 162 | if err != nil { | ||
| 163 | return nil, fmt.Errorf("%s: %w", path, err) | ||
| 164 | } | ||
| 165 | } | ||
| 166 | err = xml.NewDecoder(r).Decode(&lexer) | ||
| 167 | if err != nil { | ||
| 168 | return nil, fmt.Errorf("%s: %w", path, err) | ||
| 169 | } | ||
| 170 | return lexer.Rules, nil | ||
| 171 | }, | ||
| 172 | }, nil | ||
| 173 | } | ||
| 174 | |||
| 175 | // Marshal a RegexLexer to XML. | ||
| 176 | func Marshal(l *RegexLexer) ([]byte, error) { | ||
| 177 | type lexer struct { | ||
| 178 | Config Config `xml:"config"` | ||
| 179 | Rules Rules `xml:"rules"` | ||
| 180 | } | ||
| 181 | |||
| 182 | rules, err := l.Rules() | ||
| 183 | if err != nil { | ||
| 184 | return nil, err | ||
| 185 | } | ||
| 186 | root := &lexer{ | ||
| 187 | Config: *l.Config(), | ||
| 188 | Rules: rules, | ||
| 189 | } | ||
| 190 | data, err := xml.MarshalIndent(root, "", " ") | ||
| 191 | if err != nil { | ||
| 192 | return nil, err | ||
| 193 | } | ||
| 194 | re := regexp.MustCompile(`></[a-zA-Z]+>`) | ||
| 195 | data = re.ReplaceAll(data, []byte(`/>`)) | ||
| 196 | return data, nil | ||
| 197 | } | ||
| 198 | |||
| 199 | // Unmarshal a RegexLexer from XML. | ||
| 200 | func Unmarshal(data []byte) (*RegexLexer, error) { | ||
| 201 | type lexer struct { | ||
| 202 | Config Config `xml:"config"` | ||
| 203 | Rules Rules `xml:"rules"` | ||
| 204 | } | ||
| 205 | root := &lexer{} | ||
| 206 | err := xml.Unmarshal(data, root) | ||
| 207 | if err != nil { | ||
| 208 | return nil, fmt.Errorf("invalid Lexer XML: %w", err) | ||
| 209 | } | ||
| 210 | lex, err := NewLexer(&root.Config, func() Rules { return root.Rules }) | ||
| 211 | if err != nil { | ||
| 212 | return nil, err | ||
| 213 | } | ||
| 214 | return lex, nil | ||
| 215 | } | ||
| 216 | |||
| 217 | func marshalMutator(e *xml.Encoder, mutator Mutator) error { | ||
| 218 | if mutator == nil { | ||
| 219 | return nil | ||
| 220 | } | ||
| 221 | smutator, ok := mutator.(SerialisableMutator) | ||
| 222 | if !ok { | ||
| 223 | return fmt.Errorf("unsupported mutator: %w", ErrNotSerialisable) | ||
| 224 | } | ||
| 225 | return e.EncodeElement(mutator, xml.StartElement{Name: xml.Name{Local: smutator.MutatorKind()}}) | ||
| 226 | } | ||
| 227 | |||
| 228 | func unmarshalMutator(d *xml.Decoder, start xml.StartElement) (Mutator, error) { | ||
| 229 | kind := start.Name.Local | ||
| 230 | mutator, ok := mutatorTemplates[kind] | ||
| 231 | if !ok { | ||
| 232 | return nil, fmt.Errorf("unknown mutator %q: %w", kind, ErrNotSerialisable) | ||
| 233 | } | ||
| 234 | value, target := newFromTemplate(mutator) | ||
| 235 | if err := d.DecodeElement(target, &start); err != nil { | ||
| 236 | return nil, err | ||
| 237 | } | ||
| 238 | return value().(SerialisableMutator), nil | ||
| 239 | } | ||
| 240 | |||
| 241 | func marshalEmitter(e *xml.Encoder, emitter Emitter) error { | ||
| 242 | if emitter == nil { | ||
| 243 | return nil | ||
| 244 | } | ||
| 245 | semitter, ok := emitter.(SerialisableEmitter) | ||
| 246 | if !ok { | ||
| 247 | return fmt.Errorf("unsupported emitter %T: %w", emitter, ErrNotSerialisable) | ||
| 248 | } | ||
| 249 | return e.EncodeElement(emitter, xml.StartElement{ | ||
| 250 | Name: xml.Name{Local: semitter.EmitterKind()}, | ||
| 251 | }) | ||
| 252 | } | ||
| 253 | |||
| 254 | func unmarshalEmitter(d *xml.Decoder, start xml.StartElement) (Emitter, error) { | ||
| 255 | kind := start.Name.Local | ||
| 256 | mutator, ok := emitterTemplates[kind] | ||
| 257 | if !ok { | ||
| 258 | return nil, fmt.Errorf("unknown emitter %q: %w", kind, ErrNotSerialisable) | ||
| 259 | } | ||
| 260 | value, target := newFromTemplate(mutator) | ||
| 261 | if err := d.DecodeElement(target, &start); err != nil { | ||
| 262 | return nil, err | ||
| 263 | } | ||
| 264 | return value().(SerialisableEmitter), nil | ||
| 265 | } | ||
| 266 | |||
| 267 | func (r Rule) MarshalXML(e *xml.Encoder, _ xml.StartElement) error { | ||
| 268 | start := xml.StartElement{ | ||
| 269 | Name: xml.Name{Local: "rule"}, | ||
| 270 | } | ||
| 271 | if r.Pattern != "" { | ||
| 272 | start.Attr = append(start.Attr, xml.Attr{ | ||
| 273 | Name: xml.Name{Local: "pattern"}, | ||
| 274 | Value: r.Pattern, | ||
| 275 | }) | ||
| 276 | } | ||
| 277 | if err := e.EncodeToken(start); err != nil { | ||
| 278 | return err | ||
| 279 | } | ||
| 280 | if err := marshalEmitter(e, r.Type); err != nil { | ||
| 281 | return err | ||
| 282 | } | ||
| 283 | if err := marshalMutator(e, r.Mutator); err != nil { | ||
| 284 | return err | ||
| 285 | } | ||
| 286 | return e.EncodeToken(xml.EndElement{Name: start.Name}) | ||
| 287 | } | ||
| 288 | |||
| 289 | func (r *Rule) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||
| 290 | for _, attr := range start.Attr { | ||
| 291 | if attr.Name.Local == "pattern" { | ||
| 292 | r.Pattern = attr.Value | ||
| 293 | break | ||
| 294 | } | ||
| 295 | } | ||
| 296 | for { | ||
| 297 | token, err := d.Token() | ||
| 298 | if err != nil { | ||
| 299 | return err | ||
| 300 | } | ||
| 301 | switch token := token.(type) { | ||
| 302 | case xml.StartElement: | ||
| 303 | mutator, err := unmarshalMutator(d, token) | ||
| 304 | if err != nil && !errors.Is(err, ErrNotSerialisable) { | ||
| 305 | return err | ||
| 306 | } else if err == nil { | ||
| 307 | if r.Mutator != nil { | ||
| 308 | return fmt.Errorf("duplicate mutator") | ||
| 309 | } | ||
| 310 | r.Mutator = mutator | ||
| 311 | continue | ||
| 312 | } | ||
| 313 | emitter, err := unmarshalEmitter(d, token) | ||
| 314 | if err != nil && !errors.Is(err, ErrNotSerialisable) { // nolint: gocritic | ||
| 315 | return err | ||
| 316 | } else if err == nil { | ||
| 317 | if r.Type != nil { | ||
| 318 | return fmt.Errorf("duplicate emitter") | ||
| 319 | } | ||
| 320 | r.Type = emitter | ||
| 321 | continue | ||
| 322 | } else { | ||
| 323 | return err | ||
| 324 | } | ||
| 325 | |||
| 326 | case xml.EndElement: | ||
| 327 | return nil | ||
| 328 | } | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | type xmlRuleState struct { | ||
| 333 | Name string `xml:"name,attr"` | ||
| 334 | Rules []Rule `xml:"rule"` | ||
| 335 | } | ||
| 336 | |||
| 337 | type xmlRules struct { | ||
| 338 | States []xmlRuleState `xml:"state"` | ||
| 339 | } | ||
| 340 | |||
| 341 | func (r Rules) MarshalXML(e *xml.Encoder, _ xml.StartElement) error { | ||
| 342 | xr := xmlRules{} | ||
| 343 | for state, rules := range r { | ||
| 344 | xr.States = append(xr.States, xmlRuleState{ | ||
| 345 | Name: state, | ||
| 346 | Rules: rules, | ||
| 347 | }) | ||
| 348 | } | ||
| 349 | return e.EncodeElement(xr, xml.StartElement{Name: xml.Name{Local: "rules"}}) | ||
| 350 | } | ||
| 351 | |||
| 352 | func (r *Rules) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||
| 353 | xr := xmlRules{} | ||
| 354 | if err := d.DecodeElement(&xr, &start); err != nil { | ||
| 355 | return err | ||
| 356 | } | ||
| 357 | if *r == nil { | ||
| 358 | *r = Rules{} | ||
| 359 | } | ||
| 360 | for _, state := range xr.States { | ||
| 361 | (*r)[state.Name] = state.Rules | ||
| 362 | } | ||
| 363 | return nil | ||
| 364 | } | ||
| 365 | |||
| 366 | type xmlTokenType struct { | ||
| 367 | Type string `xml:"type,attr"` | ||
| 368 | } | ||
| 369 | |||
| 370 | func (t *TokenType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||
| 371 | el := xmlTokenType{} | ||
| 372 | if err := d.DecodeElement(&el, &start); err != nil { | ||
| 373 | return err | ||
| 374 | } | ||
| 375 | for tt, text := range _TokenType_map { | ||
| 376 | if text == el.Type { | ||
| 377 | *t = tt | ||
| 378 | return nil | ||
| 379 | } | ||
| 380 | } | ||
| 381 | return fmt.Errorf("unknown TokenType %q", el.Type) | ||
| 382 | } | ||
| 383 | |||
| 384 | func (t TokenType) MarshalXML(e *xml.Encoder, start xml.StartElement) error { | ||
| 385 | start.Attr = append(start.Attr, xml.Attr{Name: xml.Name{Local: "type"}, Value: t.String()}) | ||
| 386 | if err := e.EncodeToken(start); err != nil { | ||
| 387 | return err | ||
| 388 | } | ||
| 389 | return e.EncodeToken(xml.EndElement{Name: start.Name}) | ||
| 390 | } | ||
| 391 | |||
| 392 | // This hijinks is a bit unfortunate but without it we can't deserialise into TokenType. | ||
| 393 | func newFromTemplate(template interface{}) (value func() interface{}, target interface{}) { | ||
| 394 | t := reflect.TypeOf(template) | ||
| 395 | if t.Kind() == reflect.Ptr { | ||
| 396 | v := reflect.New(t.Elem()) | ||
| 397 | return v.Interface, v.Interface() | ||
| 398 | } | ||
| 399 | v := reflect.New(t) | ||
| 400 | return func() interface{} { return v.Elem().Interface() }, v.Interface() | ||
| 401 | } | ||
| 402 | |||
| 403 | func (b *Emitters) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||
| 404 | for { | ||
| 405 | token, err := d.Token() | ||
| 406 | if err != nil { | ||
| 407 | return err | ||
| 408 | } | ||
| 409 | switch token := token.(type) { | ||
| 410 | case xml.StartElement: | ||
| 411 | emitter, err := unmarshalEmitter(d, token) | ||
| 412 | if err != nil { | ||
| 413 | return err | ||
| 414 | } | ||
| 415 | *b = append(*b, emitter) | ||
| 416 | |||
| 417 | case xml.EndElement: | ||
| 418 | return nil | ||
| 419 | } | ||
| 420 | } | ||
| 421 | } | ||
| 422 | |||
| 423 | func (b Emitters) MarshalXML(e *xml.Encoder, start xml.StartElement) error { | ||
| 424 | if err := e.EncodeToken(start); err != nil { | ||
| 425 | return err | ||
| 426 | } | ||
| 427 | for _, m := range b { | ||
| 428 | if err := marshalEmitter(e, m); err != nil { | ||
| 429 | return err | ||
| 430 | } | ||
| 431 | } | ||
| 432 | return e.EncodeToken(xml.EndElement{Name: start.Name}) | ||
| 433 | } | ||
