1package chroma
2
3import (
4 "bytes"
5)
6
// delegatingLexer is the implementation behind DelegatingLexer. It lexes text
// with the embedded-language lexer first, then fills the gaps the language
// lexer marked as "Other" using the root (host document) lexer.
type delegatingLexer struct {
	root     Lexer // lexer for the surrounding/host document
	language Lexer // lexer for the embedded language; must emit Other for unrecognised text
}
11
12// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
13// inside HTML or PHP inside plain text.
14//
15// It takes two lexer as arguments: a root lexer and a language lexer. First everything is scanned using the language
16// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
17// Finally, these two sets of tokens are merged.
18//
19// The lexers from the template lexer package use this base lexer.
20func DelegatingLexer(root Lexer, language Lexer) Lexer {
21 return &delegatingLexer{
22 root: root,
23 language: language,
24 }
25}
26
27func (d *delegatingLexer) SetTracing(enable bool) {
28 if l, ok := d.language.(TracingLexer); ok {
29 l.SetTracing(enable)
30 }
31 if l, ok := d.root.(TracingLexer); ok {
32 l.SetTracing(enable)
33 }
34}
35
// AnalyseText returns a score in [0, 1] estimating how likely the text is to
// be in this lexer's language, delegating entirely to the root lexer.
func (d *delegatingLexer) AnalyseText(text string) float32 {
	return d.root.AnalyseText(text)
}
39
40func (d *delegatingLexer) SetAnalyser(analyser func(text string) float32) Lexer {
41 d.root.SetAnalyser(analyser)
42 return d
43}
44
45func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
46 d.root.SetRegistry(r)
47 d.language.SetRegistry(r)
48 return d
49}
50
// Config returns the configuration of the language (embedded) lexer, which is
// treated as the primary identity of the combined lexer.
func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}
54
// An insertion is the character range where language tokens should be inserted.
type insertion struct {
	start, end int     // byte offsets into the original text covered by this run
	tokens     []Token // the language tokens occupying [start, end)
}
60
// Tokenise implements Lexer. The text is first lexed with the language lexer;
// runs of "Other" tokens (text the language lexer did not claim) are
// concatenated and lexed with the root lexer, and the two token streams are
// then merged back together in document order.
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
	// "others" accumulates all host-document text; each insertion records the
	// byte range, in the original text, occupied by one run of language tokens.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			// Transition from language tokens back to host text: close the
			// current insertion at the current offset.
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			// Transition from host text (or start of input) into language
			// tokens: open a new insertion at the current offset.
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	// No embedded language present: the whole text belongs to the root lexer.
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens.
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	// nextToken yields successive root tokens, then EOF forever.
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	// nextInsertion yields successive insertions, then nil forever.
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	// Invariant: when t == EOF the loop condition guarantees i != nil, so the
	// i.start access in the branch below is safe.
	for t != EOF || i != nil {
		// fmt.Printf("%d->%d:%q %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			// The next insertion begins within the current root token: split
			// the root token, emit the left half, then the language tokens,
			// and carry the right half forward.
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			// The current root token ends before the next insertion starts;
			// emit it whole.
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}
145
146func splitToken(t Token, offset int) (l Token, r Token) {
147 if t == EOF {
148 return EOF, EOF
149 }
150 if offset == 0 {
151 return EOF, t
152 }
153 if offset == len(t.Value) {
154 return t, EOF
155 }
156 l = t.Clone()
157 r = t.Clone()
158 l.Value = l.Value[:offset]
159 r.Value = r.Value[offset:]
160 return
161}