1package chroma
  2
  3import (
  4	"bytes"
  5)
  6
// delegatingLexer is the implementation behind DelegatingLexer. It runs the
// embedded language lexer first and hands anything that lexer marks as
// "Other" to the root lexer.
type delegatingLexer struct {
	root     Lexer // lexer for the host/outer language; re-lexes "Other" regions
	language Lexer // embedded-language lexer; must emit "Other" for unrecognised text
}
 11
 12// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
 13// inside HTML or PHP inside plain text.
 14//
 15// It takes two lexer as arguments: a root lexer and a language lexer.  First everything is scanned using the language
 16// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
 17// Finally, these two sets of tokens are merged.
 18//
 19// The lexers from the template lexer package use this base lexer.
 20func DelegatingLexer(root Lexer, language Lexer) Lexer {
 21	return &delegatingLexer{
 22		root:     root,
 23		language: language,
 24	}
 25}
 26
 27func (d *delegatingLexer) SetTracing(enable bool) {
 28	if l, ok := d.language.(TracingLexer); ok {
 29		l.SetTracing(enable)
 30	}
 31	if l, ok := d.root.(TracingLexer); ok {
 32		l.SetTracing(enable)
 33	}
 34}
 35
 36func (d *delegatingLexer) AnalyseText(text string) float32 {
 37	return d.root.AnalyseText(text)
 38}
 39
 40func (d *delegatingLexer) SetAnalyser(analyser func(text string) float32) Lexer {
 41	d.root.SetAnalyser(analyser)
 42	return d
 43}
 44
 45func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
 46	d.root.SetRegistry(r)
 47	d.language.SetRegistry(r)
 48	return d
 49}
 50
 51func (d *delegatingLexer) Config() *Config {
 52	return d.language.Config()
 53}
 54
// An insertion is the character range where language tokens should be inserted.
// start/end are byte offsets into the original text; tokens are the
// language-lexer tokens to emit in place of that range when merging with the
// root lexer's output.
type insertion struct {
	start, end int
	tokens     []Token
}
 60
// Tokenise implements the delegating strategy: the text is first lexed with
// the embedded language lexer, then every region it marked as "Other" is
// re-lexed with the root lexer, and the two token streams are merged back
// into document order.
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
	//
	// Single pass over the language-lexer output: concatenate all "Other"
	// token values into `others` (to be re-lexed by the root lexer below),
	// and record each contiguous run of non-"Other" tokens as an insertion
	// carrying its byte range in the original text.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0 // Byte offset into the original text.
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			// Transition from language tokens back to "Other" text:
			// close off the current insertion at this offset.
			// NOTE(review): `last != EOF` appears to mean "not the first
			// iteration", which relies on EOF being the zero Token —
			// confirm against the package's Token/EOF definitions.
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			// Transition from "Other" (or the very start of the stream)
			// into language tokens: open a new insertion here.
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	// Fast path: the language lexer recognised nothing at all, so the whole
	// text belongs to the root lexer.
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens.
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	// nextToken yields successive root-lexer tokens, then EOF forever.
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	// nextInsertion yields successive insertions, then nil forever.
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	// Loop invariant: `offset` tracks how much of the original text has been
	// emitted. Either `t` is a pending root token or `i` a pending insertion;
	// the loop ends when both streams are exhausted.
	for t != EOF || i != nil {
		// fmt.Printf("%d->%d:%q   %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			// The next insertion starts inside (or at the beginning of) the
			// current root token: split the token at the insertion point,
			// emit the left half, then the inserted language tokens.
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			// Skip past the inserted region of the original text.
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			// The root token lies entirely before the next insertion.
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}
145
146func splitToken(t Token, offset int) (l Token, r Token) {
147	if t == EOF {
148		return EOF, EOF
149	}
150	if offset == 0 {
151		return EOF, t
152	}
153	if offset == len(t.Value) {
154		return t, EOF
155	}
156	l = t.Clone()
157	r = t.Clone()
158	l.Value = l.Value[:offset]
159	r.Value = r.Value[offset:]
160	return
161}