1package chroma
2
3import (
4 "fmt"
5)
6
// An Emitter takes group matches and returns tokens.
//
// It is the core abstraction used by lexer rules to turn regex capture
// groups into a stream of tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	Emit(groups []string, state *LexerState) Iterator
}
12
// ValidatingEmitter is an Emitter that can validate against a compiled rule.
//
// Implementations can check, at lexer-compile time, that the rule's regex is
// compatible with the emitter (e.g. that the group counts line up).
type ValidatingEmitter interface {
	Emitter
	// ValidateEmitter returns an error if the emitter cannot be used with rule.
	ValidateEmitter(rule *CompiledRule) error
}
18
// SerialisableEmitter is an Emitter that can be serialised and deserialised to/from JSON.
type SerialisableEmitter interface {
	Emitter
	// EmitterKind returns a stable identifier for this emitter type,
	// used as the discriminator during (de)serialisation.
	EmitterKind() string
}
24
// EmitterFunc is a function that is an Emitter.
//
// It adapts a bare function to the Emitter interface, analogous to
// http.HandlerFunc.
type EmitterFunc func(groups []string, state *LexerState) Iterator

// Emit tokens for groups.
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
	return e(groups, state)
}
32
// Emitters is a convenience slice of Emitter.
type Emitters []Emitter

// byGroupsEmitter emits one token stream per regex capture group.
type byGroupsEmitter struct {
	Emitters
}

// Compile-time check that byGroupsEmitter validates its rule.
var _ ValidatingEmitter = (*byGroupsEmitter)(nil)
40
41// ByGroups emits a token for each matching group in the rule's regex.
42func ByGroups(emitters ...Emitter) Emitter {
43 return &byGroupsEmitter{Emitters: emitters}
44}
45
46func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }
47
48func (b *byGroupsEmitter) ValidateEmitter(rule *CompiledRule) error {
49 if len(rule.Regexp.GetGroupNumbers())-1 != len(b.Emitters) {
50 return fmt.Errorf("number of groups %d does not match number of emitters %d", len(rule.Regexp.GetGroupNumbers())-1, len(b.Emitters))
51 }
52 return nil
53}
54
55func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
56 iterators := make([]Iterator, 0, len(groups)-1)
57 if len(b.Emitters) != len(groups)-1 {
58 iterators = append(iterators, Error.Emit(groups, state))
59 // panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
60 } else {
61 for i, group := range groups[1:] {
62 if b.Emitters[i] != nil {
63 iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
64 }
65 }
66 }
67 return Concaterator(iterators...)
68}
69
// ByGroupNames emits a token for each named matching group in the rule's regex.
//
// Each named group's text is routed to the emitter registered under that
// group's name in the emitters map. A name present with a nil emitter skips
// that group; a name missing from the map emits an Error token for it.
func ByGroupNames(emitters map[string]Emitter) Emitter {
	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
		if len(state.NamedGroups)-1 == 0 {
			// No named groups matched: fall back to the emitter registered
			// under the key "0" (the whole match), or Error if absent.
			if emitter, ok := emitters[`0`]; ok {
				iterators = append(iterators, emitter.Emit(groups, state))
			} else {
				iterators = append(iterators, Error.Emit(groups, state))
			}
		} else {
			// Resolve group names in numeric order via the rule's regex so
			// output order matches the groups' positions in the pattern.
			ruleRegex := state.Rules[state.State][state.Rule].Regexp
			for i := 1; i < len(state.NamedGroups); i++ {
				groupName := ruleRegex.GroupNameFromNumber(i)
				group := state.NamedGroups[groupName]
				if emitter, ok := emitters[groupName]; ok {
					// A deliberately nil emitter silently drops the group.
					if emitter != nil {
						iterators = append(iterators, emitter.Emit([]string{group}, state))
					}
				} else {
					iterators = append(iterators, Error.Emit([]string{group}, state))
				}
			}
		}
		return Concaterator(iterators...)
	})
}
97
98// UsingByGroup emits tokens for the matched groups in the regex using a
99// sublexer. Used when lexing code blocks where the name of a sublexer is
100// contained within the block, for example on a Markdown text block or SQL
101// language block.
102//
103// An attempt to load the sublexer will be made using the captured value from
104// the text of the matched sublexerNameGroup. If a sublexer matching the
105// sublexerNameGroup is available, then tokens for the matched codeGroup will
106// be emitted using the sublexer. Otherwise, if no sublexer is available, then
107// tokens will be emitted from the passed emitter.
108//
109// Example:
110//
111// var Markdown = internal.Register(MustNewLexer(
112// &Config{
113// Name: "markdown",
114// Aliases: []string{"md", "mkd"},
115// Filenames: []string{"*.md", "*.mkd", "*.markdown"},
116// MimeTypes: []string{"text/x-markdown"},
117// },
118// Rules{
119// "root": {
120// {"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
121// UsingByGroup(
122// 2, 4,
123// String, String, String, Text, String,
124// ),
125// nil,
126// },
127// },
128// },
129// ))
130//
131// See the lexers/markdown.go for the complete example.
132//
133// Note: panic's if the number of emitters does not equal the number of matched
134// groups in the regex.
135func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
136 return &usingByGroup{
137 SublexerNameGroup: sublexerNameGroup,
138 CodeGroup: codeGroup,
139 Emitters: emitters,
140 }
141}
142
// usingByGroup is the serialisable state backing UsingByGroup: the group
// index naming the sublexer, the group index holding the code to sublex,
// and the fallback emitters for each group.
type usingByGroup struct {
	SublexerNameGroup int      `xml:"sublexer_name_group"`
	CodeGroup         int      `xml:"code_group"`
	Emitters          Emitters `xml:"emitters"`
}
148
149func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }
150func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
151 // bounds check
152 if len(u.Emitters) != len(groups)-1 {
153 panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
154 }
155
156 // grab sublexer
157 sublexer := state.Registry.Get(groups[u.SublexerNameGroup])
158
159 // build iterators
160 iterators := make([]Iterator, len(groups)-1)
161 for i, group := range groups[1:] {
162 if i == u.CodeGroup-1 && sublexer != nil {
163 var err error
164 iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
165 if err != nil {
166 panic(err)
167 }
168 } else if u.Emitters[i] != nil {
169 iterators[i] = u.Emitters[i].Emit([]string{group}, state)
170 }
171 }
172 return Concaterator(iterators...)
173}
174
175// UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
176//
177// This Emitter is not serialisable.
178func UsingLexer(lexer Lexer) Emitter {
179 return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
180 it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
181 if err != nil {
182 panic(err)
183 }
184 return it
185 })
186}
187
// usingEmitter is the serialisable state backing Using: the name of the
// lexer to resolve from the registry at emit time.
type usingEmitter struct {
	Lexer string `xml:"lexer,attr"`
}
191
192func (u *usingEmitter) EmitterKind() string { return "using" }
193
194func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
195 if state.Registry == nil {
196 panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
197 }
198 lexer := state.Registry.Get(u.Lexer)
199 if lexer == nil {
200 panic(fmt.Sprintf("no such lexer %q", u.Lexer))
201 }
202 it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
203 if err != nil {
204 panic(err)
205 }
206 return it
207}
208
209// Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
210//
211// The referenced lexer must be stored in the same LexerRegistry.
212func Using(lexer string) Emitter {
213 return &usingEmitter{Lexer: lexer}
214}
215
// usingSelfEmitter is the serialisable state backing UsingSelf: the state
// name of the current lexer to tokenise from.
type usingSelfEmitter struct {
	State string `xml:"state,attr"`
}
219
220func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }
221
222func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
223 it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
224 if err != nil {
225 panic(err)
226 }
227 return it
228}
229
230// UsingSelf is like Using, but uses the current Lexer.
231func UsingSelf(stateName string) Emitter {
232 return &usingSelfEmitter{stateName}
233}