1package chroma
  2
  3import (
  4	"fmt"
  5)
  6
// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	//
	// groups[0] is the full match; groups[1:] are the capture groups.
	Emit(groups []string, state *LexerState) Iterator
}
 12
// ValidatingEmitter is an Emitter that can validate against a compiled rule.
type ValidatingEmitter interface {
	Emitter
	// ValidateEmitter returns an error if the emitter is incompatible with
	// the given compiled rule (e.g. a capture-group count mismatch).
	ValidateEmitter(rule *CompiledRule) error
}
 18
// SerialisableEmitter is an Emitter that can be serialised and deserialised to/from JSON.
type SerialisableEmitter interface {
	Emitter
	// EmitterKind returns a unique string identifying this emitter type,
	// used as the discriminator when (de)serialising.
	EmitterKind() string
}
 24
// EmitterFunc is a function that is an Emitter.
type EmitterFunc func(groups []string, state *LexerState) Iterator

// Emit tokens for groups by invoking the function itself.
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
	return e(groups, state)
}
 32
// Emitters is a sequence of Emitters.
type Emitters []Emitter

// byGroupsEmitter pairs each regex capture group with a positional emitter.
type byGroupsEmitter struct {
	Emitters
}

// Compile-time check that byGroupsEmitter supports rule validation.
var _ ValidatingEmitter = (*byGroupsEmitter)(nil)
 40
 41// ByGroups emits a token for each matching group in the rule's regex.
 42func ByGroups(emitters ...Emitter) Emitter {
 43	return &byGroupsEmitter{Emitters: emitters}
 44}
 45
// EmitterKind returns "bygroups", the serialisation kind for ByGroups emitters.
func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }
 47
 48func (b *byGroupsEmitter) ValidateEmitter(rule *CompiledRule) error {
 49	if len(rule.Regexp.GetGroupNumbers())-1 != len(b.Emitters) {
 50		return fmt.Errorf("number of groups %d does not match number of emitters %d", len(rule.Regexp.GetGroupNumbers())-1, len(b.Emitters))
 51	}
 52	return nil
 53}
 54
 55func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
 56	iterators := make([]Iterator, 0, len(groups)-1)
 57	if len(b.Emitters) != len(groups)-1 {
 58		iterators = append(iterators, Error.Emit(groups, state))
 59		// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
 60	} else {
 61		for i, group := range groups[1:] {
 62			if b.Emitters[i] != nil {
 63				iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
 64			}
 65		}
 66	}
 67	return Concaterator(iterators...)
 68}
 69
// ByGroupNames emits a token for each named matching group in the rule's regex.
//
// The emitters map is keyed by group name. A matched group with no entry in
// the map produces an Error token; a nil entry skips that group entirely.
func ByGroupNames(emitters map[string]Emitter) Emitter {
	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
		if len(state.NamedGroups)-1 == 0 {
			// No named groups matched: emit the whole match with the emitter
			// registered under "0", or an Error token if none exists.
			if emitter, ok := emitters[`0`]; ok {
				iterators = append(iterators, emitter.Emit(groups, state))
			} else {
				iterators = append(iterators, Error.Emit(groups, state))
			}
		} else {
			// Map each numbered group back to its name via the rule's regex,
			// then dispatch to the emitter registered under that name.
			ruleRegex := state.Rules[state.State][state.Rule].Regexp
			for i := 1; i < len(state.NamedGroups); i++ {
				groupName := ruleRegex.GroupNameFromNumber(i)
				group := state.NamedGroups[groupName]
				if emitter, ok := emitters[groupName]; ok {
					if emitter != nil {
						iterators = append(iterators, emitter.Emit([]string{group}, state))
					}
				} else {
					iterators = append(iterators, Error.Emit([]string{group}, state))
				}
			}
		}
		return Concaterator(iterators...)
	})
}
 97
// UsingByGroup emits tokens for the matched groups in the regex using a
// sublexer. Used when lexing code blocks where the name of a sublexer is
// contained within the block, for example on a Markdown text block or SQL
// language block.
//
// An attempt to load the sublexer will be made using the captured value from
// the text of the matched sublexerNameGroup. If a sublexer matching the
// sublexerNameGroup is available, then tokens for the matched codeGroup will
// be emitted using the sublexer. Otherwise, if no sublexer is available, then
// tokens will be emitted from the passed emitter.
//
// Example:
//
//	var Markdown = internal.Register(MustNewLexer(
//		&Config{
//			Name:      "markdown",
//			Aliases:   []string{"md", "mkd"},
//			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
//			MimeTypes: []string{"text/x-markdown"},
//		},
//		Rules{
//			"root": {
//				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
//					UsingByGroup(
//						2, 4,
//						String, String, String, Text, String,
//					),
//					nil,
//				},
//			},
//		},
//	))
//
// See the lexers/markdown.go for the complete example.
//
// Note: panics if the number of emitters does not equal the number of matched
// groups in the regex.
func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
	return &usingByGroup{
		SublexerNameGroup: sublexerNameGroup,
		CodeGroup:         codeGroup,
		Emitters:          emitters,
	}
}
142
// usingByGroup is the serialisable Emitter implementation behind UsingByGroup.
// Group numbers are 1-based regex capture group indices.
type usingByGroup struct {
	SublexerNameGroup int      `xml:"sublexer_name_group"`
	CodeGroup         int      `xml:"code_group"`
	Emitters          Emitters `xml:"emitters"`
}
148
// EmitterKind returns "usingbygroup", the serialisation kind for UsingByGroup
// emitters.
func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }

// Emit tokenises the code group with the sublexer named by the captured
// sublexer-name group when that lexer is registered, and emits every other
// group with its positional emitter. Panics if the emitter count does not
// match the capture-group count, or if the sublexer fails to tokenise.
func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
	// bounds check
	if len(u.Emitters) != len(groups)-1 {
		panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
	}

	// grab sublexer; Get returns nil when no lexer matches the captured name
	sublexer := state.Registry.Get(groups[u.SublexerNameGroup])

	// build iterators
	iterators := make([]Iterator, len(groups)-1)
	for i, group := range groups[1:] {
		if i == u.CodeGroup-1 && sublexer != nil {
			// Tokenise the code group with the sublexer instead of the
			// positional emitter.
			var err error
			iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
			if err != nil {
				panic(err)
			}
		} else if u.Emitters[i] != nil {
			iterators[i] = u.Emitters[i].Emit([]string{group}, state)
		}
	}
	return Concaterator(iterators...)
}
174
175// UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
176//
177// This Emitter is not serialisable.
178func UsingLexer(lexer Lexer) Emitter {
179	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
180		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
181		if err != nil {
182			panic(err)
183		}
184		return it
185	})
186}
187
// usingEmitter is the serialisable Emitter implementation behind Using. It
// stores the target lexer by name and resolves it through the LexerRegistry
// at emit time.
type usingEmitter struct {
	Lexer string `xml:"lexer,attr"`
}
191
// EmitterKind returns "using", the serialisation kind for Using emitters.
func (u *usingEmitter) EmitterKind() string { return "using" }
193
194func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
195	if state.Registry == nil {
196		panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
197	}
198	lexer := state.Registry.Get(u.Lexer)
199	if lexer == nil {
200		panic(fmt.Sprintf("no such lexer %q", u.Lexer))
201	}
202	it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
203	if err != nil {
204		panic(err)
205	}
206	return it
207}
208
// Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
//
// The lexer argument is the name under which the target lexer is looked up
// at emit time; the referenced lexer must be stored in the same LexerRegistry.
func Using(lexer string) Emitter {
	return &usingEmitter{Lexer: lexer}
}
215
// usingSelfEmitter is the serialisable Emitter implementation behind
// UsingSelf. It re-tokenises the match with the current lexer starting at
// the named State.
type usingSelfEmitter struct {
	State string `xml:"state,attr"`
}
219
// EmitterKind returns "usingself", the serialisation kind for UsingSelf emitters.
func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }
221
222func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
223	it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
224	if err != nil {
225		panic(err)
226	}
227	return it
228}
229
230// UsingSelf is like Using, but uses the current Lexer.
231func UsingSelf(stateName string) Emitter {
232	return &usingSelfEmitter{stateName}
233}