1package chroma
  2
  3import (
  4	"fmt"
  5)
  6
// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	// groups[0] is the full match; groups[1:] are the capture groups.
	Emit(groups []string, state *LexerState) Iterator
}
 12
// SerialisableEmitter is an Emitter that can be serialised and deserialised to/from JSON.
type SerialisableEmitter interface {
	Emitter
	// EmitterKind returns a stable string identifying the concrete emitter type.
	EmitterKind() string
}
 18
// EmitterFunc is a function that is an Emitter.
type EmitterFunc func(groups []string, state *LexerState) Iterator

// Emit tokens for groups by delegating directly to the wrapped function.
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
	return e(groups, state)
}
 26
 27type Emitters []Emitter
 28
// byGroupsEmitter emits one token stream per regex capture group,
// using the emitter at the matching index. See ByGroups.
type byGroupsEmitter struct {
	Emitters
}
 32
// ByGroups emits a token for each matching group in the rule's regex.
// The i'th emitter handles the i'th capture group (group 0, the full
// match, is not emitted).
func ByGroups(emitters ...Emitter) Emitter {
	return &byGroupsEmitter{Emitters: emitters}
}
 37
 38func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }
 39
 40func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
 41	iterators := make([]Iterator, 0, len(groups)-1)
 42	if len(b.Emitters) != len(groups)-1 {
 43		iterators = append(iterators, Error.Emit(groups, state))
 44		// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
 45	} else {
 46		for i, group := range groups[1:] {
 47			if b.Emitters[i] != nil {
 48				iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
 49			}
 50		}
 51	}
 52	return Concaterator(iterators...)
 53}
 54
 55// ByGroupNames emits a token for each named matching group in the rule's regex.
 56func ByGroupNames(emitters map[string]Emitter) Emitter {
 57	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
 58		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
 59		if len(state.NamedGroups)-1 == 0 {
 60			if emitter, ok := emitters[`0`]; ok {
 61				iterators = append(iterators, emitter.Emit(groups, state))
 62			} else {
 63				iterators = append(iterators, Error.Emit(groups, state))
 64			}
 65		} else {
 66			ruleRegex := state.Rules[state.State][state.Rule].Regexp
 67			for i := 1; i < len(state.NamedGroups); i++ {
 68				groupName := ruleRegex.GroupNameFromNumber(i)
 69				group := state.NamedGroups[groupName]
 70				if emitter, ok := emitters[groupName]; ok {
 71					if emitter != nil {
 72						iterators = append(iterators, emitter.Emit([]string{group}, state))
 73					}
 74				} else {
 75					iterators = append(iterators, Error.Emit([]string{group}, state))
 76				}
 77			}
 78		}
 79		return Concaterator(iterators...)
 80	})
 81}
 82
// UsingByGroup emits tokens for the matched groups in the regex using a
// "sublexer". Used when lexing code blocks where the name of a sublexer is
// contained within the block, for example on a Markdown text block or SQL
// language block.
//
// The sublexer will be retrieved using sublexerGetFunc (typically
// internal.Get), using the captured value from the matched sublexerNameGroup.
//
// If sublexerGetFunc returns a non-nil lexer for the captured sublexerNameGroup,
// then tokens for the matched codeGroup will be emitted using the retrieved
// lexer. Otherwise, if the sublexer is nil, then tokens will be emitted from
// the passed emitter.
//
// Example:
//
// 	var Markdown = internal.Register(MustNewLexer(
// 		&Config{
// 			Name:      "markdown",
// 			Aliases:   []string{"md", "mkd"},
// 			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
// 			MimeTypes: []string{"text/x-markdown"},
// 		},
// 		Rules{
// 			"root": {
// 				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
// 					UsingByGroup(
// 						internal.Get,
// 						2, 4,
// 						String, String, String, Text, String,
// 					),
// 					nil,
// 				},
// 			},
// 		},
// 	))
//
// See the lexers/m/markdown.go for the complete example.
//
// Note: panic's if the number of emitters does not equal the number of matched
// groups in the regex.
//
// sublexerNameGroup and codeGroup are 1-based capture group indices; one
// emitter must be supplied per capture group.
func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
	return &usingByGroup{
		SublexerNameGroup: sublexerNameGroup,
		CodeGroup:         codeGroup,
		Emitters:          emitters,
	}
}
130
// usingByGroup is the serialisable implementation behind UsingByGroup.
type usingByGroup struct {
	SublexerNameGroup int      `xml:"sublexer_name_group"` // 1-based group holding the sublexer name
	CodeGroup         int      `xml:"code_group"`          // 1-based group holding the code to sublex
	Emitters          Emitters `xml:"emitters"`            // one emitter per capture group
}
136
137func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }
138func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
139	// bounds check
140	if len(u.Emitters) != len(groups)-1 {
141		panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
142	}
143
144	// grab sublexer
145	sublexer := state.Registry.Get(groups[u.SublexerNameGroup])
146
147	// build iterators
148	iterators := make([]Iterator, len(groups)-1)
149	for i, group := range groups[1:] {
150		if i == u.CodeGroup-1 && sublexer != nil {
151			var err error
152			iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
153			if err != nil {
154				panic(err)
155			}
156		} else if u.Emitters[i] != nil {
157			iterators[i] = u.Emitters[i].Emit([]string{group}, state)
158		}
159	}
160	return Concaterator(iterators...)
161}
162
163// UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
164//
165// This Emitter is not serialisable.
166func UsingLexer(lexer Lexer) Emitter {
167	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
168		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
169		if err != nil {
170			panic(err)
171		}
172		return it
173	})
174}
175
// usingEmitter is the serialisable implementation behind Using; it resolves
// the referenced lexer by name at emit time.
type usingEmitter struct {
	Lexer string `xml:"lexer,attr"` // name of the lexer in the registry
}
179
180func (u *usingEmitter) EmitterKind() string { return "using" }
181
182func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
183	if state.Registry == nil {
184		panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
185	}
186	lexer := state.Registry.Get(u.Lexer)
187	if lexer == nil {
188		panic(fmt.Sprintf("no such lexer %q", u.Lexer))
189	}
190	it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
191	if err != nil {
192		panic(err)
193	}
194	return it
195}
196
// Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
//
// The referenced lexer must be stored in the same LexerRegistry.
func Using(lexer string) Emitter {
	return &usingEmitter{Lexer: lexer}
}
203
// usingSelfEmitter is the serialisable implementation behind UsingSelf; it
// re-tokenises the match with the current lexer from the given state.
type usingSelfEmitter struct {
	State string `xml:"state,attr"` // lexer state to start tokenising from
}
207
208func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }
209
210func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
211	it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
212	if err != nil {
213		panic(err)
214	}
215	return it
216}
217
218// UsingSelf is like Using, but uses the current Lexer.
219func UsingSelf(stateName string) Emitter {
220	return &usingSelfEmitter{stateName}
221}