1package lexers
2
3import (
4 "regexp"
5 "slices"
6 "strings"
7 "unicode/utf8"
8
9 "github.com/dlclark/regexp2"
10
11 . "github.com/alecthomas/chroma/v2" // nolint
12)
13
14// Raku lexer.
15var Raku Lexer = Register(MustNewLexer(
16 &Config{
17 Name: "Raku",
18 Aliases: []string{"perl6", "pl6", "raku"},
19 Filenames: []string{
20 "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
21 "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
22 },
23 MimeTypes: []string{
24 "text/x-perl6", "application/x-perl6",
25 "text/x-raku", "application/x-raku",
26 },
27 DotAll: true,
28 },
29 rakuRules,
30))
31
32func rakuRules() Rules {
33 type RakuToken int
34
35 const (
36 rakuQuote RakuToken = iota
37 rakuNameAttribute
38 rakuPod
39 rakuPodFormatter
40 rakuPodDeclaration
41 rakuMultilineComment
42 rakuMatchRegex
43 rakuSubstitutionRegex
44 )
45
46 const (
47 colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
48 colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
49 colonPairPattern = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
50 colonPairLookahead = `(?=(:['\w-]+` +
51 colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`
52 namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`
53 variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern
54 globalVariablePattern = `[$@%&]+\*` + namePattern
55 )
56
57 keywords := []string{
58 `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
59 `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
60 `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
61 `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
62 `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
63 `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
64 `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
65 `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
66 `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
67 `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
68 `dynamic-scope`, `built`, `temp`,
69 }
70
71 keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
72
73 wordOperators := []string{
74 `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
75 `gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
76 `but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
77 `TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
78 `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
79 }
80
81 wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
82
83 operators := []string{
84 `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
85 `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
86 `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
87 `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
88 `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
89 `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
90 `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
91 `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
92 }
93
94 operatorsPattern := Words(``, ``, operators...)
95
96 builtinTypes := []string{
97 `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
98 `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
99 `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
100 `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
101 `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
102 `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
103 `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
104 `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
105 `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
106 `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
107 `Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
108 `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
109 `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
110 `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
111 `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
112 `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
113 `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
114 `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
115 `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
116 `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
117 `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
118 `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
119 `Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
120 `Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
121 `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
122 `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
123 `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
124 `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
125 `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
126 `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
127 `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
128 `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
129 `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
130 `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
131 `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
132 `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
133 `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
134 `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
135 `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
136 `WhateverCode`, `WrapHandle`, `NativeCall`,
137 // Pragmas
138 `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
139 `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
140 `strict`, `trace`, `variables`,
141 }
142
143 builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
144
145 builtinRoutines := []string{
146 `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
147 `acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
148 `add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
149 `add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
150 `all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
151 `antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
152 `archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
153 `ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
154 `atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
155 `atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
156 `await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
157 `basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
158 `bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
159 `bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
160 `callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
161 `candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
162 `cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
163 `cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
164 `child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
165 `classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
166 `codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
167 `command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
168 `compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
169 `configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
170 `content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
171 `count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
172 `curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
173 `day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
174 `default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
175 `DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
176 `diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
177 `DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
178 `eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
179 `endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
180 `eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
181 `excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
182 `expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
183 `FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
184 `find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
185 `flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
186 `free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
187 `full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
188 `gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
189 `has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
190 `hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
191 `indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
192 `install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
193 `invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
194 `is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
195 `is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
196 `is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
197 `kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
198 `lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
199 `List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
200 `loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
201 `map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
202 `methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
203 `MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
204 `mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
205 `nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
206 `new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
207 `nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
208 `nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
209 `Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
210 `ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
211 `ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
212 `package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
213 `parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
214 `parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
215 `permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
216 `polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
217 `precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
218 `primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
219 `private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
220 `protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
221 `push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
222 `quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
223 `read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
224 `read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
225 `read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
226 `reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
227 `rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
228 `replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
229 `result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
230 `rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
231 `rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
232 `samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
233 `sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
234 `set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
235 `set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
236 `set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
237 `setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
238 `short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
239 `signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
240 `skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
241 `Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
242 `socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
243 `splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
244 `started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
245 `store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
246 `subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
247 `subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
248 `take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
249 `term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
250 `tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
251 `trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
252 `trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
253 `typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
254 `uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
255 `unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
256 `USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
257 `verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
258 `watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
259 `what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
260 `with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
261 `write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
262 `write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
263 `write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
264 `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
265 }
266
267 builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
268
269 // A map of opening and closing brackets
270 brackets := map[rune]rune{
271 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
272 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
273 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
274 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
275 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
276 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
277 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
278 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
279 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
280 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
281 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
282 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
283 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
284 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
285 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
286 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
287 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
288 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
289 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
290 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
291 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
292 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
293 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
294 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
295 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
296 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
297 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
298 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
299 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
300 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
301 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
302 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
303 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
304 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
305 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
306 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
307 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
308 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
309 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
310 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
311 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
312 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
313 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
314 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
315 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
316 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
317 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
318 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
319 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
320 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
321 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
322 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
323 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
324 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
325 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
326 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
327 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
328 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
329 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
330 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
331 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
332 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
333 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
334 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
335 }
336
337 bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
338
339 // Finds opening brackets and their closing counterparts (including pod and heredoc)
340 // and modifies state groups and position accordingly
341 findBrackets := func(tokenClass RakuToken) MutatorFunc {
342 return func(state *LexerState) error {
343 var openingChars []rune
344 var adverbs []rune
345 switch tokenClass {
346 case rakuPod:
347 openingChars = []rune(strings.Join(state.Groups[1:5], ``))
348 default:
349 adverbs = []rune(state.NamedGroups[`adverbs`])
350 openingChars = []rune(state.NamedGroups[`opening_delimiters`])
351 }
352
353 openingChar := openingChars[0]
354
355 nChars := len(openingChars)
356
357 var closingChar rune
358 var closingCharExists bool
359 var closingChars []rune
360
361 switch tokenClass {
362 case rakuPod:
363 closingCharExists = true
364 default:
365 closingChar, closingCharExists = brackets[openingChar]
366 }
367
368 switch tokenClass {
369 case rakuPodFormatter:
370 formatter := StringOther
371
372 switch state.NamedGroups[`keyword`] {
373 case "B":
374 formatter = GenericStrong
375 case "I":
376 formatter = GenericEmph
377 case "U":
378 formatter = GenericUnderline
379 }
380
381 formatterRule := ruleReplacingConfig{
382 pattern: `.+?`,
383 tokenType: formatter,
384 mutator: nil,
385 stateName: `pod-formatter`,
386 rulePosition: bottomRule,
387 }
388
389 err := replaceRule(formatterRule)(state)
390 if err != nil {
391 panic(err)
392 }
393
394 err = replaceRule(ruleReplacingConfig{
395 delimiter: []rune{closingChar},
396 tokenType: Punctuation,
397 stateName: `pod-formatter`,
398 pushState: true,
399 numberOfDelimiterChars: nChars,
400 appendMutator: popRule(formatterRule),
401 })(state)
402 if err != nil {
403 panic(err)
404 }
405
406 return nil
407 case rakuMatchRegex:
408 var delimiter []rune
409 if closingCharExists {
410 delimiter = []rune{closingChar}
411 } else {
412 delimiter = openingChars
413 }
414
415 err := replaceRule(ruleReplacingConfig{
416 delimiter: delimiter,
417 tokenType: Punctuation,
418 stateName: `regex`,
419 popState: true,
420 pushState: true,
421 })(state)
422 if err != nil {
423 panic(err)
424 }
425
426 return nil
427 case rakuSubstitutionRegex:
428 delimiter := regexp2.Escape(string(openingChars))
429
430 err := replaceRule(ruleReplacingConfig{
431 pattern: `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
432 tokenType: ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
433 rulePosition: topRule,
434 stateName: `regex`,
435 popState: true,
436 pushState: true,
437 })(state)
438 if err != nil {
439 panic(err)
440 }
441
442 return nil
443 }
444
445 text := state.Text
446
447 var endPos int
448
449 var nonMirroredOpeningCharPosition int
450
451 if !closingCharExists {
452 // it's not a mirrored character, which means we
453 // just need to look for the next occurrence
454 closingChars = openingChars
455 nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
456 endPos = nonMirroredOpeningCharPosition
457 } else {
458 var podRegex *regexp2.Regexp
459 if tokenClass == rakuPod {
460 podRegex = regexp2.MustCompile(
461 state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
462 0,
463 )
464 } else {
465 closingChars = []rune(strings.Repeat(string(closingChar), nChars))
466 }
467
468 // we need to look for the corresponding closing character,
469 // keep nesting in mind
470 nestingLevel := 1
471
472 searchPos := state.Pos - nChars
473
474 var nextClosePos int
475
476 for nestingLevel > 0 {
477 if tokenClass == rakuPod {
478 match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
479 if err == nil {
480 closingChars = match.Runes()
481 nextClosePos = match.Index
482 } else {
483 nextClosePos = -1
484 }
485 } else {
486 nextClosePos = indexAt(text, closingChars, searchPos+nChars)
487 }
488
489 nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
490
491 switch {
492 case nextClosePos == -1:
493 nextClosePos = len(text)
494 nestingLevel = 0
495 case nextOpenPos != -1 && nextOpenPos < nextClosePos:
496 nestingLevel++
497 nChars = len(openingChars)
498 searchPos = nextOpenPos
499 default: // next_close_pos < next_open_pos
500 nestingLevel--
501 nChars = len(closingChars)
502 searchPos = nextClosePos
503 }
504 }
505
506 endPos = nextClosePos
507 }
508
509 if endPos < 0 {
510 // if we didn't find a closer, just highlight the
511 // rest of the text in this class
512 endPos = len(text)
513 }
514
515 adverbre := regexp.MustCompile(`:to\b|:heredoc\b`)
516 var heredocTerminator []rune
517 var endHeredocPos int
518 if adverbre.MatchString(string(adverbs)) {
519 if endPos != len(text) {
520 heredocTerminator = text[state.Pos:endPos]
521 nChars = len(heredocTerminator)
522 } else {
523 endPos = state.Pos + 1
524 heredocTerminator = []rune{}
525 nChars = 0
526 }
527
528 if nChars > 0 {
529 endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
530 if endHeredocPos > -1 {
531 endPos += endHeredocPos
532 } else {
533 endPos = len(text)
534 }
535 }
536 }
537
538 textBetweenBrackets := string(text[state.Pos:endPos])
539 switch tokenClass {
540 case rakuPod, rakuPodDeclaration, rakuNameAttribute:
541 state.NamedGroups[`value`] = textBetweenBrackets
542 state.NamedGroups[`closing_delimiters`] = string(closingChars)
543 case rakuQuote:
544 if len(heredocTerminator) > 0 {
545 // Length of heredoc terminator + closing chars + `;`
546 heredocFristPunctuationLen := nChars + len(openingChars) + 1
547
548 state.NamedGroups[`opening_delimiters`] = string(openingChars) +
549 string(text[state.Pos:state.Pos+heredocFristPunctuationLen])
550
551 state.NamedGroups[`value`] =
552 string(text[state.Pos+heredocFristPunctuationLen : endPos])
553
554 if endHeredocPos > -1 {
555 state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
556 }
557 } else {
558 state.NamedGroups[`value`] = textBetweenBrackets
559 if nChars > 0 {
560 state.NamedGroups[`closing_delimiters`] = string(closingChars)
561 }
562 }
563 default:
564 state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
565 }
566
567 state.Pos = endPos + nChars
568
569 return nil
570 }
571 }
572
573 // Raku rules
574 // Empty capture groups are placeholders and will be replaced by mutators
575 // DO NOT REMOVE THEM!
576 return Rules{
577 "root": {
578 // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
579 {`\A\z`, nil, nil},
580 Include("common"),
581 {`{`, Punctuation, Push(`root`)},
582 {`\(`, Punctuation, Push(`root`)},
583 {`[)}]`, Punctuation, Pop(1)},
584 {`;`, Punctuation, nil},
585 {`\[|\]`, Operator, nil},
586 {`.+?`, Text, nil},
587 },
588 "common": {
589 {`^#![^\n]*$`, CommentHashbang, nil},
590 Include("pod"),
591 // Multi-line, Embedded comment
592 {
593 "#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
594 CommentMultiline,
595 findBrackets(rakuMultilineComment),
596 },
597 {`#[^\n]*$`, CommentSingle, nil},
598 // /regex/
599 {
600 `(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
601 ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
602 nil,
603 },
604 Include("variable"),
605 // ::?VARIABLE
606 {`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
607 // Version
608 {
609 `\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
610 ByGroups(Keyword, NumberInteger, NameEntity, Operator),
611 nil,
612 },
613 Include("number"),
614 // Hyperoperator | »*«
615 {`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
616 {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
617 // Hyperoperator | «*«
618 {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
619 {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
620 // Hyperoperator | »*»
621 {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
622 {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
623 // <<quoted words>>
624 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
625 // «quoted words»
626 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
627 // [<]
628 {`(?<=\[\\?)<(?=\])`, Operator, nil},
629 // < and > operators | something < onething > something
630 {
631 `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
632 ByGroups(Operator, UsingSelf("root"), Operator),
633 nil,
634 },
635 // <quoted words>
636 {
637 `(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
638 ByGroups(Punctuation, String, Punctuation),
639 nil,
640 },
641 {`C?X::['\w:-]+`, NameException, nil},
642 Include("metaoperator"),
643 // Pair | key => value
644 {
645 `(\w[\w'-]*)(\s*)(=>)`,
646 ByGroups(String, Text, Operator),
647 nil,
648 },
649 Include("colon-pair"),
650 // Token
651 {
652 `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
653 NameFunction,
654 Push("token", "name-adverb"),
655 },
656 // Substitution
657 {`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
658 {keywordsPattern, Keyword, nil},
659 {builtinTypesPattern, KeywordType, nil},
660 {builtinRoutinesPattern, NameBuiltin, nil},
661 // Class name
662 {
663 `(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
664 NameClass,
665 Push("name-adverb"),
666 },
667 // Routine
668 {
669 `(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
670 NameFunction,
671 Push("name-adverb"),
672 },
673 // Constant
674 {`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
675 // Namespace
676 {`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
677 Include("operator"),
678 Include("single-quote"),
679 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
680 // m,rx regex
681 {`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
682 // Quote constructs
683 {
684 `(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
685 EmitterFunc(quote),
686 findBrackets(rakuQuote),
687 },
688 // Function
689 {
690 `\b` + namePattern + colonPairLookahead + `\()`,
691 NameFunction,
692 Push("name-adverb"),
693 },
694 // Method
695 {
696 `(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
697 NameFunction,
698 Push("name-adverb"),
699 },
700 // Indirect invocant
701 {namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
702 {`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
703 {`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
704 {`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
705 // Sigilless variable
706 {
707 `(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
708 NameVariable,
709 Push("name-adverb"),
710 },
711 {namePattern, Name, Push("name-adverb")},
712 },
713 "rx": {
714 Include("colon-pair-attribute"),
715 {
716 `(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
717 ByGroupNames(
718 map[string]Emitter{
719 `opening_delimiters`: Punctuation,
720 `delimiter`: nil,
721 },
722 ),
723 findBrackets(rakuMatchRegex),
724 },
725 },
726 "substitution": {
727 Include("colon-pair-attribute"),
728 // Substitution | s{regex} = value
729 {
730 `(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
731 ByGroupNames(map[string]Emitter{
732 `opening_delimiters`: Punctuation,
733 `delimiter`: nil,
734 }),
735 findBrackets(rakuMatchRegex),
736 },
737 // Substitution | s/regex/string/
738 {
739 `(?<opening_delimiters>[^\w:\s])`,
740 Punctuation,
741 findBrackets(rakuSubstitutionRegex),
742 },
743 },
744 "number": {
745 {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
746 {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
747 {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
748 {
749 `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
750 LiteralNumberFloat,
751 nil,
752 },
753 {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
754 {`(?<=\d+)i`, NameConstant, nil},
755 {`\d+(_\d+)*`, LiteralNumberInteger, nil},
756 },
757 "name-adverb": {
758 Include("colon-pair-attribute-keyvalue"),
759 Default(Pop(1)),
760 },
761 "colon-pair": {
762 // :key(value)
763 {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
764 // :123abc
765 {
766 `(:)(\d+)(\w[\w'-]*)`,
767 ByGroups(Punctuation, UsingSelf("number"), String),
768 nil,
769 },
770 // :key
771 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
772 {`\s+`, Text, nil},
773 },
774 "colon-pair-attribute": {
775 // :key(value)
776 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
777 // :123abc
778 {
779 `(:)(\d+)(\w[\w'-]*)`,
780 ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
781 nil,
782 },
783 // :key
784 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
785 {`\s+`, Text, nil},
786 },
787 "colon-pair-attribute-keyvalue": {
788 // :key(value)
789 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
790 },
791 "escape-qq": {
792 {
793 `(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
794 ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
795 nil,
796 },
797 },
798 `escape-char`: {
799 {`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
800 },
801 `escape-single-quote`: {
802 {`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
803 },
804 "escape-c-name": {
805 {
806 `(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
807 ByGroups(StringEscape, Punctuation, String, Punctuation),
808 nil,
809 },
810 },
811 "escape-hexadecimal": {
812 {
813 `(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
814 ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
815 nil,
816 },
817 {`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
818 },
819 "regex": {
820 // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
821 {`\A\z`, nil, nil},
822 Include("regex-escape-class"),
823 Include(`regex-character-escape`),
824 // $(code)
825 {
826 `([$@])((?<!(?<!\\)\\)\()`,
827 ByGroups(Keyword, Punctuation),
828 replaceRule(ruleReplacingConfig{
829 delimiter: []rune(`)`),
830 tokenType: Punctuation,
831 stateName: `root`,
832 pushState: true,
833 }),
834 },
835 // Exclude $/ from variables, because we can't get out of the end of the slash regex: $/;
836 {`\$(?=/)`, NameEntity, nil},
837 // Exclude $ from variables
838 {`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
839 Include("variable"),
840 Include("escape-c-name"),
841 Include("escape-hexadecimal"),
842 Include("number"),
843 Include("single-quote"),
844 // :my variable code ...
845 {
846 `(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
847 ByGroups(Operator, KeywordDeclaration),
848 replaceRule(ruleReplacingConfig{
849 delimiter: []rune(`;`),
850 tokenType: Punctuation,
851 stateName: `root`,
852 pushState: true,
853 }),
854 },
855 // <{code}>
856 {
857 `(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
858 ByGroups(Punctuation, Operator, Punctuation),
859 replaceRule(ruleReplacingConfig{
860 delimiter: []rune(`}>`),
861 tokenType: Punctuation,
862 stateName: `root`,
863 pushState: true,
864 }),
865 },
866 // {code}
867 Include(`closure`),
868 // Properties
869 {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
870 // Operator
871 {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
872 // Anchors
873 {`\^\^|\^|\$\$|\$`, NameEntity, nil},
874 {`\.`, NameEntity, nil},
875 {`#[^\n]*\n`, CommentSingle, nil},
876 // Lookaround
877 {
878 `(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
879 ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
880 replaceRule(ruleReplacingConfig{
881 delimiter: []rune(`>`),
882 tokenType: Punctuation,
883 stateName: `regex`,
884 pushState: true,
885 }),
886 },
887 {
888 `(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
889 ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
890 nil,
891 },
892 // <$variable>
893 {
894 `(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
895 ByGroups(Punctuation, Operator, NameVariable, Punctuation),
896 nil,
897 },
898 // Capture markers
899 {`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
900 {
901 `(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
902 ByGroups(Punctuation, NameVariable, Operator),
903 Push(`regex-variable`),
904 },
905 {
906 `(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
907 ByGroups(Punctuation, Operator, NameFunction),
908 Push(`regex-function`),
909 },
910 {`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
911 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
912 {`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
913 {`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
914 {`.+?`, StringRegex, nil},
915 },
916 "regex-class-builtin": {
917 {
918 `\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
919 NameBuiltin,
920 nil,
921 },
922 },
923 "regex-function": {
924 // <function>
925 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
926 // <function(parameter)>
927 {
928 `\(`,
929 Punctuation,
930 replaceRule(ruleReplacingConfig{
931 delimiter: []rune(`)>`),
932 tokenType: Punctuation,
933 stateName: `root`,
934 popState: true,
935 pushState: true,
936 }),
937 },
938 // <function value>
939 {
940 `\s+`,
941 StringRegex,
942 replaceRule(ruleReplacingConfig{
943 delimiter: []rune(`>`),
944 tokenType: Punctuation,
945 stateName: `regex`,
946 popState: true,
947 pushState: true,
948 }),
949 },
950 // <function: value>
951 {
952 `:`,
953 Punctuation,
954 replaceRule(ruleReplacingConfig{
955 delimiter: []rune(`>`),
956 tokenType: Punctuation,
957 stateName: `root`,
958 popState: true,
959 pushState: true,
960 }),
961 },
962 },
963 "regex-variable": {
964 Include(`regex-starting-operators`),
965 // <var=function(
966 {
967 `(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
968 ByGroups(Operator, NameFunction),
969 Mutators(Pop(1), Push(`regex-function`)),
970 },
971 // <var=function>
972 {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
973 // <var=
974 Default(Pop(1), Push(`regex-property`)),
975 },
976 "regex-property": {
977 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
978 Include("regex-class-builtin"),
979 Include("variable"),
980 Include(`regex-starting-operators`),
981 Include("colon-pair-attribute"),
982 {`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
983 {`\+|\-`, Operator, nil},
984 {`@[\w':-]+`, NameVariable, nil},
985 {`.+?`, StringRegex, nil},
986 },
987 `regex-starting-operators`: {
988 {`(?<=<)[|!?.]+`, Operator, nil},
989 },
990 "regex-escape-class": {
991 {`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
992 },
993 `regex-character-escape`: {
994 {`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
995 },
996 "regex-character-class": {
997 {`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
998 Include("regex-escape-class"),
999 Include("escape-c-name"),
1000 Include("escape-hexadecimal"),
1001 Include(`regex-character-escape`),
1002 Include("number"),
1003 {`\.\.`, Operator, nil},
1004 {`.+?`, StringRegex, nil},
1005 },
1006 "metaoperator": {
1007 // Z[=>]
1008 {
1009 `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
1010 ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
1011 nil,
1012 },
1013 // Z=>
1014 {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
1015 },
1016 "operator": {
1017 // Word Operator
1018 {wordOperatorsPattern, OperatorWord, nil},
1019 // Operator
1020 {operatorsPattern, Operator, nil},
1021 },
1022 "pod": {
1023 // Single-line pod declaration
1024 {`(#[|=])\s`, Keyword, Push("pod-single")},
1025 // Multi-line pod declaration
1026 {
1027 "(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
1028 ByGroupNames(
1029 map[string]Emitter{
1030 `keyword`: Keyword,
1031 `opening_delimiters`: Punctuation,
1032 `delimiter`: nil,
1033 `value`: UsingSelf("pod-declaration"),
1034 `closing_delimiters`: Punctuation,
1035 }),
1036 findBrackets(rakuPodDeclaration),
1037 },
1038 Include("pod-blocks"),
1039 },
1040 "pod-blocks": {
1041 // =begin code
1042 {
1043 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
1044 EmitterFunc(podCode),
1045 nil,
1046 },
1047 // =begin
1048 {
1049 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
1050 ByGroupNames(
1051 map[string]Emitter{
1052 `ws`: Comment,
1053 `keyword`: Keyword,
1054 `ws2`: StringDoc,
1055 `name`: Keyword,
1056 `config`: EmitterFunc(podConfig),
1057 `value`: UsingSelf("pod-begin"),
1058 `closing_delimiters`: Keyword,
1059 }),
1060 findBrackets(rakuPod),
1061 },
1062 // =for ...
1063 {
1064 `(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1065 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1066 Push("pod-paragraph"),
1067 },
1068 // =config
1069 {
1070 `(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1071 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1072 nil,
1073 },
1074 // =alias
1075 {
1076 `(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
1077 ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
1078 nil,
1079 },
1080 // =encoding
1081 {
1082 `(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
1083 ByGroups(Comment, Keyword, StringDoc, Name),
1084 nil,
1085 },
1086 // =para ...
1087 {
1088 `(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
1089 ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1090 Push("pod-paragraph"),
1091 },
1092 // =head1 ...
1093 {
1094 `(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
1095 ByGroups(Comment, Keyword, GenericHeading, Keyword),
1096 Push("pod-heading"),
1097 },
1098 // =item ...
1099 {
1100 `(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
1101 ByGroups(Comment, Keyword, StringDoc, Keyword),
1102 Push("pod-paragraph"),
1103 },
1104 {
1105 `(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
1106 ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1107 Push("pod-finish"),
1108 },
1109 // ={custom} ...
1110 {
1111 `(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
1112 ByGroups(Comment, Name, StringDoc, Keyword),
1113 Push("pod-paragraph"),
1114 },
1115 // = podconfig
1116 {
1117 `(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
1118 colonPairClosingBrackets + `) *)*\n)`,
1119 ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
1120 nil,
1121 },
1122 },
1123 "pod-begin": {
1124 Include("pod-blocks"),
1125 Include("pre-pod-formatter"),
1126 {`.+?`, StringDoc, nil},
1127 },
1128 "pod-declaration": {
1129 Include("pre-pod-formatter"),
1130 {`.+?`, StringDoc, nil},
1131 },
1132 "pod-paragraph": {
1133 {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
1134 Include("pre-pod-formatter"),
1135 {`.+?`, StringDoc, nil},
1136 },
1137 "pod-single": {
1138 {`\n`, StringDoc, Pop(1)},
1139 Include("pre-pod-formatter"),
1140 {`.+?`, StringDoc, nil},
1141 },
1142 "pod-heading": {
1143 {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
1144 Include("pre-pod-formatter"),
1145 {`.+?`, GenericHeading, nil},
1146 },
1147 "pod-finish": {
1148 {`\z`, nil, Pop(1)},
1149 Include("pre-pod-formatter"),
1150 {`.+?`, StringDoc, nil},
1151 },
1152 "pre-pod-formatter": {
1153 // C<code>, B<bold>, ...
1154 {
1155 `(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
1156 ByGroups(Keyword, Punctuation),
1157 findBrackets(rakuPodFormatter),
1158 },
1159 },
1160 "pod-formatter": {
1161 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1162 {`>`, Punctuation, Pop(1)},
1163 Include("pre-pod-formatter"),
1164 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1165 {`.+?`, StringOther, nil},
1166 },
1167 "variable": {
1168 {variablePattern, NameVariable, Push("name-adverb")},
1169 {globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
1170 {`[$@]<[^>]+>`, NameVariable, nil},
1171 {`\$[/!¢]`, NameVariable, nil},
1172 {`[$@%]`, NameVariable, nil},
1173 },
1174 "single-quote": {
1175 {`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
1176 },
1177 "single-quote-inner": {
1178 {`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
1179 Include("escape-single-quote"),
1180 Include("escape-qq"),
1181 {`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
1182 },
1183 "double-quotes": {
1184 {`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
1185 Include("qq"),
1186 },
1187 "<<": {
1188 {`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1189 Include("ww"),
1190 },
1191 "«": {
1192 {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1193 Include("ww"),
1194 },
1195 "ww": {
1196 Include("single-quote"),
1197 Include("qq"),
1198 },
1199 "qq": {
1200 Include("qq-variable"),
1201 Include("closure"),
1202 Include(`escape-char`),
1203 Include("escape-hexadecimal"),
1204 Include("escape-c-name"),
1205 Include("escape-qq"),
1206 {`.+?`, StringDouble, nil},
1207 },
1208 "qq-variable": {
1209 {
1210 `(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
1211 NameVariable,
1212 Push("qq-variable-extras", "name-adverb"),
1213 },
1214 },
1215 "qq-variable-extras": {
1216 // Method
1217 {
1218 `(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
1219 ByGroupNames(map[string]Emitter{
1220 `operator`: Operator,
1221 `method_name`: NameFunction,
1222 }),
1223 Push(`name-adverb`),
1224 },
1225 // Function/Signature
1226 {
1227 `\(`, Punctuation, replaceRule(
1228 ruleReplacingConfig{
1229 delimiter: []rune(`)`),
1230 tokenType: Punctuation,
1231 stateName: `root`,
1232 pushState: true,
1233 }),
1234 },
1235 Default(Pop(1)),
1236 },
1237 "Q": {
1238 Include("escape-qq"),
1239 {`.+?`, String, nil},
1240 },
1241 "Q-closure": {
1242 Include("escape-qq"),
1243 Include("closure"),
1244 {`.+?`, String, nil},
1245 },
1246 "Q-variable": {
1247 Include("escape-qq"),
1248 Include("qq-variable"),
1249 {`.+?`, String, nil},
1250 },
1251 "closure": {
1252 {`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
1253 ruleReplacingConfig{
1254 delimiter: []rune(`}`),
1255 tokenType: Punctuation,
1256 stateName: `root`,
1257 pushState: true,
1258 }),
1259 },
1260 },
1261 "token": {
1262 // Token signature
1263 {`\(`, Punctuation, replaceRule(
1264 ruleReplacingConfig{
1265 delimiter: []rune(`)`),
1266 tokenType: Punctuation,
1267 stateName: `root`,
1268 pushState: true,
1269 }),
1270 },
1271 {`{`, Punctuation, replaceRule(
1272 ruleReplacingConfig{
1273 delimiter: []rune(`}`),
1274 tokenType: Punctuation,
1275 stateName: `regex`,
1276 popState: true,
1277 pushState: true,
1278 }),
1279 },
1280 {`\s*`, Text, nil},
1281 Default(Pop(1)),
1282 },
1283 }
1284}
1285
// joinRuneMap concatenates the keys of m into a single string.
//
// Go randomises map iteration order, so the keys are sorted by code point
// before they are joined; the same map therefore always produces the same
// string. The values of m are ignored. A nil or empty map yields "".
func joinRuneMap(m map[rune]rune) string {
	runes := make([]rune, 0, len(m))
	for k := range m {
		runes = append(runes, k)
	}

	// Sort to make the output independent of map iteration order.
	slices.Sort(runes)

	return string(runes)
}
1295
// indexAt returns the rune index in str of the first unescaped occurrence of
// substr at or after rune position pos, or -1 when there is none.
//
// An occurrence counts as escaped when, within str[pos:], it is immediately
// preceded by a backslash that is not itself preceded by another backslash.
// Escaped occurrences are skipped and the search continues after them. Only
// runes at or after pos are inspected, so a backslash at str[pos-1] does not
// escape a match found exactly at pos.
//
// A pos outside [0, len(str)] yields -1.
func indexAt(str []rune, substr []rune, pos int) int {
	if pos < 0 || pos > len(str) {
		return -1
	}

	strFromPos := str[pos:]
	text := string(strFromPos)

	byteIdx := strings.Index(text, string(substr))
	if byteIdx < 0 {
		return -1
	}

	// strings.Index reports a byte offset; convert it to a rune offset so it
	// can index strFromPos.
	idx := utf8.RuneCountInString(text[:byteIdx])

	escaped := (idx > 1 && strFromPos[idx-1] == '\\' && strFromPos[idx-2] != '\\') ||
		(idx == 1 && strFromPos[idx-1] == '\\')
	if escaped {
		// Search again past the escaped occurrence. The recursive result is
		// already a rune index relative to strFromPos, so it must not be
		// converted a second time, and -1 must be propagated as is.
		next := indexAt(strFromPos, substr, idx+1)
		if next < 0 {
			return -1
		}
		idx = next
	}

	return idx + pos
}
1321
// rulePosition selects which slot of a state's rule list a generated rule
// is written to.
type rulePosition int

const (
	// topRule (0) addresses the first rule of a state.
	topRule rulePosition = -iota
	// bottomRule (-1) addresses the last rule of a state.
	bottomRule
)
1328
// ruleMakingConfig describes a single rule to be compiled by makeRule.
type ruleMakingConfig struct {
	// delimiter, when non-empty, takes precedence over pattern: makeRule
	// builds a regex that matches the escaped delimiter text when it is not
	// preceded by an unescaped backslash.
	delimiter []rune
	// pattern is the raw regex used when delimiter is empty.
	pattern string
	// tokenType is the emitter stored on the compiled rule.
	tokenType Emitter
	// mutator is the mutator stored on the compiled rule.
	mutator Mutator
	// numberOfDelimiterChars, when greater than 1, makes makeRule repeat the
	// delimiter text that many times before building the regex.
	numberOfDelimiterChars int
}
1336
1337type ruleReplacingConfig struct {
1338 delimiter []rune
1339 pattern string
1340 tokenType Emitter
1341 numberOfDelimiterChars int
1342 mutator Mutator
1343 appendMutator Mutator
1344 rulePosition rulePosition
1345 stateName string
1346 pop bool
1347 popState bool
1348 pushState bool
1349}
1350
1351// Pops rule from state-stack and replaces the rule with the previous rule
1352func popRule(rule ruleReplacingConfig) MutatorFunc {
1353 return func(state *LexerState) error {
1354 stackName := genStackName(rule.stateName, rule.rulePosition)
1355
1356 stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1357
1358 if ok && len(stack) > 0 {
1359 // Pop from stack
1360 stack = stack[:len(stack)-1]
1361 lastRule := stack[len(stack)-1]
1362 lastRule.pushState = false
1363 lastRule.popState = false
1364 lastRule.pop = true
1365 state.Set(stackName, stack)
1366
1367 // Call replaceRule to use the last rule
1368 err := replaceRule(lastRule)(state)
1369 if err != nil {
1370 panic(err)
1371 }
1372 }
1373
1374 return nil
1375 }
1376}
1377
1378// Replaces a state's rule based on the rule config and position
1379func replaceRule(rule ruleReplacingConfig) MutatorFunc {
1380 return func(state *LexerState) error {
1381 stateName := rule.stateName
1382 stackName := genStackName(rule.stateName, rule.rulePosition)
1383
1384 stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1385 if !ok {
1386 stack = []ruleReplacingConfig{}
1387 }
1388
1389 // If state-stack is empty fill it with the placeholder rule
1390 if len(stack) == 0 {
1391 stack = []ruleReplacingConfig{
1392 {
1393 // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
1394 pattern: `\A\z`,
1395 tokenType: nil,
1396 mutator: nil,
1397 stateName: stateName,
1398 rulePosition: rule.rulePosition,
1399 },
1400 }
1401 state.Set(stackName, stack)
1402 }
1403
1404 var mutator Mutator
1405 mutators := []Mutator{}
1406
1407 switch {
1408 case rule.rulePosition == topRule && rule.mutator == nil:
1409 // Default mutator for top rule
1410 mutators = []Mutator{Pop(1), popRule(rule)}
1411 case rule.rulePosition == topRule && rule.mutator != nil:
1412 // Default mutator for top rule, when rule.mutator is set
1413 mutators = []Mutator{rule.mutator, popRule(rule)}
1414 case rule.mutator != nil:
1415 mutators = []Mutator{rule.mutator}
1416 }
1417
1418 if rule.appendMutator != nil {
1419 mutators = append(mutators, rule.appendMutator)
1420 }
1421
1422 if len(mutators) > 0 {
1423 mutator = Mutators(mutators...)
1424 } else {
1425 mutator = nil
1426 }
1427
1428 ruleConfig := ruleMakingConfig{
1429 pattern: rule.pattern,
1430 delimiter: rule.delimiter,
1431 numberOfDelimiterChars: rule.numberOfDelimiterChars,
1432 tokenType: rule.tokenType,
1433 mutator: mutator,
1434 }
1435
1436 cRule := makeRule(ruleConfig)
1437
1438 switch rule.rulePosition {
1439 case topRule:
1440 state.Rules[stateName][0] = cRule
1441 case bottomRule:
1442 state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
1443 }
1444
1445 // Pop state name from stack if asked. State should be popped first before Pushing
1446 if rule.popState {
1447 err := Pop(1).Mutate(state)
1448 if err != nil {
1449 panic(err)
1450 }
1451 }
1452
1453 // Push state name to stack if asked
1454 if rule.pushState {
1455 err := Push(stateName).Mutate(state)
1456 if err != nil {
1457 panic(err)
1458 }
1459 }
1460
1461 if !rule.pop {
1462 state.Set(stackName, append(stack, rule))
1463 }
1464
1465 return nil
1466 }
1467}
1468
1469// Generates rule replacing stack using state name and rule position
1470func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
1471 switch rulePosition {
1472 case topRule:
1473 stackName = stateName + `-top-stack`
1474 case bottomRule:
1475 stackName = stateName + `-bottom-stack`
1476 }
1477 return
1478}
1479
1480// Makes a compiled rule and returns it
1481func makeRule(config ruleMakingConfig) *CompiledRule {
1482 var rePattern string
1483
1484 if len(config.delimiter) > 0 {
1485 delimiter := string(config.delimiter)
1486
1487 if config.numberOfDelimiterChars > 1 {
1488 delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
1489 }
1490
1491 rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
1492 } else {
1493 rePattern = config.pattern
1494 }
1495
1496 regex := regexp2.MustCompile(rePattern, regexp2.None)
1497
1498 cRule := &CompiledRule{
1499 Rule: Rule{rePattern, config.tokenType, config.mutator},
1500 Regexp: regex,
1501 }
1502
1503 return cRule
1504}
1505
1506// Emitter for colon pairs, changes token state based on key and brackets
1507func colonPair(tokenClass TokenType) Emitter {
1508 return EmitterFunc(func(groups []string, state *LexerState) Iterator {
1509 iterators := []Iterator{}
1510 tokens := []Token{
1511 {Punctuation, state.NamedGroups[`colon`]},
1512 {Punctuation, state.NamedGroups[`opening_delimiters`]},
1513 {Punctuation, state.NamedGroups[`closing_delimiters`]},
1514 }
1515
1516 // Append colon
1517 iterators = append(iterators, Literator(tokens[0]))
1518
1519 if tokenClass == NameAttribute {
1520 iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
1521 } else {
1522 var keyTokenState string
1523 keyre := regexp.MustCompile(`^\d+$`)
1524 if keyre.MatchString(state.NamedGroups[`key`]) {
1525 keyTokenState = "common"
1526 } else {
1527 keyTokenState = "Q"
1528 }
1529
1530 // Use token state to Tokenise key
1531 if keyTokenState != "" {
1532 iterator, err := state.Lexer.Tokenise(
1533 &TokeniseOptions{
1534 State: keyTokenState,
1535 Nested: true,
1536 }, state.NamedGroups[`key`])
1537
1538 if err != nil {
1539 panic(err)
1540 } else {
1541 // Append key
1542 iterators = append(iterators, iterator)
1543 }
1544 }
1545 }
1546
1547 // Append punctuation
1548 iterators = append(iterators, Literator(tokens[1]))
1549
1550 var valueTokenState string
1551
1552 switch state.NamedGroups[`opening_delimiters`] {
1553 case "(", "{", "[":
1554 valueTokenState = "root"
1555 case "<<", "«":
1556 valueTokenState = "ww"
1557 case "<":
1558 valueTokenState = "Q"
1559 }
1560
1561 // Use token state to Tokenise value
1562 if valueTokenState != "" {
1563 iterator, err := state.Lexer.Tokenise(
1564 &TokeniseOptions{
1565 State: valueTokenState,
1566 Nested: true,
1567 }, state.NamedGroups[`value`])
1568
1569 if err != nil {
1570 panic(err)
1571 } else {
1572 // Append value
1573 iterators = append(iterators, iterator)
1574 }
1575 }
1576 // Append last punctuation
1577 iterators = append(iterators, Literator(tokens[2]))
1578
1579 return Concaterator(iterators...)
1580 })
1581}
1582
1583// Emitter for quoting constructs, changes token state based on quote name and adverbs
1584func quote(groups []string, state *LexerState) Iterator {
1585 keyword := state.NamedGroups[`keyword`]
1586 adverbsStr := state.NamedGroups[`adverbs`]
1587 iterators := []Iterator{}
1588 tokens := []Token{
1589 {Keyword, keyword},
1590 {StringAffix, adverbsStr},
1591 {Text, state.NamedGroups[`ws`]},
1592 {Punctuation, state.NamedGroups[`opening_delimiters`]},
1593 {Punctuation, state.NamedGroups[`closing_delimiters`]},
1594 }
1595
1596 // Append all tokens before dealing with the main string
1597 iterators = append(iterators, Literator(tokens[:4]...))
1598
1599 var tokenStates []string
1600
1601 // Set tokenStates based on adverbs
1602 adverbs := strings.Split(adverbsStr, ":")
1603 for _, adverb := range adverbs {
1604 switch adverb {
1605 case "c", "closure":
1606 tokenStates = append(tokenStates, "Q-closure")
1607 case "qq":
1608 tokenStates = append(tokenStates, "qq")
1609 case "ww":
1610 tokenStates = append(tokenStates, "ww")
1611 case "s", "scalar", "a", "array", "h", "hash", "f", "function":
1612 tokenStates = append(tokenStates, "Q-variable")
1613 }
1614 }
1615
1616 var tokenState string
1617
1618 switch {
1619 case keyword == "qq" || slices.Contains(tokenStates, "qq"):
1620 tokenState = "qq"
1621 case adverbsStr == "ww" || slices.Contains(tokenStates, "ww"):
1622 tokenState = "ww"
1623 case slices.Contains(tokenStates, "Q-closure") && slices.Contains(tokenStates, "Q-variable"):
1624 tokenState = "qq"
1625 case slices.Contains(tokenStates, "Q-closure"):
1626 tokenState = "Q-closure"
1627 case slices.Contains(tokenStates, "Q-variable"):
1628 tokenState = "Q-variable"
1629 default:
1630 tokenState = "Q"
1631 }
1632
1633 iterator, err := state.Lexer.Tokenise(
1634 &TokeniseOptions{
1635 State: tokenState,
1636 Nested: true,
1637 }, state.NamedGroups[`value`])
1638
1639 if err != nil {
1640 panic(err)
1641 } else {
1642 iterators = append(iterators, iterator)
1643 }
1644
1645 // Append the last punctuation
1646 iterators = append(iterators, Literator(tokens[4]))
1647
1648 return Concaterator(iterators...)
1649}
1650
1651// Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
1652func podConfig(groups []string, state *LexerState) Iterator {
1653 // Tokenise pod config
1654 iterator, err := state.Lexer.Tokenise(
1655 &TokeniseOptions{
1656 State: "colon-pair-attribute",
1657 Nested: true,
1658 }, groups[0])
1659
1660 if err != nil {
1661 panic(err)
1662 } else {
1663 return iterator
1664 }
1665}
1666
1667// Emitter for pod code, tokenises the code based on the lang specified
1668func podCode(groups []string, state *LexerState) Iterator {
1669 iterators := []Iterator{}
1670 tokens := []Token{
1671 {Comment, state.NamedGroups[`ws`]},
1672 {Keyword, state.NamedGroups[`keyword`]},
1673 {Keyword, state.NamedGroups[`ws2`]},
1674 {Keyword, state.NamedGroups[`name`]},
1675 {StringDoc, state.NamedGroups[`value`]},
1676 {Comment, state.NamedGroups[`ws3`]},
1677 {Keyword, state.NamedGroups[`end_keyword`]},
1678 {Keyword, state.NamedGroups[`ws4`]},
1679 {Keyword, state.NamedGroups[`name`]},
1680 }
1681
1682 // Append all tokens before dealing with the pod config
1683 iterators = append(iterators, Literator(tokens[:4]...))
1684
1685 // Tokenise pod config
1686 iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
1687
1688 langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
1689 var lang string
1690 if len(langMatch) > 1 {
1691 lang = langMatch[1]
1692 }
1693
1694 // Tokenise code based on lang property
1695 sublexer := Get(lang)
1696 if sublexer != nil {
1697 iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
1698
1699 if err != nil {
1700 panic(err)
1701 } else {
1702 iterators = append(iterators, iterator)
1703 }
1704 } else {
1705 iterators = append(iterators, Literator(tokens[4]))
1706 }
1707
1708 // Append the rest of the tokens
1709 iterators = append(iterators, Literator(tokens[5:]...))
1710
1711 return Concaterator(iterators...)
1712}