1package lexers
   2
   3import (
   4	"regexp"
   5	"slices"
   6	"strings"
   7	"unicode/utf8"
   8
   9	"github.com/dlclark/regexp2"
  10
  11	. "github.com/alecthomas/chroma/v2" // nolint
  12)
  13
  14// Raku lexer.
  15var Raku Lexer = Register(MustNewLexer(
  16	&Config{
  17		Name:    "Raku",
  18		Aliases: []string{"perl6", "pl6", "raku"},
  19		Filenames: []string{
  20			"*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
  21			"*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
  22		},
  23		MimeTypes: []string{
  24			"text/x-perl6", "application/x-perl6",
  25			"text/x-raku", "application/x-raku",
  26		},
  27		DotAll: true,
  28	},
  29	rakuRules,
  30))
  31
  32func rakuRules() Rules {
	// RakuToken identifies which kind of delimited construct a mutator is
	// handling; findBrackets switches on it to decide how delimiters are read
	// from the lexer state and how the result is written back.
	type RakuToken int

	const (
		rakuQuote RakuToken = iota
		rakuNameAttribute
		rakuPod
		rakuPodFormatter
		rakuPodDeclaration
		rakuMultilineComment
		rakuMatchRegex
		rakuSubstitutionRegex
	)

	// Regular expression fragments that are concatenated into rule patterns.
	const (
		// Delimiters that may open / close the value of a colon pair.
		colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
		colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
		// A colon pair: a single `:` (not preceded by another `:`), a key and
		// the opening delimiter(s), captured as named groups.
		colonPairPattern         = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
		// NOTE: this fragment deliberately leaves the `(?=` lookahead group
		// open; the pattern that embeds it must append the rest of the
		// lookahead and the closing `)`.
		colonPairLookahead       = `(?=(:['\w-]+` +
			colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`
		// A (possibly `::`-qualified) name that stops before a colon pair.
		namePattern           = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`
		// Sigil(s), an optional twigil character, then a name.
		variablePattern       = `[$@%&]+[.^:?=!~]?` + namePattern
		// Sigil(s), the `*` twigil, then a name.
		globalVariablePattern = `[$@%&]+\*` + namePattern
	)
  56
	// Words highlighted as keywords. The list also contains the `->` and
	// `-->` arrows; `default` appears twice.
	keywords := []string{
		`BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
		`KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
		`class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
		`grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
		`module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
		`where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
		`submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
		`use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
		`symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
		`pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
		`dynamic-scope`, `built`, `temp`,
	}

	// The lookarounds reject a match that is directly preceded or followed by
	// an identifier character (word character, `'`, `:` or `-`).
	// NOTE(review): Words is defined outside this file; presumably it joins
	// the words into a single alternation between prefix and suffix — confirm.
	keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
  72
	// Operators that are spelled as words or as parenthesised ASCII forms
	// such as `(elem)` and `(<=)`.
	wordOperators := []string{
		`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
		`gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
		`but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
		`TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
		`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
	}

	// A word operator must sit between line start/end, a word boundary or
	// whitespace on both sides.
	wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
  82
	// Symbolic operators, ASCII and Unicode. Some entries are listed more
	// than once (e.g. `+^`, `~^`, `?`, `?^`, `^`).
	operators := []string{
		`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
		`+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
		`<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
		`::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
		`??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
		`,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
		`⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
		`⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
	}

	// No prefix or suffix: symbolic operators may appear anywhere.
	operatorsPattern := Words(``, ``, operators...)
  95
  96	builtinTypes := []string{
  97		`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
  98		`atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
  99		`Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
 100		`CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
 101		`CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
 102		`CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
 103		`CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
 104		`CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
 105		`DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
 106		`Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
 107		`Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
 108		`Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
 109		`IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
 110		`IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
 111		`IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
 112		`IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
 113		`IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
 114		`Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
 115		`Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
 116		`Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
 117		`Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
 118		`Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
 119		`Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
 120		`Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
 121		`Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
 122		`Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
 123		`MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
 124		`Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
 125		`Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
 126		`Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
 127		`Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
 128		`PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
 129		`Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
 130		`Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
 131		`Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
 132		`Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
 133		`Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
 134		`Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
 135		`uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
 136		`WhateverCode`, `WrapHandle`, `NativeCall`,
 137		// Pragmas
 138		`precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
 139		`MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
 140		`strict`, `trace`, `variables`,
 141	}
 142
 143	builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
 144
 145	builtinRoutines := []string{
 146		`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
 147		`acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
 148		`add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
 149		`add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
 150		`all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
 151		`antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
 152		`archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
 153		`ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
 154		`atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
 155		`atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
 156		`await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
 157		`basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
 158		`bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
 159		`bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
 160		`callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
 161		`candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
 162		`cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
 163		`cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
 164		`child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
 165		`classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
 166		`codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
 167		`command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
 168		`compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
 169		`configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
 170		`content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
 171		`count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
 172		`curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
 173		`day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
 174		`default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
 175		`DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
 176		`diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
 177		`DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
 178		`eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
 179		`endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
 180		`eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
 181		`excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
 182		`expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
 183		`FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
 184		`find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
 185		`flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
 186		`free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
 187		`full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
 188		`gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
 189		`has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
 190		`hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
 191		`indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
 192		`install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
 193		`invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
 194		`is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
 195		`is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
 196		`is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
 197		`kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
 198		`lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
 199		`List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
 200		`loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
 201		`map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
 202		`methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
 203		`MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
 204		`mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
 205		`nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
 206		`new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
 207		`nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
 208		`nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
 209		`Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
 210		`ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
 211		`ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
 212		`package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
 213		`parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
 214		`parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
 215		`permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
 216		`polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
 217		`precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
 218		`primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
 219		`private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
 220		`protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
 221		`push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
 222		`quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
 223		`read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
 224		`read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
 225		`read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
 226		`reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
 227		`rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
 228		`replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
 229		`result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
 230		`rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
 231		`rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
 232		`samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
 233		`sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
 234		`set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
 235		`set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
 236		`set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
 237		`setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
 238		`short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
 239		`signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
 240		`skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
 241		`Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
 242		`socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
 243		`splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
 244		`started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
 245		`store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
 246		`subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
 247		`subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
 248		`take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
 249		`term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
 250		`tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
 251		`trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
 252		`trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
 253		`typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
 254		`uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
 255		`unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
 256		`USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
 257		`verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
 258		`watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
 259		`what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
 260		`with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
 261		`write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
 262		`write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
 263		`write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
 264		`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
 265	}
 266
 267	builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
 268
	// brackets maps every supported opening delimiter rune to its closing
	// counterpart. findBrackets looks the opening rune up here; a rune that
	// is not a key is treated as a non-mirrored delimiter that closes itself.
	brackets := map[rune]rune{
		'\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
		'\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
		'\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
		'\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
		'\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
		'\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
		'\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
		'\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
		'\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
		'\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
		'\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
		'\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
		'\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
		'\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
		'\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
		'\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
		'\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
		'\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
		'\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
		'\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
		'\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
		'\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
		'\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
		'\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
		'\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
		'\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
		'\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
		'\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
		'\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
		'\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
		'\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
		'\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
		'\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
		'\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
		'\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
		'\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
		'\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
		'\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
		'\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
		'\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
		'\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
		'\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
		'\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
		'\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
		'\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
		'\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
		'\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
		'\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
		'\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
		'\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
		'\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
		'\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
		'\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
		'\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
		'\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
		'\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
		'\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
		'\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
		'\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
		'\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
		'\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
		'\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
		'\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
		'\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
	}

	// Character class used by rules to recognise a bracket delimiter.
	// NOTE(review): joinRuneMap is defined outside this chunk; presumably it
	// concatenates the map's runes into one string — confirm.
	bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
 338
 339	// Finds opening brackets and their closing counterparts (including pod and heredoc)
 340	// and modifies state groups and position accordingly
 341	findBrackets := func(tokenClass RakuToken) MutatorFunc {
 342		return func(state *LexerState) error {
 343			var openingChars []rune
 344			var adverbs []rune
 345			switch tokenClass {
 346			case rakuPod:
 347				openingChars = []rune(strings.Join(state.Groups[1:5], ``))
 348			default:
 349				adverbs = []rune(state.NamedGroups[`adverbs`])
 350				openingChars = []rune(state.NamedGroups[`opening_delimiters`])
 351			}
 352
 353			openingChar := openingChars[0]
 354
 355			nChars := len(openingChars)
 356
 357			var closingChar rune
 358			var closingCharExists bool
 359			var closingChars []rune
 360
 361			switch tokenClass {
 362			case rakuPod:
 363				closingCharExists = true
 364			default:
 365				closingChar, closingCharExists = brackets[openingChar]
 366			}
 367
 368			switch tokenClass {
 369			case rakuPodFormatter:
 370				formatter := StringOther
 371
 372				switch state.NamedGroups[`keyword`] {
 373				case "B":
 374					formatter = GenericStrong
 375				case "I":
 376					formatter = GenericEmph
 377				case "U":
 378					formatter = GenericUnderline
 379				}
 380
 381				formatterRule := ruleReplacingConfig{
 382					pattern:      `.+?`,
 383					tokenType:    formatter,
 384					mutator:      nil,
 385					stateName:    `pod-formatter`,
 386					rulePosition: bottomRule,
 387				}
 388
 389				err := replaceRule(formatterRule)(state)
 390				if err != nil {
 391					panic(err)
 392				}
 393
 394				err = replaceRule(ruleReplacingConfig{
 395					delimiter:              []rune{closingChar},
 396					tokenType:              Punctuation,
 397					stateName:              `pod-formatter`,
 398					pushState:              true,
 399					numberOfDelimiterChars: nChars,
 400					appendMutator:          popRule(formatterRule),
 401				})(state)
 402				if err != nil {
 403					panic(err)
 404				}
 405
 406				return nil
 407			case rakuMatchRegex:
 408				var delimiter []rune
 409				if closingCharExists {
 410					delimiter = []rune{closingChar}
 411				} else {
 412					delimiter = openingChars
 413				}
 414
 415				err := replaceRule(ruleReplacingConfig{
 416					delimiter: delimiter,
 417					tokenType: Punctuation,
 418					stateName: `regex`,
 419					popState:  true,
 420					pushState: true,
 421				})(state)
 422				if err != nil {
 423					panic(err)
 424				}
 425
 426				return nil
 427			case rakuSubstitutionRegex:
 428				delimiter := regexp2.Escape(string(openingChars))
 429
 430				err := replaceRule(ruleReplacingConfig{
 431					pattern:      `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
 432					tokenType:    ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
 433					rulePosition: topRule,
 434					stateName:    `regex`,
 435					popState:     true,
 436					pushState:    true,
 437				})(state)
 438				if err != nil {
 439					panic(err)
 440				}
 441
 442				return nil
 443			}
 444
 445			text := state.Text
 446
 447			var endPos int
 448
 449			var nonMirroredOpeningCharPosition int
 450
 451			if !closingCharExists {
 452				// it's not a mirrored character, which means we
 453				// just need to look for the next occurrence
 454				closingChars = openingChars
 455				nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
 456				endPos = nonMirroredOpeningCharPosition
 457			} else {
 458				var podRegex *regexp2.Regexp
 459				if tokenClass == rakuPod {
 460					podRegex = regexp2.MustCompile(
 461						state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
 462						0,
 463					)
 464				} else {
 465					closingChars = []rune(strings.Repeat(string(closingChar), nChars))
 466				}
 467
 468				// we need to look for the corresponding closing character,
 469				// keep nesting in mind
 470				nestingLevel := 1
 471
 472				searchPos := state.Pos - nChars
 473
 474				var nextClosePos int
 475
 476				for nestingLevel > 0 {
 477					if tokenClass == rakuPod {
 478						match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
 479						if err == nil {
 480							closingChars = match.Runes()
 481							nextClosePos = match.Index
 482						} else {
 483							nextClosePos = -1
 484						}
 485					} else {
 486						nextClosePos = indexAt(text, closingChars, searchPos+nChars)
 487					}
 488
 489					nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
 490
 491					switch {
 492					case nextClosePos == -1:
 493						nextClosePos = len(text)
 494						nestingLevel = 0
 495					case nextOpenPos != -1 && nextOpenPos < nextClosePos:
 496						nestingLevel++
 497						nChars = len(openingChars)
 498						searchPos = nextOpenPos
 499					default: // next_close_pos < next_open_pos
 500						nestingLevel--
 501						nChars = len(closingChars)
 502						searchPos = nextClosePos
 503					}
 504				}
 505
 506				endPos = nextClosePos
 507			}
 508
 509			if endPos < 0 {
 510				// if we didn't find a closer, just highlight the
 511				// rest of the text in this class
 512				endPos = len(text)
 513			}
 514
 515			adverbre := regexp.MustCompile(`:to\b|:heredoc\b`)
 516			var heredocTerminator []rune
 517			var endHeredocPos int
 518			if adverbre.MatchString(string(adverbs)) {
 519				if endPos != len(text) {
 520					heredocTerminator = text[state.Pos:endPos]
 521					nChars = len(heredocTerminator)
 522				} else {
 523					endPos = state.Pos + 1
 524					heredocTerminator = []rune{}
 525					nChars = 0
 526				}
 527
 528				if nChars > 0 {
 529					endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
 530					if endHeredocPos > -1 {
 531						endPos += endHeredocPos
 532					} else {
 533						endPos = len(text)
 534					}
 535				}
 536			}
 537
 538			textBetweenBrackets := string(text[state.Pos:endPos])
 539			switch tokenClass {
 540			case rakuPod, rakuPodDeclaration, rakuNameAttribute:
 541				state.NamedGroups[`value`] = textBetweenBrackets
 542				state.NamedGroups[`closing_delimiters`] = string(closingChars)
 543			case rakuQuote:
 544				if len(heredocTerminator) > 0 {
 545					// Length of heredoc terminator + closing chars + `;`
 546					heredocFristPunctuationLen := nChars + len(openingChars) + 1
 547
 548					state.NamedGroups[`opening_delimiters`] = string(openingChars) +
 549						string(text[state.Pos:state.Pos+heredocFristPunctuationLen])
 550
 551					state.NamedGroups[`value`] =
 552						string(text[state.Pos+heredocFristPunctuationLen : endPos])
 553
 554					if endHeredocPos > -1 {
 555						state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
 556					}
 557				} else {
 558					state.NamedGroups[`value`] = textBetweenBrackets
 559					if nChars > 0 {
 560						state.NamedGroups[`closing_delimiters`] = string(closingChars)
 561					}
 562				}
 563			default:
 564				state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
 565			}
 566
 567			state.Pos = endPos + nChars
 568
 569			return nil
 570		}
 571	}
 572
 573	// Raku rules
 574	// Empty capture groups are placeholders and will be replaced by mutators
 575	// DO NOT REMOVE THEM!
 576	return Rules{
 577		"root": {
 578			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
 579			{`\A\z`, nil, nil},
 580			Include("common"),
 581			{`{`, Punctuation, Push(`root`)},
 582			{`\(`, Punctuation, Push(`root`)},
 583			{`[)}]`, Punctuation, Pop(1)},
 584			{`;`, Punctuation, nil},
 585			{`\[|\]`, Operator, nil},
 586			{`.+?`, Text, nil},
 587		},
 588		"common": {
 589			{`^#![^\n]*$`, CommentHashbang, nil},
 590			Include("pod"),
 591			// Multi-line, Embedded comment
 592			{
 593				"#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
 594				CommentMultiline,
 595				findBrackets(rakuMultilineComment),
 596			},
 597			{`#[^\n]*$`, CommentSingle, nil},
 598			// /regex/
 599			{
 600				`(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
 601				ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
 602				nil,
 603			},
 604			Include("variable"),
 605			// ::?VARIABLE
 606			{`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
 607			// Version
 608			{
 609				`\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
 610				ByGroups(Keyword, NumberInteger, NameEntity, Operator),
 611				nil,
 612			},
 613			Include("number"),
 614			// Hyperoperator | »*«
 615			{`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
 616			{`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
 617			// Hyperoperator | «*«
 618			{`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
 619			{`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
 620			// Hyperoperator | »*»
 621			{`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
 622			{`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
 623			// <<quoted words>>
 624			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
 625			// «quoted words»
 626			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
 627			// [<]
 628			{`(?<=\[\\?)<(?=\])`, Operator, nil},
 629			// < and > operators | something < onething > something
 630			{
 631				`(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
 632				ByGroups(Operator, UsingSelf("root"), Operator),
 633				nil,
 634			},
 635			// <quoted words>
 636			{
 637				`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
 638				ByGroups(Punctuation, String, Punctuation),
 639				nil,
 640			},
 641			{`C?X::['\w:-]+`, NameException, nil},
 642			Include("metaoperator"),
 643			// Pair | key => value
 644			{
 645				`(\w[\w'-]*)(\s*)(=>)`,
 646				ByGroups(String, Text, Operator),
 647				nil,
 648			},
 649			Include("colon-pair"),
 650			// Token
 651			{
 652				`(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
 653				NameFunction,
 654				Push("token", "name-adverb"),
 655			},
 656			// Substitution
 657			{`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
 658			{keywordsPattern, Keyword, nil},
 659			{builtinTypesPattern, KeywordType, nil},
 660			{builtinRoutinesPattern, NameBuiltin, nil},
 661			// Class name
 662			{
 663				`(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
 664				NameClass,
 665				Push("name-adverb"),
 666			},
 667			//  Routine
 668			{
 669				`(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
 670				NameFunction,
 671				Push("name-adverb"),
 672			},
 673			// Constant
 674			{`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
 675			// Namespace
 676			{`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
 677			Include("operator"),
 678			Include("single-quote"),
 679			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
 680			// m,rx regex
 681			{`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
 682			// Quote constructs
 683			{
 684				`(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
 685				EmitterFunc(quote),
 686				findBrackets(rakuQuote),
 687			},
 688			// Function
 689			{
 690				`\b` + namePattern + colonPairLookahead + `\()`,
 691				NameFunction,
 692				Push("name-adverb"),
 693			},
 694			// Method
 695			{
 696				`(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
 697				NameFunction,
 698				Push("name-adverb"),
 699			},
 700			// Indirect invocant
 701			{namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
 702			{`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
 703			{`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
 704			{`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
 705			// Sigilless variable
 706			{
 707				`(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
 708				NameVariable,
 709				Push("name-adverb"),
 710			},
 711			{namePattern, Name, Push("name-adverb")},
 712		},
 713		"rx": {
 714			Include("colon-pair-attribute"),
 715			{
 716				`(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
 717				ByGroupNames(
 718					map[string]Emitter{
 719						`opening_delimiters`: Punctuation,
 720						`delimiter`:          nil,
 721					},
 722				),
 723				findBrackets(rakuMatchRegex),
 724			},
 725		},
 726		"substitution": {
 727			Include("colon-pair-attribute"),
 728			// Substitution | s{regex} = value
 729			{
 730				`(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
 731				ByGroupNames(map[string]Emitter{
 732					`opening_delimiters`: Punctuation,
 733					`delimiter`:          nil,
 734				}),
 735				findBrackets(rakuMatchRegex),
 736			},
 737			// Substitution | s/regex/string/
 738			{
 739				`(?<opening_delimiters>[^\w:\s])`,
 740				Punctuation,
 741				findBrackets(rakuSubstitutionRegex),
 742			},
 743		},
 744		"number": {
 745			{`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
 746			{`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
 747			{`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
 748			{
 749				`(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
 750				LiteralNumberFloat,
 751				nil,
 752			},
 753			{`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
 754			{`(?<=\d+)i`, NameConstant, nil},
 755			{`\d+(_\d+)*`, LiteralNumberInteger, nil},
 756		},
 757		"name-adverb": {
 758			Include("colon-pair-attribute-keyvalue"),
 759			Default(Pop(1)),
 760		},
 761		"colon-pair": {
 762			// :key(value)
 763			{colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
 764			// :123abc
 765			{
 766				`(:)(\d+)(\w[\w'-]*)`,
 767				ByGroups(Punctuation, UsingSelf("number"), String),
 768				nil,
 769			},
 770			// :key
 771			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
 772			{`\s+`, Text, nil},
 773		},
 774		"colon-pair-attribute": {
 775			// :key(value)
 776			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
 777			// :123abc
 778			{
 779				`(:)(\d+)(\w[\w'-]*)`,
 780				ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
 781				nil,
 782			},
 783			// :key
 784			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
 785			{`\s+`, Text, nil},
 786		},
 787		"colon-pair-attribute-keyvalue": {
 788			// :key(value)
 789			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
 790		},
 791		"escape-qq": {
 792			{
 793				`(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
 794				ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
 795				nil,
 796			},
 797		},
 798		`escape-char`: {
 799			{`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
 800		},
 801		`escape-single-quote`: {
 802			{`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
 803		},
 804		"escape-c-name": {
 805			{
 806				`(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
 807				ByGroups(StringEscape, Punctuation, String, Punctuation),
 808				nil,
 809			},
 810		},
 811		"escape-hexadecimal": {
 812			{
 813				`(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
 814				ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
 815				nil,
 816			},
 817			{`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
 818		},
 819		"regex": {
 820			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
 821			{`\A\z`, nil, nil},
 822			Include("regex-escape-class"),
 823			Include(`regex-character-escape`),
 824			// $(code)
 825			{
 826				`([$@])((?<!(?<!\\)\\)\()`,
 827				ByGroups(Keyword, Punctuation),
 828				replaceRule(ruleReplacingConfig{
 829					delimiter: []rune(`)`),
 830					tokenType: Punctuation,
 831					stateName: `root`,
 832					pushState: true,
 833				}),
 834			},
 835			// Exclude $/ from variables, because we can't get out of the end of the slash regex: $/;
 836			{`\$(?=/)`, NameEntity, nil},
 837			// Exclude $ from variables
 838			{`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
 839			Include("variable"),
 840			Include("escape-c-name"),
 841			Include("escape-hexadecimal"),
 842			Include("number"),
 843			Include("single-quote"),
 844			// :my variable code ...
 845			{
 846				`(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
 847				ByGroups(Operator, KeywordDeclaration),
 848				replaceRule(ruleReplacingConfig{
 849					delimiter: []rune(`;`),
 850					tokenType: Punctuation,
 851					stateName: `root`,
 852					pushState: true,
 853				}),
 854			},
 855			// <{code}>
 856			{
 857				`(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
 858				ByGroups(Punctuation, Operator, Punctuation),
 859				replaceRule(ruleReplacingConfig{
 860					delimiter: []rune(`}>`),
 861					tokenType: Punctuation,
 862					stateName: `root`,
 863					pushState: true,
 864				}),
 865			},
 866			// {code}
 867			Include(`closure`),
 868			// Properties
 869			{`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
 870			// Operator
 871			{`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
 872			// Anchors
 873			{`\^\^|\^|\$\$|\$`, NameEntity, nil},
 874			{`\.`, NameEntity, nil},
 875			{`#[^\n]*\n`, CommentSingle, nil},
 876			// Lookaround
 877			{
 878				`(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
 879				ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
 880				replaceRule(ruleReplacingConfig{
 881					delimiter: []rune(`>`),
 882					tokenType: Punctuation,
 883					stateName: `regex`,
 884					pushState: true,
 885				}),
 886			},
 887			{
 888				`(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
 889				ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
 890				nil,
 891			},
 892			// <$variable>
 893			{
 894				`(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
 895				ByGroups(Punctuation, Operator, NameVariable, Punctuation),
 896				nil,
 897			},
 898			// Capture markers
 899			{`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
 900			{
 901				`(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
 902				ByGroups(Punctuation, NameVariable, Operator),
 903				Push(`regex-variable`),
 904			},
 905			{
 906				`(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
 907				ByGroups(Punctuation, Operator, NameFunction),
 908				Push(`regex-function`),
 909			},
 910			{`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
 911			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
 912			{`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
 913			{`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
 914			{`.+?`, StringRegex, nil},
 915		},
 916		"regex-class-builtin": {
 917			{
 918				`\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
 919				NameBuiltin,
 920				nil,
 921			},
 922		},
 923		"regex-function": {
 924			// <function>
 925			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
 926			// <function(parameter)>
 927			{
 928				`\(`,
 929				Punctuation,
 930				replaceRule(ruleReplacingConfig{
 931					delimiter: []rune(`)>`),
 932					tokenType: Punctuation,
 933					stateName: `root`,
 934					popState:  true,
 935					pushState: true,
 936				}),
 937			},
 938			// <function value>
 939			{
 940				`\s+`,
 941				StringRegex,
 942				replaceRule(ruleReplacingConfig{
 943					delimiter: []rune(`>`),
 944					tokenType: Punctuation,
 945					stateName: `regex`,
 946					popState:  true,
 947					pushState: true,
 948				}),
 949			},
 950			// <function: value>
 951			{
 952				`:`,
 953				Punctuation,
 954				replaceRule(ruleReplacingConfig{
 955					delimiter: []rune(`>`),
 956					tokenType: Punctuation,
 957					stateName: `root`,
 958					popState:  true,
 959					pushState: true,
 960				}),
 961			},
 962		},
 963		"regex-variable": {
 964			Include(`regex-starting-operators`),
 965			// <var=function(
 966			{
 967				`(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
 968				ByGroups(Operator, NameFunction),
 969				Mutators(Pop(1), Push(`regex-function`)),
 970			},
 971			// <var=function>
 972			{`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
 973			// <var=
 974			Default(Pop(1), Push(`regex-property`)),
 975		},
 976		"regex-property": {
 977			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
 978			Include("regex-class-builtin"),
 979			Include("variable"),
 980			Include(`regex-starting-operators`),
 981			Include("colon-pair-attribute"),
 982			{`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
 983			{`\+|\-`, Operator, nil},
 984			{`@[\w':-]+`, NameVariable, nil},
 985			{`.+?`, StringRegex, nil},
 986		},
 987		`regex-starting-operators`: {
 988			{`(?<=<)[|!?.]+`, Operator, nil},
 989		},
 990		"regex-escape-class": {
 991			{`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
 992		},
 993		`regex-character-escape`: {
 994			{`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
 995		},
 996		"regex-character-class": {
 997			{`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
 998			Include("regex-escape-class"),
 999			Include("escape-c-name"),
1000			Include("escape-hexadecimal"),
1001			Include(`regex-character-escape`),
1002			Include("number"),
1003			{`\.\.`, Operator, nil},
1004			{`.+?`, StringRegex, nil},
1005		},
1006		"metaoperator": {
1007			// Z[=>]
1008			{
1009				`\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
1010				ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
1011				nil,
1012			},
1013			// Z=>
1014			{`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
1015		},
1016		"operator": {
1017			// Word Operator
1018			{wordOperatorsPattern, OperatorWord, nil},
1019			// Operator
1020			{operatorsPattern, Operator, nil},
1021		},
1022		"pod": {
1023			// Single-line pod declaration
1024			{`(#[|=])\s`, Keyword, Push("pod-single")},
1025			// Multi-line pod declaration
1026			{
1027				"(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
1028				ByGroupNames(
1029					map[string]Emitter{
1030						`keyword`:            Keyword,
1031						`opening_delimiters`: Punctuation,
1032						`delimiter`:          nil,
1033						`value`:              UsingSelf("pod-declaration"),
1034						`closing_delimiters`: Punctuation,
1035					}),
1036				findBrackets(rakuPodDeclaration),
1037			},
1038			Include("pod-blocks"),
1039		},
1040		"pod-blocks": {
1041			// =begin code
1042			{
1043				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
1044				EmitterFunc(podCode),
1045				nil,
1046			},
1047			// =begin
1048			{
1049				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
1050				ByGroupNames(
1051					map[string]Emitter{
1052						`ws`:                 Comment,
1053						`keyword`:            Keyword,
1054						`ws2`:                StringDoc,
1055						`name`:               Keyword,
1056						`config`:             EmitterFunc(podConfig),
1057						`value`:              UsingSelf("pod-begin"),
1058						`closing_delimiters`: Keyword,
1059					}),
1060				findBrackets(rakuPod),
1061			},
1062			// =for ...
1063			{
1064				`(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1065				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1066				Push("pod-paragraph"),
1067			},
1068			// =config
1069			{
1070				`(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1071				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1072				nil,
1073			},
1074			// =alias
1075			{
1076				`(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
1077				ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
1078				nil,
1079			},
1080			// =encoding
1081			{
1082				`(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
1083				ByGroups(Comment, Keyword, StringDoc, Name),
1084				nil,
1085			},
1086			// =para ...
1087			{
1088				`(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
1089				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1090				Push("pod-paragraph"),
1091			},
1092			// =head1 ...
1093			{
1094				`(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
1095				ByGroups(Comment, Keyword, GenericHeading, Keyword),
1096				Push("pod-heading"),
1097			},
1098			// =item ...
1099			{
1100				`(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
1101				ByGroups(Comment, Keyword, StringDoc, Keyword),
1102				Push("pod-paragraph"),
1103			},
1104			{
1105				`(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
1106				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1107				Push("pod-finish"),
1108			},
1109			// ={custom} ...
1110			{
1111				`(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
1112				ByGroups(Comment, Name, StringDoc, Keyword),
1113				Push("pod-paragraph"),
1114			},
1115			// = podconfig
1116			{
1117				`(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
1118					colonPairClosingBrackets + `) *)*\n)`,
1119				ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
1120				nil,
1121			},
1122		},
1123		"pod-begin": {
1124			Include("pod-blocks"),
1125			Include("pre-pod-formatter"),
1126			{`.+?`, StringDoc, nil},
1127		},
1128		"pod-declaration": {
1129			Include("pre-pod-formatter"),
1130			{`.+?`, StringDoc, nil},
1131		},
1132		"pod-paragraph": {
1133			{`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
1134			Include("pre-pod-formatter"),
1135			{`.+?`, StringDoc, nil},
1136		},
1137		"pod-single": {
1138			{`\n`, StringDoc, Pop(1)},
1139			Include("pre-pod-formatter"),
1140			{`.+?`, StringDoc, nil},
1141		},
1142		"pod-heading": {
1143			{`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
1144			Include("pre-pod-formatter"),
1145			{`.+?`, GenericHeading, nil},
1146		},
1147		"pod-finish": {
1148			{`\z`, nil, Pop(1)},
1149			Include("pre-pod-formatter"),
1150			{`.+?`, StringDoc, nil},
1151		},
1152		"pre-pod-formatter": {
1153			// C<code>, B<bold>, ...
1154			{
1155				`(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
1156				ByGroups(Keyword, Punctuation),
1157				findBrackets(rakuPodFormatter),
1158			},
1159		},
1160		"pod-formatter": {
1161			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1162			{`>`, Punctuation, Pop(1)},
1163			Include("pre-pod-formatter"),
1164			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1165			{`.+?`, StringOther, nil},
1166		},
1167		"variable": {
1168			{variablePattern, NameVariable, Push("name-adverb")},
1169			{globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
1170			{`[$@]<[^>]+>`, NameVariable, nil},
1171			{`\$[/!¢]`, NameVariable, nil},
1172			{`[$@%]`, NameVariable, nil},
1173		},
1174		"single-quote": {
1175			{`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
1176		},
1177		"single-quote-inner": {
1178			{`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
1179			Include("escape-single-quote"),
1180			Include("escape-qq"),
1181			{`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
1182		},
1183		"double-quotes": {
1184			{`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
1185			Include("qq"),
1186		},
1187		"<<": {
1188			{`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1189			Include("ww"),
1190		},
1191		"«": {
1192			{`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1193			Include("ww"),
1194		},
1195		"ww": {
1196			Include("single-quote"),
1197			Include("qq"),
1198		},
1199		"qq": {
1200			Include("qq-variable"),
1201			Include("closure"),
1202			Include(`escape-char`),
1203			Include("escape-hexadecimal"),
1204			Include("escape-c-name"),
1205			Include("escape-qq"),
1206			{`.+?`, StringDouble, nil},
1207		},
1208		"qq-variable": {
1209			{
1210				`(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
1211				NameVariable,
1212				Push("qq-variable-extras", "name-adverb"),
1213			},
1214		},
1215		"qq-variable-extras": {
1216			// Method
1217			{
1218				`(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
1219				ByGroupNames(map[string]Emitter{
1220					`operator`:    Operator,
1221					`method_name`: NameFunction,
1222				}),
1223				Push(`name-adverb`),
1224			},
1225			// Function/Signature
1226			{
1227				`\(`, Punctuation, replaceRule(
1228					ruleReplacingConfig{
1229						delimiter: []rune(`)`),
1230						tokenType: Punctuation,
1231						stateName: `root`,
1232						pushState: true,
1233					}),
1234			},
1235			Default(Pop(1)),
1236		},
1237		"Q": {
1238			Include("escape-qq"),
1239			{`.+?`, String, nil},
1240		},
1241		"Q-closure": {
1242			Include("escape-qq"),
1243			Include("closure"),
1244			{`.+?`, String, nil},
1245		},
1246		"Q-variable": {
1247			Include("escape-qq"),
1248			Include("qq-variable"),
1249			{`.+?`, String, nil},
1250		},
1251		"closure": {
1252			{`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
1253				ruleReplacingConfig{
1254					delimiter: []rune(`}`),
1255					tokenType: Punctuation,
1256					stateName: `root`,
1257					pushState: true,
1258				}),
1259			},
1260		},
1261		"token": {
1262			// Token signature
1263			{`\(`, Punctuation, replaceRule(
1264				ruleReplacingConfig{
1265					delimiter: []rune(`)`),
1266					tokenType: Punctuation,
1267					stateName: `root`,
1268					pushState: true,
1269				}),
1270			},
1271			{`{`, Punctuation, replaceRule(
1272				ruleReplacingConfig{
1273					delimiter: []rune(`}`),
1274					tokenType: Punctuation,
1275					stateName: `regex`,
1276					popState:  true,
1277					pushState: true,
1278				}),
1279			},
1280			{`\s*`, Text, nil},
1281			Default(Pop(1)),
1282		},
1283	}
1284}
1285
// joinRuneMap returns the keys of m concatenated into a single string.
//
// Go randomises map iteration order, so the keys are sorted by code point
// before they are joined. This makes the result identical from one call (and
// one run) to the next. A nil or empty map yields the empty string.
func joinRuneMap(m map[rune]rune) string {
	keys := make([]rune, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}

	// Remove the dependency on map iteration order
	slices.Sort(keys)

	return string(keys)
}
1295
// indexAt returns the rune index in str of the first unescaped occurrence of
// substr at or after rune position pos. It returns -1 when there is no such
// occurrence or when pos lies outside str.
//
// An occurrence counts as escaped when it is directly preceded by a single
// backslash, i.e. a backslash that is not itself preceded by another
// backslash. Only runes inside the current search window are inspected: the
// window starts at pos and restarts one rune after the beginning of every
// escaped occurrence that is skipped.
func indexAt(str []rune, substr []rune, pos int) int {
	if pos < 0 || pos > len(str) {
		return -1
	}

	needle := string(substr)
	// text is the part of str that still has to be searched; offset is the
	// rune index in str at which that part begins.
	text := string(str[pos:])
	offset := pos

	for {
		byteIdx := strings.Index(text, needle)
		if byteIdx < 0 {
			return -1
		}

		// strings.Index reports a byte offset, callers work with rune indexes
		idx := utf8.RuneCountInString(text[:byteIdx])
		window := str[offset:]

		escaped := idx > 0 && window[idx-1] == '\\' &&
			(idx == 1 || window[idx-2] != '\\')
		if !escaped {
			return offset + idx
		}

		// Step over the first rune of the escaped occurrence and search again.
		// An escaped match is never empty, so there is a rune to decode here.
		_, size := utf8.DecodeRuneInString(text[byteIdx:])
		text = text[byteIdx+size:]
		offset += idx + 1
	}
}
1321
// rulePosition tells which rule of a state a rule replacement targets.
type rulePosition int

const (
	// topRule targets the first rule of a state.
	topRule rulePosition = 0
	// bottomRule targets the last rule of a state.
	bottomRule rulePosition = -1
)
1328
// ruleMakingConfig describes a single rule that makeRule compiles.
type ruleMakingConfig struct {
	// delimiter, when non-empty, is escaped and turned into the rule's
	// pattern (guarded against a preceding backslash escape); pattern is
	// ignored in that case.
	delimiter              []rune
	// pattern is the regular expression used when delimiter is empty.
	pattern                string
	// tokenType is the emitter stored in the compiled rule.
	tokenType              Emitter
	// mutator is the mutator stored in the compiled rule, may be nil.
	mutator                Mutator
	// numberOfDelimiterChars, when greater than 1, is the number of times
	// delimiter is repeated before it is escaped.
	numberOfDelimiterChars int
}
1336
// ruleReplacingConfig describes a rule replacement performed by replaceRule
// and is also the element type of the per-state stacks used by popRule.
type ruleReplacingConfig struct {
	// delimiter, pattern, tokenType and numberOfDelimiterChars are handed to
	// makeRule unchanged to build the replacement rule.
	delimiter              []rune
	pattern                string
	tokenType              Emitter
	numberOfDelimiterChars int
	// mutator, when set, is used instead of the default Pop(1) that
	// replaceRule attaches to a top rule; for a bottom rule it is the rule's
	// own mutator.
	mutator                Mutator
	// appendMutator, when set, runs after the other mutators of the rule.
	appendMutator          Mutator
	// rulePosition selects the rule slot (first or last rule of the state)
	// and the stack the config is recorded on.
	rulePosition           rulePosition
	// stateName is the state whose rule is replaced; it is also the state
	// pushed when pushState is set.
	stateName              string
	// pop marks a config that is being re-installed by popRule; replaceRule
	// does not push such a config onto the stack again.
	pop                    bool
	// popState makes replaceRule apply Pop(1) after replacing the rule.
	popState               bool
	// pushState makes replaceRule push stateName after replacing the rule
	// (and after the optional pop).
	pushState              bool
}
1350
1351// Pops rule from state-stack and replaces the rule with the previous rule
1352func popRule(rule ruleReplacingConfig) MutatorFunc {
1353	return func(state *LexerState) error {
1354		stackName := genStackName(rule.stateName, rule.rulePosition)
1355
1356		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1357
1358		if ok && len(stack) > 0 {
1359			// Pop from stack
1360			stack = stack[:len(stack)-1]
1361			lastRule := stack[len(stack)-1]
1362			lastRule.pushState = false
1363			lastRule.popState = false
1364			lastRule.pop = true
1365			state.Set(stackName, stack)
1366
1367			// Call replaceRule to use the last rule
1368			err := replaceRule(lastRule)(state)
1369			if err != nil {
1370				panic(err)
1371			}
1372		}
1373
1374		return nil
1375	}
1376}
1377
1378// Replaces a state's rule based on the rule config and position
1379func replaceRule(rule ruleReplacingConfig) MutatorFunc {
1380	return func(state *LexerState) error {
1381		stateName := rule.stateName
1382		stackName := genStackName(rule.stateName, rule.rulePosition)
1383
1384		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1385		if !ok {
1386			stack = []ruleReplacingConfig{}
1387		}
1388
1389		// If state-stack is empty fill it with the placeholder rule
1390		if len(stack) == 0 {
1391			stack = []ruleReplacingConfig{
1392				{
1393					// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
1394					pattern:      `\A\z`,
1395					tokenType:    nil,
1396					mutator:      nil,
1397					stateName:    stateName,
1398					rulePosition: rule.rulePosition,
1399				},
1400			}
1401			state.Set(stackName, stack)
1402		}
1403
1404		var mutator Mutator
1405		mutators := []Mutator{}
1406
1407		switch {
1408		case rule.rulePosition == topRule && rule.mutator == nil:
1409			// Default mutator for top rule
1410			mutators = []Mutator{Pop(1), popRule(rule)}
1411		case rule.rulePosition == topRule && rule.mutator != nil:
1412			// Default mutator for top rule, when rule.mutator is set
1413			mutators = []Mutator{rule.mutator, popRule(rule)}
1414		case rule.mutator != nil:
1415			mutators = []Mutator{rule.mutator}
1416		}
1417
1418		if rule.appendMutator != nil {
1419			mutators = append(mutators, rule.appendMutator)
1420		}
1421
1422		if len(mutators) > 0 {
1423			mutator = Mutators(mutators...)
1424		} else {
1425			mutator = nil
1426		}
1427
1428		ruleConfig := ruleMakingConfig{
1429			pattern:                rule.pattern,
1430			delimiter:              rule.delimiter,
1431			numberOfDelimiterChars: rule.numberOfDelimiterChars,
1432			tokenType:              rule.tokenType,
1433			mutator:                mutator,
1434		}
1435
1436		cRule := makeRule(ruleConfig)
1437
1438		switch rule.rulePosition {
1439		case topRule:
1440			state.Rules[stateName][0] = cRule
1441		case bottomRule:
1442			state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
1443		}
1444
1445		// Pop state name from stack if asked. State should be popped first before Pushing
1446		if rule.popState {
1447			err := Pop(1).Mutate(state)
1448			if err != nil {
1449				panic(err)
1450			}
1451		}
1452
1453		// Push state name to stack if asked
1454		if rule.pushState {
1455			err := Push(stateName).Mutate(state)
1456			if err != nil {
1457				panic(err)
1458			}
1459		}
1460
1461		if !rule.pop {
1462			state.Set(stackName, append(stack, rule))
1463		}
1464
1465		return nil
1466	}
1467}
1468
1469// Generates rule replacing stack using state name and rule position
1470func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
1471	switch rulePosition {
1472	case topRule:
1473		stackName = stateName + `-top-stack`
1474	case bottomRule:
1475		stackName = stateName + `-bottom-stack`
1476	}
1477	return
1478}
1479
1480// Makes a compiled rule and returns it
1481func makeRule(config ruleMakingConfig) *CompiledRule {
1482	var rePattern string
1483
1484	if len(config.delimiter) > 0 {
1485		delimiter := string(config.delimiter)
1486
1487		if config.numberOfDelimiterChars > 1 {
1488			delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
1489		}
1490
1491		rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
1492	} else {
1493		rePattern = config.pattern
1494	}
1495
1496	regex := regexp2.MustCompile(rePattern, regexp2.None)
1497
1498	cRule := &CompiledRule{
1499		Rule:   Rule{rePattern, config.tokenType, config.mutator},
1500		Regexp: regex,
1501	}
1502
1503	return cRule
1504}
1505
1506// Emitter for colon pairs, changes token state based on key and brackets
1507func colonPair(tokenClass TokenType) Emitter {
1508	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
1509		iterators := []Iterator{}
1510		tokens := []Token{
1511			{Punctuation, state.NamedGroups[`colon`]},
1512			{Punctuation, state.NamedGroups[`opening_delimiters`]},
1513			{Punctuation, state.NamedGroups[`closing_delimiters`]},
1514		}
1515
1516		// Append colon
1517		iterators = append(iterators, Literator(tokens[0]))
1518
1519		if tokenClass == NameAttribute {
1520			iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
1521		} else {
1522			var keyTokenState string
1523			keyre := regexp.MustCompile(`^\d+$`)
1524			if keyre.MatchString(state.NamedGroups[`key`]) {
1525				keyTokenState = "common"
1526			} else {
1527				keyTokenState = "Q"
1528			}
1529
1530			// Use token state to Tokenise key
1531			if keyTokenState != "" {
1532				iterator, err := state.Lexer.Tokenise(
1533					&TokeniseOptions{
1534						State:  keyTokenState,
1535						Nested: true,
1536					}, state.NamedGroups[`key`])
1537
1538				if err != nil {
1539					panic(err)
1540				} else {
1541					// Append key
1542					iterators = append(iterators, iterator)
1543				}
1544			}
1545		}
1546
1547		// Append punctuation
1548		iterators = append(iterators, Literator(tokens[1]))
1549
1550		var valueTokenState string
1551
1552		switch state.NamedGroups[`opening_delimiters`] {
1553		case "(", "{", "[":
1554			valueTokenState = "root"
1555		case "<<", "«":
1556			valueTokenState = "ww"
1557		case "<":
1558			valueTokenState = "Q"
1559		}
1560
1561		// Use token state to Tokenise value
1562		if valueTokenState != "" {
1563			iterator, err := state.Lexer.Tokenise(
1564				&TokeniseOptions{
1565					State:  valueTokenState,
1566					Nested: true,
1567				}, state.NamedGroups[`value`])
1568
1569			if err != nil {
1570				panic(err)
1571			} else {
1572				// Append value
1573				iterators = append(iterators, iterator)
1574			}
1575		}
1576		// Append last punctuation
1577		iterators = append(iterators, Literator(tokens[2]))
1578
1579		return Concaterator(iterators...)
1580	})
1581}
1582
1583// Emitter for quoting constructs, changes token state based on quote name and adverbs
1584func quote(groups []string, state *LexerState) Iterator {
1585	keyword := state.NamedGroups[`keyword`]
1586	adverbsStr := state.NamedGroups[`adverbs`]
1587	iterators := []Iterator{}
1588	tokens := []Token{
1589		{Keyword, keyword},
1590		{StringAffix, adverbsStr},
1591		{Text, state.NamedGroups[`ws`]},
1592		{Punctuation, state.NamedGroups[`opening_delimiters`]},
1593		{Punctuation, state.NamedGroups[`closing_delimiters`]},
1594	}
1595
1596	// Append all tokens before dealing with the main string
1597	iterators = append(iterators, Literator(tokens[:4]...))
1598
1599	var tokenStates []string
1600
1601	// Set tokenStates based on adverbs
1602	adverbs := strings.Split(adverbsStr, ":")
1603	for _, adverb := range adverbs {
1604		switch adverb {
1605		case "c", "closure":
1606			tokenStates = append(tokenStates, "Q-closure")
1607		case "qq":
1608			tokenStates = append(tokenStates, "qq")
1609		case "ww":
1610			tokenStates = append(tokenStates, "ww")
1611		case "s", "scalar", "a", "array", "h", "hash", "f", "function":
1612			tokenStates = append(tokenStates, "Q-variable")
1613		}
1614	}
1615
1616	var tokenState string
1617
1618	switch {
1619	case keyword == "qq" || slices.Contains(tokenStates, "qq"):
1620		tokenState = "qq"
1621	case adverbsStr == "ww" || slices.Contains(tokenStates, "ww"):
1622		tokenState = "ww"
1623	case slices.Contains(tokenStates, "Q-closure") && slices.Contains(tokenStates, "Q-variable"):
1624		tokenState = "qq"
1625	case slices.Contains(tokenStates, "Q-closure"):
1626		tokenState = "Q-closure"
1627	case slices.Contains(tokenStates, "Q-variable"):
1628		tokenState = "Q-variable"
1629	default:
1630		tokenState = "Q"
1631	}
1632
1633	iterator, err := state.Lexer.Tokenise(
1634		&TokeniseOptions{
1635			State:  tokenState,
1636			Nested: true,
1637		}, state.NamedGroups[`value`])
1638
1639	if err != nil {
1640		panic(err)
1641	} else {
1642		iterators = append(iterators, iterator)
1643	}
1644
1645	// Append the last punctuation
1646	iterators = append(iterators, Literator(tokens[4]))
1647
1648	return Concaterator(iterators...)
1649}
1650
1651// Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
1652func podConfig(groups []string, state *LexerState) Iterator {
1653	// Tokenise pod config
1654	iterator, err := state.Lexer.Tokenise(
1655		&TokeniseOptions{
1656			State:  "colon-pair-attribute",
1657			Nested: true,
1658		}, groups[0])
1659
1660	if err != nil {
1661		panic(err)
1662	} else {
1663		return iterator
1664	}
1665}
1666
1667// Emitter for pod code, tokenises the code based on the lang specified
1668func podCode(groups []string, state *LexerState) Iterator {
1669	iterators := []Iterator{}
1670	tokens := []Token{
1671		{Comment, state.NamedGroups[`ws`]},
1672		{Keyword, state.NamedGroups[`keyword`]},
1673		{Keyword, state.NamedGroups[`ws2`]},
1674		{Keyword, state.NamedGroups[`name`]},
1675		{StringDoc, state.NamedGroups[`value`]},
1676		{Comment, state.NamedGroups[`ws3`]},
1677		{Keyword, state.NamedGroups[`end_keyword`]},
1678		{Keyword, state.NamedGroups[`ws4`]},
1679		{Keyword, state.NamedGroups[`name`]},
1680	}
1681
1682	// Append all tokens before dealing with the pod config
1683	iterators = append(iterators, Literator(tokens[:4]...))
1684
1685	// Tokenise pod config
1686	iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
1687
1688	langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
1689	var lang string
1690	if len(langMatch) > 1 {
1691		lang = langMatch[1]
1692	}
1693
1694	// Tokenise code based on lang property
1695	sublexer := Get(lang)
1696	if sublexer != nil {
1697		iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
1698
1699		if err != nil {
1700			panic(err)
1701		} else {
1702			iterators = append(iterators, iterator)
1703		}
1704	} else {
1705		iterators = append(iterators, Literator(tokens[4]))
1706	}
1707
1708	// Append the rest of the tokens
1709	iterators = append(iterators, Literator(tokens[5:]...))
1710
1711	return Concaterator(iterators...)
1712}