diff options
Diffstat (limited to 'vendor/github.com/alecthomas/chroma/v2/lexers/raku.go')
| -rw-r--r-- | vendor/github.com/alecthomas/chroma/v2/lexers/raku.go | 1721 |
1 files changed, 1721 insertions, 0 deletions
diff --git a/vendor/github.com/alecthomas/chroma/v2/lexers/raku.go b/vendor/github.com/alecthomas/chroma/v2/lexers/raku.go new file mode 100644 index 0000000..04aa206 --- /dev/null +++ b/vendor/github.com/alecthomas/chroma/v2/lexers/raku.go | |||
| @@ -0,0 +1,1721 @@ | |||
| 1 | package lexers | ||
| 2 | |||
| 3 | import ( | ||
| 4 | "regexp" | ||
| 5 | "strings" | ||
| 6 | "unicode/utf8" | ||
| 7 | |||
| 8 | "github.com/dlclark/regexp2" | ||
| 9 | |||
| 10 | . "github.com/alecthomas/chroma/v2" // nolint | ||
| 11 | ) | ||
| 12 | |||
| 13 | // Raku lexer. | ||
| 14 | var Raku Lexer = Register(MustNewLexer( | ||
| 15 | &Config{ | ||
| 16 | Name: "Raku", | ||
| 17 | Aliases: []string{"perl6", "pl6", "raku"}, | ||
| 18 | Filenames: []string{ | ||
| 19 | "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm", | ||
| 20 | "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc", | ||
| 21 | }, | ||
| 22 | MimeTypes: []string{ | ||
| 23 | "text/x-perl6", "application/x-perl6", | ||
| 24 | "text/x-raku", "application/x-raku", | ||
| 25 | }, | ||
| 26 | DotAll: true, | ||
| 27 | }, | ||
| 28 | rakuRules, | ||
| 29 | )) | ||
| 30 | |||
| 31 | func rakuRules() Rules { | ||
| 32 | type RakuToken int | ||
| 33 | |||
| 34 | const ( | ||
| 35 | rakuQuote RakuToken = iota | ||
| 36 | rakuNameAttribute | ||
| 37 | rakuPod | ||
| 38 | rakuPodFormatter | ||
| 39 | rakuPodDeclaration | ||
| 40 | rakuMultilineComment | ||
| 41 | rakuMatchRegex | ||
| 42 | rakuSubstitutionRegex | ||
| 43 | ) | ||
| 44 | |||
| 45 | const ( | ||
| 46 | colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)` | ||
| 47 | colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})` | ||
| 48 | colonPairPattern = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)` | ||
| 49 | colonPairLookahead = `(?=(:['\w-]+` + | ||
| 50 | colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?` | ||
| 51 | namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+` | ||
| 52 | variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern | ||
| 53 | globalVariablePattern = `[$@%&]+\*` + namePattern | ||
| 54 | ) | ||
| 55 | |||
| 56 | keywords := []string{ | ||
| 57 | `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`, | ||
| 58 | `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`, | ||
| 59 | `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`, | ||
| 60 | `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`, | ||
| 61 | `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`, | ||
| 62 | `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`, | ||
| 63 | `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`, | ||
| 64 | `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`, | ||
| 65 | `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`, | ||
| 66 | `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`, | ||
| 67 | `dynamic-scope`, `built`, `temp`, | ||
| 68 | } | ||
| 69 | |||
| 70 | keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...) | ||
| 71 | |||
| 72 | wordOperators := []string{ | ||
| 73 | `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`, | ||
| 74 | `gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`, | ||
| 75 | `but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`, | ||
| 76 | `TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`, | ||
| 77 | `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`, | ||
| 78 | } | ||
| 79 | |||
| 80 | wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...) | ||
| 81 | |||
| 82 | operators := []string{ | ||
| 83 | `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`, | ||
| 84 | `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`, | ||
| 85 | `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`, | ||
| 86 | `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`, | ||
| 87 | `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`, | ||
| 88 | `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`, | ||
| 89 | `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`, | ||
| 90 | `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`, | ||
| 91 | } | ||
| 92 | |||
| 93 | operatorsPattern := Words(``, ``, operators...) | ||
| 94 | |||
| 95 | builtinTypes := []string{ | ||
| 96 | `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`, | ||
| 97 | `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`, | ||
| 98 | `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`, | ||
| 99 | `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`, | ||
| 100 | `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`, | ||
| 101 | `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`, | ||
| 102 | `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`, | ||
| 103 | `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`, | ||
| 104 | `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`, | ||
| 105 | `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`, | ||
| 106 | `Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`, | ||
| 107 | `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`, | ||
| 108 | `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`, | ||
| 109 | `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`, | ||
| 110 | `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`, | ||
| 111 | `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`, | ||
| 112 | `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`, | ||
| 113 | `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`, | ||
| 114 | `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`, | ||
| 115 | `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`, | ||
| 116 | `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`, | ||
| 117 | `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`, | ||
| 118 | `Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`, | ||
| 119 | `Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`, | ||
| 120 | `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`, | ||
| 121 | `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`, | ||
| 122 | `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`, | ||
| 123 | `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`, | ||
| 124 | `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`, | ||
| 125 | `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`, | ||
| 126 | `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`, | ||
| 127 | `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`, | ||
| 128 | `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`, | ||
| 129 | `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`, | ||
| 130 | `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`, | ||
| 131 | `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`, | ||
| 132 | `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`, | ||
| 133 | `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`, | ||
| 134 | `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`, | ||
| 135 | `WhateverCode`, `WrapHandle`, `NativeCall`, | ||
| 136 | // Pragmas | ||
| 137 | `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`, | ||
| 138 | `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`, | ||
| 139 | `strict`, `trace`, `variables`, | ||
| 140 | } | ||
| 141 | |||
| 142 | builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...) | ||
| 143 | |||
| 144 | builtinRoutines := []string{ | ||
| 145 | `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`, | ||
| 146 | `acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`, | ||
| 147 | `add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`, | ||
| 148 | `add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`, | ||
| 149 | `all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`, | ||
| 150 | `antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`, | ||
| 151 | `archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`, | ||
| 152 | `ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`, | ||
| 153 | `atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`, | ||
| 154 | `atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`, | ||
| 155 | `await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`, | ||
| 156 | `basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`, | ||
| 157 | `bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`, | ||
| 158 | `bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`, | ||
| 159 | `callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`, | ||
| 160 | `candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`, | ||
| 161 | `cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`, | ||
| 162 | `cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`, | ||
| 163 | `child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`, | ||
| 164 | `classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`, | ||
| 165 | `codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`, | ||
| 166 | `command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`, | ||
| 167 | `compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`, | ||
| 168 | `configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`, | ||
| 169 | `content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`, | ||
| 170 | `count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`, | ||
| 171 | `curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`, | ||
| 172 | `day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`, | ||
| 173 | `default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`, | ||
| 174 | `DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`, | ||
| 175 | `diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`, | ||
| 176 | `DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`, | ||
| 177 | `eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`, | ||
| 178 | `endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`, | ||
| 179 | `eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`, | ||
| 180 | `excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`, | ||
| 181 | `expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`, | ||
| 182 | `FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`, | ||
| 183 | `find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`, | ||
| 184 | `flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`, | ||
| 185 | `free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`, | ||
| 186 | `full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`, | ||
| 187 | `gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`, | ||
| 188 | `has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`, | ||
| 189 | `hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`, | ||
| 190 | `indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`, | ||
| 191 | `install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`, | ||
| 192 | `invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`, | ||
| 193 | `is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`, | ||
| 194 | `is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`, | ||
| 195 | `is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`, | ||
| 196 | `kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`, | ||
| 197 | `lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`, | ||
| 198 | `List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`, | ||
| 199 | `loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`, | ||
| 200 | `map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`, | ||
| 201 | `methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`, | ||
| 202 | `MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`, | ||
| 203 | `mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`, | ||
| 204 | `nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`, | ||
| 205 | `new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`, | ||
| 206 | `nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`, | ||
| 207 | `nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`, | ||
| 208 | `Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`, | ||
| 209 | `ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`, | ||
| 210 | `ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`, | ||
| 211 | `package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`, | ||
| 212 | `parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`, | ||
| 213 | `parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`, | ||
| 214 | `permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`, | ||
| 215 | `polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`, | ||
| 216 | `precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`, | ||
| 217 | `primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`, | ||
| 218 | `private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`, | ||
| 219 | `protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`, | ||
| 220 | `push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`, | ||
| 221 | `quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`, | ||
| 222 | `read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`, | ||
| 223 | `read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`, | ||
| 224 | `read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`, | ||
| 225 | `reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`, | ||
| 226 | `rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`, | ||
| 227 | `replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`, | ||
| 228 | `result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`, | ||
| 229 | `rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`, | ||
| 230 | `rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`, | ||
| 231 | `samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`, | ||
| 232 | `sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`, | ||
| 233 | `set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`, | ||
| 234 | `set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`, | ||
| 235 | `set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`, | ||
| 236 | `setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`, | ||
| 237 | `short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`, | ||
| 238 | `signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`, | ||
| 239 | `skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`, | ||
| 240 | `Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`, | ||
| 241 | `socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`, | ||
| 242 | `splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`, | ||
| 243 | `started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`, | ||
| 244 | `store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`, | ||
| 245 | `subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`, | ||
| 246 | `subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`, | ||
| 247 | `take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`, | ||
| 248 | `term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`, | ||
| 249 | `tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`, | ||
| 250 | `trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`, | ||
| 251 | `trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`, | ||
| 252 | `typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`, | ||
| 253 | `uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`, | ||
| 254 | `unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`, | ||
| 255 | `USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`, | ||
| 256 | `verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`, | ||
| 257 | `watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`, | ||
| 258 | `what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`, | ||
| 259 | `with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`, | ||
| 260 | `write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`, | ||
| 261 | `write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`, | ||
| 262 | `write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`, | ||
| 263 | `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`, | ||
| 264 | } | ||
| 265 | |||
| 266 | builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...) | ||
| 267 | |||
| 268 | // A map of opening and closing brackets | ||
| 269 | brackets := map[rune]rune{ | ||
| 270 | '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d', | ||
| 271 | '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b', | ||
| 272 | '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019', | ||
| 273 | '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d', | ||
| 274 | '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a', | ||
| 275 | '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e', | ||
| 276 | '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d', | ||
| 277 | '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd', | ||
| 278 | '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265', | ||
| 279 | '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b', | ||
| 280 | '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273', | ||
| 281 | '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279', | ||
| 282 | '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f', | ||
| 283 | '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285', | ||
| 284 | '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b', | ||
| 285 | '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8', | ||
| 286 | '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4', | ||
| 287 | '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1', | ||
| 288 | '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7', | ||
| 289 | '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1', | ||
| 290 | '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db', | ||
| 291 | '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1', | ||
| 292 | '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7', | ||
| 293 | '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed', | ||
| 294 | '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb', | ||
| 295 | '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe', | ||
| 296 | '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a', | ||
| 297 | '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b', | ||
| 298 | '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771', | ||
| 299 | '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4', | ||
| 300 | '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de', | ||
| 301 | '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7', | ||
| 302 | '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984', | ||
| 303 | '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a', | ||
| 304 | '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990', | ||
| 305 | '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996', | ||
| 306 | '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5', | ||
| 307 | '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5', | ||
| 308 | '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9', | ||
| 309 | '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e', | ||
| 310 | '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65', | ||
| 311 | '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80', | ||
| 312 | '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c', | ||
| 313 | '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96', | ||
| 314 | '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c', | ||
| 315 | '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9', | ||
| 316 | '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0', | ||
| 317 | '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe', | ||
| 318 | '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4', | ||
| 319 | '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0', | ||
| 320 | '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6', | ||
| 321 | '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa', | ||
| 322 | '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a', | ||
| 323 | '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21', | ||
| 324 | '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d', | ||
| 325 | '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015', | ||
| 326 | '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b', | ||
| 327 | '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18', | ||
| 328 | '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a', | ||
| 329 | '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40', | ||
| 330 | '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48', | ||
| 331 | '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e', | ||
| 332 | '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d', | ||
| 333 | '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63', | ||
| 334 | } | ||
| 335 | |||
| 336 | bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]` | ||
| 337 | |||
| 338 | // Finds opening brackets and their closing counterparts (including pod and heredoc) | ||
| 339 | // and modifies state groups and position accordingly | ||
| 340 | findBrackets := func(tokenClass RakuToken) MutatorFunc { | ||
| 341 | return func(state *LexerState) error { | ||
| 342 | var openingChars []rune | ||
| 343 | var adverbs []rune | ||
| 344 | switch tokenClass { | ||
| 345 | case rakuPod: | ||
| 346 | openingChars = []rune(strings.Join(state.Groups[1:5], ``)) | ||
| 347 | default: | ||
| 348 | adverbs = []rune(state.NamedGroups[`adverbs`]) | ||
| 349 | openingChars = []rune(state.NamedGroups[`opening_delimiters`]) | ||
| 350 | } | ||
| 351 | |||
| 352 | openingChar := openingChars[0] | ||
| 353 | |||
| 354 | nChars := len(openingChars) | ||
| 355 | |||
| 356 | var closingChar rune | ||
| 357 | var closingCharExists bool | ||
| 358 | var closingChars []rune | ||
| 359 | |||
| 360 | switch tokenClass { | ||
| 361 | case rakuPod: | ||
| 362 | closingCharExists = true | ||
| 363 | default: | ||
| 364 | closingChar, closingCharExists = brackets[openingChar] | ||
| 365 | } | ||
| 366 | |||
| 367 | switch tokenClass { | ||
| 368 | case rakuPodFormatter: | ||
| 369 | formatter := StringOther | ||
| 370 | |||
| 371 | switch state.NamedGroups[`keyword`] { | ||
| 372 | case "B": | ||
| 373 | formatter = GenericStrong | ||
| 374 | case "I": | ||
| 375 | formatter = GenericEmph | ||
| 376 | case "U": | ||
| 377 | formatter = GenericUnderline | ||
| 378 | } | ||
| 379 | |||
| 380 | formatterRule := ruleReplacingConfig{ | ||
| 381 | pattern: `.+?`, | ||
| 382 | tokenType: formatter, | ||
| 383 | mutator: nil, | ||
| 384 | stateName: `pod-formatter`, | ||
| 385 | rulePosition: bottomRule, | ||
| 386 | } | ||
| 387 | |||
| 388 | err := replaceRule(formatterRule)(state) | ||
| 389 | if err != nil { | ||
| 390 | panic(err) | ||
| 391 | } | ||
| 392 | |||
| 393 | err = replaceRule(ruleReplacingConfig{ | ||
| 394 | delimiter: []rune{closingChar}, | ||
| 395 | tokenType: Punctuation, | ||
| 396 | stateName: `pod-formatter`, | ||
| 397 | pushState: true, | ||
| 398 | numberOfDelimiterChars: nChars, | ||
| 399 | appendMutator: popRule(formatterRule), | ||
| 400 | })(state) | ||
| 401 | if err != nil { | ||
| 402 | panic(err) | ||
| 403 | } | ||
| 404 | |||
| 405 | return nil | ||
| 406 | case rakuMatchRegex: | ||
| 407 | var delimiter []rune | ||
| 408 | if closingCharExists { | ||
| 409 | delimiter = []rune{closingChar} | ||
| 410 | } else { | ||
| 411 | delimiter = openingChars | ||
| 412 | } | ||
| 413 | |||
| 414 | err := replaceRule(ruleReplacingConfig{ | ||
| 415 | delimiter: delimiter, | ||
| 416 | tokenType: Punctuation, | ||
| 417 | stateName: `regex`, | ||
| 418 | popState: true, | ||
| 419 | pushState: true, | ||
| 420 | })(state) | ||
| 421 | if err != nil { | ||
| 422 | panic(err) | ||
| 423 | } | ||
| 424 | |||
| 425 | return nil | ||
| 426 | case rakuSubstitutionRegex: | ||
| 427 | delimiter := regexp2.Escape(string(openingChars)) | ||
| 428 | |||
| 429 | err := replaceRule(ruleReplacingConfig{ | ||
| 430 | pattern: `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`, | ||
| 431 | tokenType: ByGroups(Punctuation, UsingSelf(`qq`), Punctuation), | ||
| 432 | rulePosition: topRule, | ||
| 433 | stateName: `regex`, | ||
| 434 | popState: true, | ||
| 435 | pushState: true, | ||
| 436 | })(state) | ||
| 437 | if err != nil { | ||
| 438 | panic(err) | ||
| 439 | } | ||
| 440 | |||
| 441 | return nil | ||
| 442 | } | ||
| 443 | |||
| 444 | text := state.Text | ||
| 445 | |||
| 446 | var endPos int | ||
| 447 | |||
| 448 | var nonMirroredOpeningCharPosition int | ||
| 449 | |||
| 450 | if !closingCharExists { | ||
| 451 | // it's not a mirrored character, which means we | ||
| 452 | // just need to look for the next occurrence | ||
| 453 | closingChars = openingChars | ||
| 454 | nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos) | ||
| 455 | endPos = nonMirroredOpeningCharPosition | ||
| 456 | } else { | ||
| 457 | var podRegex *regexp2.Regexp | ||
| 458 | if tokenClass == rakuPod { | ||
| 459 | podRegex = regexp2.MustCompile( | ||
| 460 | state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]), | ||
| 461 | 0, | ||
| 462 | ) | ||
| 463 | } else { | ||
| 464 | closingChars = []rune(strings.Repeat(string(closingChar), nChars)) | ||
| 465 | } | ||
| 466 | |||
| 467 | // we need to look for the corresponding closing character, | ||
| 468 | // keep nesting in mind | ||
| 469 | nestingLevel := 1 | ||
| 470 | |||
| 471 | searchPos := state.Pos - nChars | ||
| 472 | |||
| 473 | var nextClosePos int | ||
| 474 | |||
| 475 | for nestingLevel > 0 { | ||
| 476 | if tokenClass == rakuPod { | ||
| 477 | match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars) | ||
| 478 | if err == nil { | ||
| 479 | closingChars = match.Runes() | ||
| 480 | nextClosePos = match.Index | ||
| 481 | } else { | ||
| 482 | nextClosePos = -1 | ||
| 483 | } | ||
| 484 | } else { | ||
| 485 | nextClosePos = indexAt(text, closingChars, searchPos+nChars) | ||
| 486 | } | ||
| 487 | |||
| 488 | nextOpenPos := indexAt(text, openingChars, searchPos+nChars) | ||
| 489 | |||
| 490 | switch { | ||
| 491 | case nextClosePos == -1: | ||
| 492 | nextClosePos = len(text) | ||
| 493 | nestingLevel = 0 | ||
| 494 | case nextOpenPos != -1 && nextOpenPos < nextClosePos: | ||
| 495 | nestingLevel++ | ||
| 496 | nChars = len(openingChars) | ||
| 497 | searchPos = nextOpenPos | ||
| 498 | default: // next_close_pos < next_open_pos | ||
| 499 | nestingLevel-- | ||
| 500 | nChars = len(closingChars) | ||
| 501 | searchPos = nextClosePos | ||
| 502 | } | ||
| 503 | } | ||
| 504 | |||
| 505 | endPos = nextClosePos | ||
| 506 | } | ||
| 507 | |||
| 508 | if endPos < 0 { | ||
| 509 | // if we didn't find a closer, just highlight the | ||
| 510 | // rest of the text in this class | ||
| 511 | endPos = len(text) | ||
| 512 | } | ||
| 513 | |||
| 514 | adverbre := regexp.MustCompile(`:to\b|:heredoc\b`) | ||
| 515 | var heredocTerminator []rune | ||
| 516 | var endHeredocPos int | ||
| 517 | if adverbre.MatchString(string(adverbs)) { | ||
| 518 | if endPos != len(text) { | ||
| 519 | heredocTerminator = text[state.Pos:endPos] | ||
| 520 | nChars = len(heredocTerminator) | ||
| 521 | } else { | ||
| 522 | endPos = state.Pos + 1 | ||
| 523 | heredocTerminator = []rune{} | ||
| 524 | nChars = 0 | ||
| 525 | } | ||
| 526 | |||
| 527 | if nChars > 0 { | ||
| 528 | endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0) | ||
| 529 | if endHeredocPos > -1 { | ||
| 530 | endPos += endHeredocPos | ||
| 531 | } else { | ||
| 532 | endPos = len(text) | ||
| 533 | } | ||
| 534 | } | ||
| 535 | } | ||
| 536 | |||
| 537 | textBetweenBrackets := string(text[state.Pos:endPos]) | ||
| 538 | switch tokenClass { | ||
| 539 | case rakuPod, rakuPodDeclaration, rakuNameAttribute: | ||
| 540 | state.NamedGroups[`value`] = textBetweenBrackets | ||
| 541 | state.NamedGroups[`closing_delimiters`] = string(closingChars) | ||
| 542 | case rakuQuote: | ||
| 543 | if len(heredocTerminator) > 0 { | ||
| 544 | // Length of heredoc terminator + closing chars + `;` | ||
| 545 | heredocFristPunctuationLen := nChars + len(openingChars) + 1 | ||
| 546 | |||
| 547 | state.NamedGroups[`opening_delimiters`] = string(openingChars) + | ||
| 548 | string(text[state.Pos:state.Pos+heredocFristPunctuationLen]) | ||
| 549 | |||
| 550 | state.NamedGroups[`value`] = | ||
| 551 | string(text[state.Pos+heredocFristPunctuationLen : endPos]) | ||
| 552 | |||
| 553 | if endHeredocPos > -1 { | ||
| 554 | state.NamedGroups[`closing_delimiters`] = string(heredocTerminator) | ||
| 555 | } | ||
| 556 | } else { | ||
| 557 | state.NamedGroups[`value`] = textBetweenBrackets | ||
| 558 | if nChars > 0 { | ||
| 559 | state.NamedGroups[`closing_delimiters`] = string(closingChars) | ||
| 560 | } | ||
| 561 | } | ||
| 562 | default: | ||
| 563 | state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])} | ||
| 564 | } | ||
| 565 | |||
| 566 | state.Pos = endPos + nChars | ||
| 567 | |||
| 568 | return nil | ||
| 569 | } | ||
| 570 | } | ||
| 571 | |||
| 572 | // Raku rules | ||
| 573 | // Empty capture groups are placeholders and will be replaced by mutators | ||
| 574 | // DO NOT REMOVE THEM! | ||
| 575 | return Rules{ | ||
| 576 | "root": { | ||
| 577 | // Placeholder, will be overwritten by mutators, DO NOT REMOVE! | ||
| 578 | {`\A\z`, nil, nil}, | ||
| 579 | Include("common"), | ||
| 580 | {`{`, Punctuation, Push(`root`)}, | ||
| 581 | {`\(`, Punctuation, Push(`root`)}, | ||
| 582 | {`[)}]`, Punctuation, Pop(1)}, | ||
| 583 | {`;`, Punctuation, nil}, | ||
| 584 | {`\[|\]`, Operator, nil}, | ||
| 585 | {`.+?`, Text, nil}, | ||
| 586 | }, | ||
| 587 | "common": { | ||
| 588 | {`^#![^\n]*$`, CommentHashbang, nil}, | ||
| 589 | Include("pod"), | ||
| 590 | // Multi-line, Embedded comment | ||
| 591 | { | ||
| 592 | "#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`, | ||
| 593 | CommentMultiline, | ||
| 594 | findBrackets(rakuMultilineComment), | ||
| 595 | }, | ||
| 596 | {`#[^\n]*$`, CommentSingle, nil}, | ||
| 597 | // /regex/ | ||
| 598 | { | ||
| 599 | `(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`, | ||
| 600 | ByGroups(Punctuation, UsingSelf("regex"), Punctuation), | ||
| 601 | nil, | ||
| 602 | }, | ||
| 603 | Include("variable"), | ||
| 604 | // ::?VARIABLE | ||
| 605 | {`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil}, | ||
| 606 | // Version | ||
| 607 | { | ||
| 608 | `\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`, | ||
| 609 | ByGroups(Keyword, NumberInteger, NameEntity, Operator), | ||
| 610 | nil, | ||
| 611 | }, | ||
| 612 | Include("number"), | ||
| 613 | // Hyperoperator | »*« | ||
| 614 | {`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | ||
| 615 | {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | ||
| 616 | // Hyperoperator | «*« | ||
| 617 | {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | ||
| 618 | {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | ||
| 619 | // Hyperoperator | »*» | ||
| 620 | {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | ||
| 621 | {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | ||
| 622 | // <<quoted words>> | ||
| 623 | {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")}, | ||
| 624 | // «quoted words» | ||
| 625 | {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")}, | ||
| 626 | // [<] | ||
| 627 | {`(?<=\[\\?)<(?=\])`, Operator, nil}, | ||
| 628 | // < and > operators | something < onething > something | ||
| 629 | { | ||
| 630 | `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`, | ||
| 631 | ByGroups(Operator, UsingSelf("root"), Operator), | ||
| 632 | nil, | ||
| 633 | }, | ||
| 634 | // <quoted words> | ||
| 635 | { | ||
| 636 | `(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`, | ||
| 637 | ByGroups(Punctuation, String, Punctuation), | ||
| 638 | nil, | ||
| 639 | }, | ||
| 640 | {`C?X::['\w:-]+`, NameException, nil}, | ||
| 641 | Include("metaoperator"), | ||
| 642 | // Pair | key => value | ||
| 643 | { | ||
| 644 | `(\w[\w'-]*)(\s*)(=>)`, | ||
| 645 | ByGroups(String, Text, Operator), | ||
| 646 | nil, | ||
| 647 | }, | ||
| 648 | Include("colon-pair"), | ||
| 649 | // Token | ||
| 650 | { | ||
| 651 | `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`, | ||
| 652 | NameFunction, | ||
| 653 | Push("token", "name-adverb"), | ||
| 654 | }, | ||
| 655 | // Substitution | ||
| 656 | {`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")}, | ||
| 657 | {keywordsPattern, Keyword, nil}, | ||
| 658 | {builtinTypesPattern, NameBuiltin, nil}, | ||
| 659 | {builtinRoutinesPattern, NameBuiltin, nil}, | ||
| 660 | // Class name | ||
| 661 | { | ||
| 662 | `(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern, | ||
| 663 | NameClass, | ||
| 664 | Push("name-adverb"), | ||
| 665 | }, | ||
| 666 | // Routine | ||
| 667 | { | ||
| 668 | `(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`, | ||
| 669 | NameFunction, | ||
| 670 | Push("name-adverb"), | ||
| 671 | }, | ||
| 672 | // Constant | ||
| 673 | {`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")}, | ||
| 674 | // Namespace | ||
| 675 | {`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")}, | ||
| 676 | Include("operator"), | ||
| 677 | Include("single-quote"), | ||
| 678 | {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")}, | ||
| 679 | // m,rx regex | ||
| 680 | {`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")}, | ||
| 681 | // Quote constructs | ||
| 682 | { | ||
| 683 | `(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`, | ||
| 684 | EmitterFunc(quote), | ||
| 685 | findBrackets(rakuQuote), | ||
| 686 | }, | ||
| 687 | // Function | ||
| 688 | { | ||
| 689 | `\b` + namePattern + colonPairLookahead + `\()`, | ||
| 690 | NameFunction, | ||
| 691 | Push("name-adverb"), | ||
| 692 | }, | ||
| 693 | // Method | ||
| 694 | { | ||
| 695 | `(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`, | ||
| 696 | NameFunction, | ||
| 697 | Push("name-adverb"), | ||
| 698 | }, | ||
| 699 | // Indirect invocant | ||
| 700 | {namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")}, | ||
| 701 | {`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil}, | ||
| 702 | {`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil}, | ||
| 703 | {`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil}, | ||
| 704 | // Sigilless variable | ||
| 705 | { | ||
| 706 | `(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern, | ||
| 707 | NameVariable, | ||
| 708 | Push("name-adverb"), | ||
| 709 | }, | ||
| 710 | {namePattern, Name, Push("name-adverb")}, | ||
| 711 | }, | ||
| 712 | "rx": { | ||
| 713 | Include("colon-pair-attribute"), | ||
| 714 | { | ||
| 715 | `(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`, | ||
| 716 | ByGroupNames( | ||
| 717 | map[string]Emitter{ | ||
| 718 | `opening_delimiters`: Punctuation, | ||
| 719 | `delimiter`: nil, | ||
| 720 | }, | ||
| 721 | ), | ||
| 722 | findBrackets(rakuMatchRegex), | ||
| 723 | }, | ||
| 724 | }, | ||
| 725 | "substitution": { | ||
| 726 | Include("colon-pair-attribute"), | ||
| 727 | // Substitution | s{regex} = value | ||
| 728 | { | ||
| 729 | `(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`, | ||
| 730 | ByGroupNames(map[string]Emitter{ | ||
| 731 | `opening_delimiters`: Punctuation, | ||
| 732 | `delimiter`: nil, | ||
| 733 | }), | ||
| 734 | findBrackets(rakuMatchRegex), | ||
| 735 | }, | ||
| 736 | // Substitution | s/regex/string/ | ||
| 737 | { | ||
| 738 | `(?<opening_delimiters>[^\w:\s])`, | ||
| 739 | Punctuation, | ||
| 740 | findBrackets(rakuSubstitutionRegex), | ||
| 741 | }, | ||
| 742 | }, | ||
| 743 | "number": { | ||
| 744 | {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil}, | ||
| 745 | {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil}, | ||
| 746 | {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil}, | ||
| 747 | { | ||
| 748 | `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`, | ||
| 749 | LiteralNumberFloat, | ||
| 750 | nil, | ||
| 751 | }, | ||
| 752 | {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil}, | ||
| 753 | {`(?<=\d+)i`, NameConstant, nil}, | ||
| 754 | {`\d+(_\d+)*`, LiteralNumberInteger, nil}, | ||
| 755 | }, | ||
| 756 | "name-adverb": { | ||
| 757 | Include("colon-pair-attribute-keyvalue"), | ||
| 758 | Default(Pop(1)), | ||
| 759 | }, | ||
| 760 | "colon-pair": { | ||
| 761 | // :key(value) | ||
| 762 | {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)}, | ||
| 763 | // :123abc | ||
| 764 | { | ||
| 765 | `(:)(\d+)(\w[\w'-]*)`, | ||
| 766 | ByGroups(Punctuation, UsingSelf("number"), String), | ||
| 767 | nil, | ||
| 768 | }, | ||
| 769 | // :key | ||
| 770 | {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil}, | ||
| 771 | {`\s+`, Text, nil}, | ||
| 772 | }, | ||
| 773 | "colon-pair-attribute": { | ||
| 774 | // :key(value) | ||
| 775 | {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, | ||
| 776 | // :123abc | ||
| 777 | { | ||
| 778 | `(:)(\d+)(\w[\w'-]*)`, | ||
| 779 | ByGroups(Punctuation, UsingSelf("number"), NameAttribute), | ||
| 780 | nil, | ||
| 781 | }, | ||
| 782 | // :key | ||
| 783 | {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil}, | ||
| 784 | {`\s+`, Text, nil}, | ||
| 785 | }, | ||
| 786 | "colon-pair-attribute-keyvalue": { | ||
| 787 | // :key(value) | ||
| 788 | {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, | ||
| 789 | }, | ||
| 790 | "escape-qq": { | ||
| 791 | { | ||
| 792 | `(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`, | ||
| 793 | ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation), | ||
| 794 | nil, | ||
| 795 | }, | ||
| 796 | }, | ||
| 797 | `escape-char`: { | ||
| 798 | {`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil}, | ||
| 799 | }, | ||
| 800 | `escape-single-quote`: { | ||
| 801 | {`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil}, | ||
| 802 | }, | ||
| 803 | "escape-c-name": { | ||
| 804 | { | ||
| 805 | `(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`, | ||
| 806 | ByGroups(StringEscape, Punctuation, String, Punctuation), | ||
| 807 | nil, | ||
| 808 | }, | ||
| 809 | }, | ||
| 810 | "escape-hexadecimal": { | ||
| 811 | { | ||
| 812 | `(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`, | ||
| 813 | ByGroups(StringEscape, Punctuation, NumberHex, Punctuation), | ||
| 814 | nil, | ||
| 815 | }, | ||
| 816 | {`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil}, | ||
| 817 | }, | ||
| 818 | "regex": { | ||
| 819 | // Placeholder, will be overwritten by mutators, DO NOT REMOVE! | ||
| 820 | {`\A\z`, nil, nil}, | ||
| 821 | Include("regex-escape-class"), | ||
| 822 | Include(`regex-character-escape`), | ||
| 823 | // $(code) | ||
| 824 | { | ||
| 825 | `([$@])((?<!(?<!\\)\\)\()`, | ||
| 826 | ByGroups(Keyword, Punctuation), | ||
| 827 | replaceRule(ruleReplacingConfig{ | ||
| 828 | delimiter: []rune(`)`), | ||
| 829 | tokenType: Punctuation, | ||
| 830 | stateName: `root`, | ||
| 831 | pushState: true, | ||
| 832 | }), | ||
| 833 | }, | ||
| 834 | // Exclude $/ from variables, because we can't get out of the end of the slash regex: $/; | ||
| 835 | {`\$(?=/)`, NameEntity, nil}, | ||
| 836 | // Exclude $ from variables | ||
| 837 | {`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil}, | ||
| 838 | Include("variable"), | ||
| 839 | Include("escape-c-name"), | ||
| 840 | Include("escape-hexadecimal"), | ||
| 841 | Include("number"), | ||
| 842 | Include("single-quote"), | ||
| 843 | // :my variable code ... | ||
| 844 | { | ||
| 845 | `(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`, | ||
| 846 | ByGroups(Operator, KeywordDeclaration), | ||
| 847 | replaceRule(ruleReplacingConfig{ | ||
| 848 | delimiter: []rune(`;`), | ||
| 849 | tokenType: Punctuation, | ||
| 850 | stateName: `root`, | ||
| 851 | pushState: true, | ||
| 852 | }), | ||
| 853 | }, | ||
| 854 | // <{code}> | ||
| 855 | { | ||
| 856 | `(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`, | ||
| 857 | ByGroups(Punctuation, Operator, Punctuation), | ||
| 858 | replaceRule(ruleReplacingConfig{ | ||
| 859 | delimiter: []rune(`}>`), | ||
| 860 | tokenType: Punctuation, | ||
| 861 | stateName: `root`, | ||
| 862 | pushState: true, | ||
| 863 | }), | ||
| 864 | }, | ||
| 865 | // {code} | ||
| 866 | Include(`closure`), | ||
| 867 | // Properties | ||
| 868 | {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil}, | ||
| 869 | // Operator | ||
| 870 | {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil}, | ||
| 871 | // Anchors | ||
| 872 | {`\^\^|\^|\$\$|\$`, NameEntity, nil}, | ||
| 873 | {`\.`, NameEntity, nil}, | ||
| 874 | {`#[^\n]*\n`, CommentSingle, nil}, | ||
| 875 | // Lookaround | ||
| 876 | { | ||
| 877 | `(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`, | ||
| 878 | ByGroups(Punctuation, Text, Operator, Text, OperatorWord), | ||
| 879 | replaceRule(ruleReplacingConfig{ | ||
| 880 | delimiter: []rune(`>`), | ||
| 881 | tokenType: Punctuation, | ||
| 882 | stateName: `regex`, | ||
| 883 | pushState: true, | ||
| 884 | }), | ||
| 885 | }, | ||
| 886 | { | ||
| 887 | `(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`, | ||
| 888 | ByGroups(Punctuation, Operator, OperatorWord, Punctuation), | ||
| 889 | nil, | ||
| 890 | }, | ||
| 891 | // <$variable> | ||
| 892 | { | ||
| 893 | `(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`, | ||
| 894 | ByGroups(Punctuation, Operator, NameVariable, Punctuation), | ||
| 895 | nil, | ||
| 896 | }, | ||
| 897 | // Capture markers | ||
| 898 | {`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil}, | ||
| 899 | { | ||
| 900 | `(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`, | ||
| 901 | ByGroups(Punctuation, NameVariable, Operator), | ||
| 902 | Push(`regex-variable`), | ||
| 903 | }, | ||
| 904 | { | ||
| 905 | `(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`, | ||
| 906 | ByGroups(Punctuation, Operator, NameFunction), | ||
| 907 | Push(`regex-function`), | ||
| 908 | }, | ||
| 909 | {`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")}, | ||
| 910 | {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")}, | ||
| 911 | {`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)}, | ||
| 912 | {`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")}, | ||
| 913 | {`.+?`, StringRegex, nil}, | ||
| 914 | }, | ||
| 915 | "regex-class-builtin": { | ||
| 916 | { | ||
| 917 | `\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`, | ||
| 918 | NameBuiltin, | ||
| 919 | nil, | ||
| 920 | }, | ||
| 921 | }, | ||
| 922 | "regex-function": { | ||
| 923 | // <function> | ||
| 924 | {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)}, | ||
| 925 | // <function(parameter)> | ||
| 926 | { | ||
| 927 | `\(`, | ||
| 928 | Punctuation, | ||
| 929 | replaceRule(ruleReplacingConfig{ | ||
| 930 | delimiter: []rune(`)>`), | ||
| 931 | tokenType: Punctuation, | ||
| 932 | stateName: `root`, | ||
| 933 | popState: true, | ||
| 934 | pushState: true, | ||
| 935 | }), | ||
| 936 | }, | ||
| 937 | // <function value> | ||
| 938 | { | ||
| 939 | `\s+`, | ||
| 940 | StringRegex, | ||
| 941 | replaceRule(ruleReplacingConfig{ | ||
| 942 | delimiter: []rune(`>`), | ||
| 943 | tokenType: Punctuation, | ||
| 944 | stateName: `regex`, | ||
| 945 | popState: true, | ||
| 946 | pushState: true, | ||
| 947 | }), | ||
| 948 | }, | ||
| 949 | // <function: value> | ||
| 950 | { | ||
| 951 | `:`, | ||
| 952 | Punctuation, | ||
| 953 | replaceRule(ruleReplacingConfig{ | ||
| 954 | delimiter: []rune(`>`), | ||
| 955 | tokenType: Punctuation, | ||
| 956 | stateName: `root`, | ||
| 957 | popState: true, | ||
| 958 | pushState: true, | ||
| 959 | }), | ||
| 960 | }, | ||
| 961 | }, | ||
| 962 | "regex-variable": { | ||
| 963 | Include(`regex-starting-operators`), | ||
| 964 | // <var=function( | ||
| 965 | { | ||
| 966 | `(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`, | ||
| 967 | ByGroups(Operator, NameFunction), | ||
| 968 | Mutators(Pop(1), Push(`regex-function`)), | ||
| 969 | }, | ||
| 970 | // <var=function> | ||
| 971 | {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)}, | ||
| 972 | // <var= | ||
| 973 | Default(Pop(1), Push(`regex-property`)), | ||
| 974 | }, | ||
| 975 | "regex-property": { | ||
| 976 | {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)}, | ||
| 977 | Include("regex-class-builtin"), | ||
| 978 | Include("variable"), | ||
| 979 | Include(`regex-starting-operators`), | ||
| 980 | Include("colon-pair-attribute"), | ||
| 981 | {`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")}, | ||
| 982 | {`\+|\-`, Operator, nil}, | ||
| 983 | {`@[\w':-]+`, NameVariable, nil}, | ||
| 984 | {`.+?`, StringRegex, nil}, | ||
| 985 | }, | ||
| 986 | `regex-starting-operators`: { | ||
| 987 | {`(?<=<)[|!?.]+`, Operator, nil}, | ||
| 988 | }, | ||
| 989 | "regex-escape-class": { | ||
| 990 | {`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil}, | ||
| 991 | }, | ||
| 992 | `regex-character-escape`: { | ||
| 993 | {`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil}, | ||
| 994 | }, | ||
| 995 | "regex-character-class": { | ||
| 996 | {`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)}, | ||
| 997 | Include("regex-escape-class"), | ||
| 998 | Include("escape-c-name"), | ||
| 999 | Include("escape-hexadecimal"), | ||
| 1000 | Include(`regex-character-escape`), | ||
| 1001 | Include("number"), | ||
| 1002 | {`\.\.`, Operator, nil}, | ||
| 1003 | {`.+?`, StringRegex, nil}, | ||
| 1004 | }, | ||
| 1005 | "metaoperator": { | ||
| 1006 | // Z[=>] | ||
| 1007 | { | ||
| 1008 | `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`, | ||
| 1009 | ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation), | ||
| 1010 | nil, | ||
| 1011 | }, | ||
| 1012 | // Z=> | ||
| 1013 | {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil}, | ||
| 1014 | }, | ||
| 1015 | "operator": { | ||
| 1016 | // Word Operator | ||
| 1017 | {wordOperatorsPattern, OperatorWord, nil}, | ||
| 1018 | // Operator | ||
| 1019 | {operatorsPattern, Operator, nil}, | ||
| 1020 | }, | ||
| 1021 | "pod": { | ||
| 1022 | // Single-line pod declaration | ||
| 1023 | {`(#[|=])\s`, Keyword, Push("pod-single")}, | ||
| 1024 | // Multi-line pod declaration | ||
| 1025 | { | ||
| 1026 | "(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`, | ||
| 1027 | ByGroupNames( | ||
| 1028 | map[string]Emitter{ | ||
| 1029 | `keyword`: Keyword, | ||
| 1030 | `opening_delimiters`: Punctuation, | ||
| 1031 | `delimiter`: nil, | ||
| 1032 | `value`: UsingSelf("pod-declaration"), | ||
| 1033 | `closing_delimiters`: Punctuation, | ||
| 1034 | }), | ||
| 1035 | findBrackets(rakuPodDeclaration), | ||
| 1036 | }, | ||
| 1037 | Include("pod-blocks"), | ||
| 1038 | }, | ||
| 1039 | "pod-blocks": { | ||
| 1040 | // =begin code | ||
| 1041 | { | ||
| 1042 | `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`, | ||
| 1043 | EmitterFunc(podCode), | ||
| 1044 | nil, | ||
| 1045 | }, | ||
| 1046 | // =begin | ||
| 1047 | { | ||
| 1048 | `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`, | ||
| 1049 | ByGroupNames( | ||
| 1050 | map[string]Emitter{ | ||
| 1051 | `ws`: Comment, | ||
| 1052 | `keyword`: Keyword, | ||
| 1053 | `ws2`: StringDoc, | ||
| 1054 | `name`: Keyword, | ||
| 1055 | `config`: EmitterFunc(podConfig), | ||
| 1056 | `value`: UsingSelf("pod-begin"), | ||
| 1057 | `closing_delimiters`: Keyword, | ||
| 1058 | }), | ||
| 1059 | findBrackets(rakuPod), | ||
| 1060 | }, | ||
| 1061 | // =for ... | ||
| 1062 | { | ||
| 1063 | `(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`, | ||
| 1064 | ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), | ||
| 1065 | Push("pod-paragraph"), | ||
| 1066 | }, | ||
| 1067 | // =config | ||
| 1068 | { | ||
| 1069 | `(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`, | ||
| 1070 | ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), | ||
| 1071 | nil, | ||
| 1072 | }, | ||
| 1073 | // =alias | ||
| 1074 | { | ||
| 1075 | `(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`, | ||
| 1076 | ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc), | ||
| 1077 | nil, | ||
| 1078 | }, | ||
| 1079 | // =encoding | ||
| 1080 | { | ||
| 1081 | `(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`, | ||
| 1082 | ByGroups(Comment, Keyword, StringDoc, Name), | ||
| 1083 | nil, | ||
| 1084 | }, | ||
| 1085 | // =para ... | ||
| 1086 | { | ||
| 1087 | `(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`, | ||
| 1088 | ByGroups(Comment, Keyword, EmitterFunc(podConfig)), | ||
| 1089 | Push("pod-paragraph"), | ||
| 1090 | }, | ||
| 1091 | // =head1 ... | ||
| 1092 | { | ||
| 1093 | `(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`, | ||
| 1094 | ByGroups(Comment, Keyword, GenericHeading, Keyword), | ||
| 1095 | Push("pod-heading"), | ||
| 1096 | }, | ||
| 1097 | // =item ... | ||
| 1098 | { | ||
| 1099 | `(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`, | ||
| 1100 | ByGroups(Comment, Keyword, StringDoc, Keyword), | ||
| 1101 | Push("pod-paragraph"), | ||
| 1102 | }, | ||
| 1103 | { | ||
| 1104 | `(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`, | ||
| 1105 | ByGroups(Comment, Keyword, EmitterFunc(podConfig)), | ||
| 1106 | Push("pod-finish"), | ||
| 1107 | }, | ||
| 1108 | // ={custom} ... | ||
| 1109 | { | ||
| 1110 | `(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`, | ||
| 1111 | ByGroups(Comment, Name, StringDoc, Keyword), | ||
| 1112 | Push("pod-paragraph"), | ||
| 1113 | }, | ||
| 1114 | // = podconfig | ||
| 1115 | { | ||
| 1116 | `(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` + | ||
| 1117 | colonPairClosingBrackets + `) *)*\n)`, | ||
| 1118 | ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)), | ||
| 1119 | nil, | ||
| 1120 | }, | ||
| 1121 | }, | ||
| 1122 | "pod-begin": { | ||
| 1123 | Include("pod-blocks"), | ||
| 1124 | Include("pre-pod-formatter"), | ||
| 1125 | {`.+?`, StringDoc, nil}, | ||
| 1126 | }, | ||
| 1127 | "pod-declaration": { | ||
| 1128 | Include("pre-pod-formatter"), | ||
| 1129 | {`.+?`, StringDoc, nil}, | ||
| 1130 | }, | ||
| 1131 | "pod-paragraph": { | ||
| 1132 | {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)}, | ||
| 1133 | Include("pre-pod-formatter"), | ||
| 1134 | {`.+?`, StringDoc, nil}, | ||
| 1135 | }, | ||
| 1136 | "pod-single": { | ||
| 1137 | {`\n`, StringDoc, Pop(1)}, | ||
| 1138 | Include("pre-pod-formatter"), | ||
| 1139 | {`.+?`, StringDoc, nil}, | ||
| 1140 | }, | ||
| 1141 | "pod-heading": { | ||
| 1142 | {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)}, | ||
| 1143 | Include("pre-pod-formatter"), | ||
| 1144 | {`.+?`, GenericHeading, nil}, | ||
| 1145 | }, | ||
| 1146 | "pod-finish": { | ||
| 1147 | {`\z`, nil, Pop(1)}, | ||
| 1148 | Include("pre-pod-formatter"), | ||
| 1149 | {`.+?`, StringDoc, nil}, | ||
| 1150 | }, | ||
| 1151 | "pre-pod-formatter": { | ||
| 1152 | // C<code>, B<bold>, ... | ||
| 1153 | { | ||
| 1154 | `(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`, | ||
| 1155 | ByGroups(Keyword, Punctuation), | ||
| 1156 | findBrackets(rakuPodFormatter), | ||
| 1157 | }, | ||
| 1158 | }, | ||
| 1159 | "pod-formatter": { | ||
| 1160 | // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! | ||
| 1161 | {`>`, Punctuation, Pop(1)}, | ||
| 1162 | Include("pre-pod-formatter"), | ||
| 1163 | // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! | ||
| 1164 | {`.+?`, StringOther, nil}, | ||
| 1165 | }, | ||
| 1166 | "variable": { | ||
| 1167 | {variablePattern, NameVariable, Push("name-adverb")}, | ||
| 1168 | {globalVariablePattern, NameVariableGlobal, Push("name-adverb")}, | ||
| 1169 | {`[$@]<[^>]+>`, NameVariable, nil}, | ||
| 1170 | {`\$[/!¢]`, NameVariable, nil}, | ||
| 1171 | {`[$@%]`, NameVariable, nil}, | ||
| 1172 | }, | ||
| 1173 | "single-quote": { | ||
| 1174 | {`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")}, | ||
| 1175 | }, | ||
| 1176 | "single-quote-inner": { | ||
| 1177 | {`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)}, | ||
| 1178 | Include("escape-single-quote"), | ||
| 1179 | Include("escape-qq"), | ||
| 1180 | {`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil}, | ||
| 1181 | }, | ||
| 1182 | "double-quotes": { | ||
| 1183 | {`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)}, | ||
| 1184 | Include("qq"), | ||
| 1185 | }, | ||
| 1186 | "<<": { | ||
| 1187 | {`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, | ||
| 1188 | Include("ww"), | ||
| 1189 | }, | ||
| 1190 | "«": { | ||
| 1191 | {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, | ||
| 1192 | Include("ww"), | ||
| 1193 | }, | ||
| 1194 | "ww": { | ||
| 1195 | Include("single-quote"), | ||
| 1196 | Include("qq"), | ||
| 1197 | }, | ||
| 1198 | "qq": { | ||
| 1199 | Include("qq-variable"), | ||
| 1200 | Include("closure"), | ||
| 1201 | Include(`escape-char`), | ||
| 1202 | Include("escape-hexadecimal"), | ||
| 1203 | Include("escape-c-name"), | ||
| 1204 | Include("escape-qq"), | ||
| 1205 | {`.+?`, StringDouble, nil}, | ||
| 1206 | }, | ||
| 1207 | "qq-variable": { | ||
| 1208 | { | ||
| 1209 | `(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`, | ||
| 1210 | NameVariable, | ||
| 1211 | Push("qq-variable-extras", "name-adverb"), | ||
| 1212 | }, | ||
| 1213 | }, | ||
| 1214 | "qq-variable-extras": { | ||
| 1215 | // Method | ||
| 1216 | { | ||
| 1217 | `(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`, | ||
| 1218 | ByGroupNames(map[string]Emitter{ | ||
| 1219 | `operator`: Operator, | ||
| 1220 | `method_name`: NameFunction, | ||
| 1221 | }), | ||
| 1222 | Push(`name-adverb`), | ||
| 1223 | }, | ||
| 1224 | // Function/Signature | ||
| 1225 | { | ||
| 1226 | `\(`, Punctuation, replaceRule( | ||
| 1227 | ruleReplacingConfig{ | ||
| 1228 | delimiter: []rune(`)`), | ||
| 1229 | tokenType: Punctuation, | ||
| 1230 | stateName: `root`, | ||
| 1231 | pushState: true, | ||
| 1232 | }), | ||
| 1233 | }, | ||
| 1234 | Default(Pop(1)), | ||
| 1235 | }, | ||
| 1236 | "Q": { | ||
| 1237 | Include("escape-qq"), | ||
| 1238 | {`.+?`, String, nil}, | ||
| 1239 | }, | ||
| 1240 | "Q-closure": { | ||
| 1241 | Include("escape-qq"), | ||
| 1242 | Include("closure"), | ||
| 1243 | {`.+?`, String, nil}, | ||
| 1244 | }, | ||
| 1245 | "Q-variable": { | ||
| 1246 | Include("escape-qq"), | ||
| 1247 | Include("qq-variable"), | ||
| 1248 | {`.+?`, String, nil}, | ||
| 1249 | }, | ||
| 1250 | "closure": { | ||
| 1251 | {`(?<!(?<!\\)\\){`, Punctuation, replaceRule( | ||
| 1252 | ruleReplacingConfig{ | ||
| 1253 | delimiter: []rune(`}`), | ||
| 1254 | tokenType: Punctuation, | ||
| 1255 | stateName: `root`, | ||
| 1256 | pushState: true, | ||
| 1257 | }), | ||
| 1258 | }, | ||
| 1259 | }, | ||
| 1260 | "token": { | ||
| 1261 | // Token signature | ||
| 1262 | {`\(`, Punctuation, replaceRule( | ||
| 1263 | ruleReplacingConfig{ | ||
| 1264 | delimiter: []rune(`)`), | ||
| 1265 | tokenType: Punctuation, | ||
| 1266 | stateName: `root`, | ||
| 1267 | pushState: true, | ||
| 1268 | }), | ||
| 1269 | }, | ||
| 1270 | {`{`, Punctuation, replaceRule( | ||
| 1271 | ruleReplacingConfig{ | ||
| 1272 | delimiter: []rune(`}`), | ||
| 1273 | tokenType: Punctuation, | ||
| 1274 | stateName: `regex`, | ||
| 1275 | popState: true, | ||
| 1276 | pushState: true, | ||
| 1277 | }), | ||
| 1278 | }, | ||
| 1279 | {`\s*`, Text, nil}, | ||
| 1280 | Default(Pop(1)), | ||
| 1281 | }, | ||
| 1282 | } | ||
| 1283 | } | ||
| 1284 | |||
// joinRuneMap concatenates the keys of m into a single string.
//
// The keys are emitted in map iteration order, which Go does not specify, so
// callers must not depend on the order of the characters in the result. The
// values of the map are ignored.
func joinRuneMap(m map[rune]rune) string {
	keys := make([]rune, len(m))

	next := 0
	for key := range m {
		keys[next] = key
		next++
	}

	return string(keys)
}
| 1294 | |||
// indexAt returns the rune index in str of the first occurrence of substr at
// or after rune position pos that is not escaped with a backslash, or -1 when
// there is no such occurrence.
//
// An occurrence counts as escaped when it is directly preceded by a backslash
// that is not itself preceded by another backslash. The look-behind only
// inspects the current search window: runes located before pos (or before the
// rune following a skipped, escaped occurrence) are never examined.
//
// A pos outside [0, len(str)] yields -1 instead of panicking.
func indexAt(str []rune, substr []rune, pos int) int {
	if pos < 0 || pos > len(str) {
		return -1
	}

	needle := string(substr)

	// base is the rune offset in str at which the current search window starts.
	base := pos
	for {
		window := str[base:]
		text := string(window)

		byteIdx := strings.Index(text, needle)
		if byteIdx < 0 {
			return -1
		}

		// strings.Index reports a byte offset; convert it to a rune offset so
		// it can be used to index the rune slice.
		idx := utf8.RuneCountInString(text[:byteIdx])

		escaped := (idx > 1 && window[idx-1] == '\\' && window[idx-2] != '\\') ||
			(idx == 1 && window[0] == '\\')
		if !escaped {
			return base + idx
		}

		// Skip the escaped occurrence and search again right after its
		// first rune.
		base += idx + 1
	}
}
| 1320 | |||
// contains reports whether e is equal to at least one element of s.
// A nil or empty slice never contains anything.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] != e {
			continue
		}
		return true
	}

	return false
}
| 1330 | |||
// rulePosition selects a position in a state's rules. Together with a state
// name it is passed to genStackName to derive the key under which rule
// replacements are recorded.
// NOTE(review): presumably the position addresses the first or the last rule
// of the state — confirm against replaceRule.
type rulePosition int

// topRule is the rulePosition with value 0.
const topRule rulePosition = 0

// bottomRule is deliberately left untyped: it is the constant -1 and takes the
// type required by the context in which it is used (e.g. rulePosition).
const bottomRule = -1
| 1337 | |||
| 1338 | type ruleMakingConfig struct { | ||
| 1339 | delimiter []rune | ||
| 1340 | pattern string | ||
| 1341 | tokenType Emitter | ||
| 1342 | mutator Mutator | ||
| 1343 | numberOfDelimiterChars int | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | type ruleReplacingConfig struct { | ||
| 1347 | delimiter []rune | ||
| 1348 | pattern string | ||
| 1349 | tokenType Emitter | ||
| 1350 | numberOfDelimiterChars int | ||
| 1351 | mutator Mutator | ||
| 1352 | appendMutator Mutator | ||
| 1353 | rulePosition rulePosition | ||
| 1354 | stateName string | ||
| 1355 | pop bool | ||
| 1356 | popState bool | ||
| 1357 | pushState bool | ||
| 1358 | } | ||
| 1359 | |||
| 1360 | // Pops rule from state-stack and replaces the rule with the previous rule | ||
| 1361 | func popRule(rule ruleReplacingConfig) MutatorFunc { | ||
| 1362 | return func(state *LexerState) error { | ||
| 1363 | stackName := genStackName(rule.stateName, rule.rulePosition) | ||
| 1364 | |||
| 1365 | stack, ok := state.Get(stackName).([]ruleReplacingConfig) | ||
| 1366 | |||
| 1367 | if ok && len(stack) > 0 { | ||
| 1368 | // Pop from stack | ||
| 1369 | stack = stack[:len(stack)-1] | ||
| 1370 | lastRule := stack[len(stack)-1] | ||
| 1371 | lastRule.pushState = false | ||
| 1372 | lastRule.popState = false | ||
| 1373 | lastRule.pop = true | ||
| 1374 | state.Set(stackName, stack) | ||
| 1375 | |||
| 1376 | // Call replaceRule to use the last rule | ||
| 1377 | err := replaceRule(lastRule)(state) | ||
| 1378 | if err != nil { | ||
| 1379 | panic(err) | ||
| 1380 | } | ||
| 1381 | } | ||
| 1382 | |||
| 1383 | return nil | ||
| 1384 | } | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | // Replaces a state's rule based on the rule config and position | ||
| 1388 | func replaceRule(rule ruleReplacingConfig) MutatorFunc { | ||
| 1389 | return func(state *LexerState) error { | ||
| 1390 | stateName := rule.stateName | ||
| 1391 | stackName := genStackName(rule.stateName, rule.rulePosition) | ||
| 1392 | |||
| 1393 | stack, ok := state.Get(stackName).([]ruleReplacingConfig) | ||
| 1394 | if !ok { | ||
| 1395 | stack = []ruleReplacingConfig{} | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | // If state-stack is empty fill it with the placeholder rule | ||
| 1399 | if len(stack) == 0 { | ||
| 1400 | stack = []ruleReplacingConfig{ | ||
| 1401 | { | ||
| 1402 | // Placeholder, will be overwritten by mutators, DO NOT REMOVE! | ||
| 1403 | pattern: `\A\z`, | ||
| 1404 | tokenType: nil, | ||
| 1405 | mutator: nil, | ||
| 1406 | stateName: stateName, | ||
| 1407 | rulePosition: rule.rulePosition, | ||
| 1408 | }, | ||
| 1409 | } | ||
| 1410 | state.Set(stackName, stack) | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | var mutator Mutator | ||
| 1414 | mutators := []Mutator{} | ||
| 1415 | |||
| 1416 | switch { | ||
| 1417 | case rule.rulePosition == topRule && rule.mutator == nil: | ||
| 1418 | // Default mutator for top rule | ||
| 1419 | mutators = []Mutator{Pop(1), popRule(rule)} | ||
| 1420 | case rule.rulePosition == topRule && rule.mutator != nil: | ||
| 1421 | // Default mutator for top rule, when rule.mutator is set | ||
| 1422 | mutators = []Mutator{rule.mutator, popRule(rule)} | ||
| 1423 | case rule.mutator != nil: | ||
| 1424 | mutators = []Mutator{rule.mutator} | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | if rule.appendMutator != nil { | ||
| 1428 | mutators = append(mutators, rule.appendMutator) | ||
| 1429 | } | ||
| 1430 | |||
| 1431 | if len(mutators) > 0 { | ||
| 1432 | mutator = Mutators(mutators...) | ||
| 1433 | } else { | ||
| 1434 | mutator = nil | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | ruleConfig := ruleMakingConfig{ | ||
| 1438 | pattern: rule.pattern, | ||
| 1439 | delimiter: rule.delimiter, | ||
| 1440 | numberOfDelimiterChars: rule.numberOfDelimiterChars, | ||
| 1441 | tokenType: rule.tokenType, | ||
| 1442 | mutator: mutator, | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | cRule := makeRule(ruleConfig) | ||
| 1446 | |||
| 1447 | switch rule.rulePosition { | ||
| 1448 | case topRule: | ||
| 1449 | state.Rules[stateName][0] = cRule | ||
| 1450 | case bottomRule: | ||
| 1451 | state.Rules[stateName][len(state.Rules[stateName])-1] = cRule | ||
| 1452 | } | ||
| 1453 | |||
| 1454 | // Pop state name from stack if asked. State should be popped first before Pushing | ||
| 1455 | if rule.popState { | ||
| 1456 | err := Pop(1).Mutate(state) | ||
| 1457 | if err != nil { | ||
| 1458 | panic(err) | ||
| 1459 | } | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | // Push state name to stack if asked | ||
| 1463 | if rule.pushState { | ||
| 1464 | err := Push(stateName).Mutate(state) | ||
| 1465 | if err != nil { | ||
| 1466 | panic(err) | ||
| 1467 | } | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | if !rule.pop { | ||
| 1471 | state.Set(stackName, append(stack, rule)) | ||
| 1472 | } | ||
| 1473 | |||
| 1474 | return nil | ||
| 1475 | } | ||
| 1476 | } | ||
| 1477 | |||
| 1478 | // Generates rule replacing stack using state name and rule position | ||
| 1479 | func genStackName(stateName string, rulePosition rulePosition) (stackName string) { | ||
| 1480 | switch rulePosition { | ||
| 1481 | case topRule: | ||
| 1482 | stackName = stateName + `-top-stack` | ||
| 1483 | case bottomRule: | ||
| 1484 | stackName = stateName + `-bottom-stack` | ||
| 1485 | } | ||
| 1486 | return | ||
| 1487 | } | ||
| 1488 | |||
| 1489 | // Makes a compiled rule and returns it | ||
| 1490 | func makeRule(config ruleMakingConfig) *CompiledRule { | ||
| 1491 | var rePattern string | ||
| 1492 | |||
| 1493 | if len(config.delimiter) > 0 { | ||
| 1494 | delimiter := string(config.delimiter) | ||
| 1495 | |||
| 1496 | if config.numberOfDelimiterChars > 1 { | ||
| 1497 | delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars) | ||
| 1498 | } | ||
| 1499 | |||
| 1500 | rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter) | ||
| 1501 | } else { | ||
| 1502 | rePattern = config.pattern | ||
| 1503 | } | ||
| 1504 | |||
| 1505 | regex := regexp2.MustCompile(rePattern, regexp2.None) | ||
| 1506 | |||
| 1507 | cRule := &CompiledRule{ | ||
| 1508 | Rule: Rule{rePattern, config.tokenType, config.mutator}, | ||
| 1509 | Regexp: regex, | ||
| 1510 | } | ||
| 1511 | |||
| 1512 | return cRule | ||
| 1513 | } | ||
| 1514 | |||
| 1515 | // Emitter for colon pairs, changes token state based on key and brackets | ||
| 1516 | func colonPair(tokenClass TokenType) Emitter { | ||
| 1517 | return EmitterFunc(func(groups []string, state *LexerState) Iterator { | ||
| 1518 | iterators := []Iterator{} | ||
| 1519 | tokens := []Token{ | ||
| 1520 | {Punctuation, state.NamedGroups[`colon`]}, | ||
| 1521 | {Punctuation, state.NamedGroups[`opening_delimiters`]}, | ||
| 1522 | {Punctuation, state.NamedGroups[`closing_delimiters`]}, | ||
| 1523 | } | ||
| 1524 | |||
| 1525 | // Append colon | ||
| 1526 | iterators = append(iterators, Literator(tokens[0])) | ||
| 1527 | |||
| 1528 | if tokenClass == NameAttribute { | ||
| 1529 | iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]})) | ||
| 1530 | } else { | ||
| 1531 | var keyTokenState string | ||
| 1532 | keyre := regexp.MustCompile(`^\d+$`) | ||
| 1533 | if keyre.MatchString(state.NamedGroups[`key`]) { | ||
| 1534 | keyTokenState = "common" | ||
| 1535 | } else { | ||
| 1536 | keyTokenState = "Q" | ||
| 1537 | } | ||
| 1538 | |||
| 1539 | // Use token state to Tokenise key | ||
| 1540 | if keyTokenState != "" { | ||
| 1541 | iterator, err := state.Lexer.Tokenise( | ||
| 1542 | &TokeniseOptions{ | ||
| 1543 | State: keyTokenState, | ||
| 1544 | Nested: true, | ||
| 1545 | }, state.NamedGroups[`key`]) | ||
| 1546 | |||
| 1547 | if err != nil { | ||
| 1548 | panic(err) | ||
| 1549 | } else { | ||
| 1550 | // Append key | ||
| 1551 | iterators = append(iterators, iterator) | ||
| 1552 | } | ||
| 1553 | } | ||
| 1554 | } | ||
| 1555 | |||
| 1556 | // Append punctuation | ||
| 1557 | iterators = append(iterators, Literator(tokens[1])) | ||
| 1558 | |||
| 1559 | var valueTokenState string | ||
| 1560 | |||
| 1561 | switch state.NamedGroups[`opening_delimiters`] { | ||
| 1562 | case "(", "{", "[": | ||
| 1563 | valueTokenState = "root" | ||
| 1564 | case "<<", "«": | ||
| 1565 | valueTokenState = "ww" | ||
| 1566 | case "<": | ||
| 1567 | valueTokenState = "Q" | ||
| 1568 | } | ||
| 1569 | |||
| 1570 | // Use token state to Tokenise value | ||
| 1571 | if valueTokenState != "" { | ||
| 1572 | iterator, err := state.Lexer.Tokenise( | ||
| 1573 | &TokeniseOptions{ | ||
| 1574 | State: valueTokenState, | ||
| 1575 | Nested: true, | ||
| 1576 | }, state.NamedGroups[`value`]) | ||
| 1577 | |||
| 1578 | if err != nil { | ||
| 1579 | panic(err) | ||
| 1580 | } else { | ||
| 1581 | // Append value | ||
| 1582 | iterators = append(iterators, iterator) | ||
| 1583 | } | ||
| 1584 | } | ||
| 1585 | // Append last punctuation | ||
| 1586 | iterators = append(iterators, Literator(tokens[2])) | ||
| 1587 | |||
| 1588 | return Concaterator(iterators...) | ||
| 1589 | }) | ||
| 1590 | } | ||
| 1591 | |||
| 1592 | // Emitter for quoting constructs, changes token state based on quote name and adverbs | ||
| 1593 | func quote(groups []string, state *LexerState) Iterator { | ||
| 1594 | keyword := state.NamedGroups[`keyword`] | ||
| 1595 | adverbsStr := state.NamedGroups[`adverbs`] | ||
| 1596 | iterators := []Iterator{} | ||
| 1597 | tokens := []Token{ | ||
| 1598 | {Keyword, keyword}, | ||
| 1599 | {StringAffix, adverbsStr}, | ||
| 1600 | {Text, state.NamedGroups[`ws`]}, | ||
| 1601 | {Punctuation, state.NamedGroups[`opening_delimiters`]}, | ||
| 1602 | {Punctuation, state.NamedGroups[`closing_delimiters`]}, | ||
| 1603 | } | ||
| 1604 | |||
| 1605 | // Append all tokens before dealing with the main string | ||
| 1606 | iterators = append(iterators, Literator(tokens[:4]...)) | ||
| 1607 | |||
| 1608 | var tokenStates []string | ||
| 1609 | |||
| 1610 | // Set tokenStates based on adverbs | ||
| 1611 | adverbs := strings.Split(adverbsStr, ":") | ||
| 1612 | for _, adverb := range adverbs { | ||
| 1613 | switch adverb { | ||
| 1614 | case "c", "closure": | ||
| 1615 | tokenStates = append(tokenStates, "Q-closure") | ||
| 1616 | case "qq": | ||
| 1617 | tokenStates = append(tokenStates, "qq") | ||
| 1618 | case "ww": | ||
| 1619 | tokenStates = append(tokenStates, "ww") | ||
| 1620 | case "s", "scalar", "a", "array", "h", "hash", "f", "function": | ||
| 1621 | tokenStates = append(tokenStates, "Q-variable") | ||
| 1622 | } | ||
| 1623 | } | ||
| 1624 | |||
| 1625 | var tokenState string | ||
| 1626 | |||
| 1627 | switch { | ||
| 1628 | case keyword == "qq" || contains(tokenStates, "qq"): | ||
| 1629 | tokenState = "qq" | ||
| 1630 | case adverbsStr == "ww" || contains(tokenStates, "ww"): | ||
| 1631 | tokenState = "ww" | ||
| 1632 | case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"): | ||
| 1633 | tokenState = "qq" | ||
| 1634 | case contains(tokenStates, "Q-closure"): | ||
| 1635 | tokenState = "Q-closure" | ||
| 1636 | case contains(tokenStates, "Q-variable"): | ||
| 1637 | tokenState = "Q-variable" | ||
| 1638 | default: | ||
| 1639 | tokenState = "Q" | ||
| 1640 | } | ||
| 1641 | |||
| 1642 | iterator, err := state.Lexer.Tokenise( | ||
| 1643 | &TokeniseOptions{ | ||
| 1644 | State: tokenState, | ||
| 1645 | Nested: true, | ||
| 1646 | }, state.NamedGroups[`value`]) | ||
| 1647 | |||
| 1648 | if err != nil { | ||
| 1649 | panic(err) | ||
| 1650 | } else { | ||
| 1651 | iterators = append(iterators, iterator) | ||
| 1652 | } | ||
| 1653 | |||
| 1654 | // Append the last punctuation | ||
| 1655 | iterators = append(iterators, Literator(tokens[4])) | ||
| 1656 | |||
| 1657 | return Concaterator(iterators...) | ||
| 1658 | } | ||
| 1659 | |||
| 1660 | // Emitter for pod config, tokenises the properties with "colon-pair-attribute" state | ||
| 1661 | func podConfig(groups []string, state *LexerState) Iterator { | ||
| 1662 | // Tokenise pod config | ||
| 1663 | iterator, err := state.Lexer.Tokenise( | ||
| 1664 | &TokeniseOptions{ | ||
| 1665 | State: "colon-pair-attribute", | ||
| 1666 | Nested: true, | ||
| 1667 | }, groups[0]) | ||
| 1668 | |||
| 1669 | if err != nil { | ||
| 1670 | panic(err) | ||
| 1671 | } else { | ||
| 1672 | return iterator | ||
| 1673 | } | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | // Emitter for pod code, tokenises the code based on the lang specified | ||
| 1677 | func podCode(groups []string, state *LexerState) Iterator { | ||
| 1678 | iterators := []Iterator{} | ||
| 1679 | tokens := []Token{ | ||
| 1680 | {Comment, state.NamedGroups[`ws`]}, | ||
| 1681 | {Keyword, state.NamedGroups[`keyword`]}, | ||
| 1682 | {Keyword, state.NamedGroups[`ws2`]}, | ||
| 1683 | {Keyword, state.NamedGroups[`name`]}, | ||
| 1684 | {StringDoc, state.NamedGroups[`value`]}, | ||
| 1685 | {Comment, state.NamedGroups[`ws3`]}, | ||
| 1686 | {Keyword, state.NamedGroups[`end_keyword`]}, | ||
| 1687 | {Keyword, state.NamedGroups[`ws4`]}, | ||
| 1688 | {Keyword, state.NamedGroups[`name`]}, | ||
| 1689 | } | ||
| 1690 | |||
| 1691 | // Append all tokens before dealing with the pod config | ||
| 1692 | iterators = append(iterators, Literator(tokens[:4]...)) | ||
| 1693 | |||
| 1694 | // Tokenise pod config | ||
| 1695 | iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state)) | ||
| 1696 | |||
| 1697 | langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`]) | ||
| 1698 | var lang string | ||
| 1699 | if len(langMatch) > 1 { | ||
| 1700 | lang = langMatch[1] | ||
| 1701 | } | ||
| 1702 | |||
| 1703 | // Tokenise code based on lang property | ||
| 1704 | sublexer := Get(lang) | ||
| 1705 | if sublexer != nil { | ||
| 1706 | iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`]) | ||
| 1707 | |||
| 1708 | if err != nil { | ||
| 1709 | panic(err) | ||
| 1710 | } else { | ||
| 1711 | iterators = append(iterators, iterator) | ||
| 1712 | } | ||
| 1713 | } else { | ||
| 1714 | iterators = append(iterators, Literator(tokens[4])) | ||
| 1715 | } | ||
| 1716 | |||
| 1717 | // Append the rest of the tokens | ||
| 1718 | iterators = append(iterators, Literator(tokens[5:]...)) | ||
| 1719 | |||
| 1720 | return Concaterator(iterators...) | ||
| 1721 | } | ||
