summaryrefslogtreecommitdiff
path: root/vendor/github.com/dlclark
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/dlclark')
-rw-r--r--vendor/github.com/dlclark/regexp2/.gitignore27
-rw-r--r--vendor/github.com/dlclark/regexp2/.travis.yml7
-rw-r--r--vendor/github.com/dlclark/regexp2/ATTRIB133
-rw-r--r--vendor/github.com/dlclark/regexp2/LICENSE21
-rw-r--r--vendor/github.com/dlclark/regexp2/README.md107
-rw-r--r--vendor/github.com/dlclark/regexp2/match.go347
-rw-r--r--vendor/github.com/dlclark/regexp2/regexp.go356
-rw-r--r--vendor/github.com/dlclark/regexp2/replace.go177
-rw-r--r--vendor/github.com/dlclark/regexp2/runner.go1634
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/charclass.go865
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/code.go274
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/escape.go94
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/fuzz.go20
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/parser.go2251
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/prefix.go896
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/replacerdata.go87
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/tree.go654
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/writer.go500
-rw-r--r--vendor/github.com/dlclark/regexp2/testoutput17061
19 files changed, 15511 insertions, 0 deletions
diff --git a/vendor/github.com/dlclark/regexp2/.gitignore b/vendor/github.com/dlclark/regexp2/.gitignore
new file mode 100644
index 0000000..fb844c3
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/.gitignore
@@ -0,0 +1,27 @@
1# Compiled Object files, Static and Dynamic libs (Shared Objects)
2*.o
3*.a
4*.so
5
6# Folders
7_obj
8_test
9
10# Architecture specific extensions/prefixes
11*.[568vq]
12[568vq].out
13
14*.cgo1.go
15*.cgo2.c
16_cgo_defun.c
17_cgo_gotypes.go
18_cgo_export.*
19
20_testmain.go
21
22*.exe
23*.test
24*.prof
25*.out
26
27.DS_Store
diff --git a/vendor/github.com/dlclark/regexp2/.travis.yml b/vendor/github.com/dlclark/regexp2/.travis.yml
new file mode 100644
index 0000000..a2da6be
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/.travis.yml
@@ -0,0 +1,7 @@
1language: go
2arch:
3 - AMD64
4 - ppc64le
5go:
6 - 1.9
7 - tip
diff --git a/vendor/github.com/dlclark/regexp2/ATTRIB b/vendor/github.com/dlclark/regexp2/ATTRIB
new file mode 100644
index 0000000..cdf4560
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/ATTRIB
@@ -0,0 +1,133 @@
1============
2These pieces of code were ported from dotnet/corefx:
3
4syntax/charclass.go (from RegexCharClass.cs): ported to use the built-in Go unicode classes. Canonicalize is
5 a direct port, but most of the other code required large changes because the C# implementation
6 used a string to represent the CharSet data structure and I cleaned that up in my implementation.
7
8syntax/code.go (from RegexCode.cs): ported literally with various cleanups and layout to make it more Go-ish.
9
10syntax/escape.go (from RegexParser.cs): ported Escape method and added some optimizations. Unescape is inspired by
11 the C# implementation but couldn't be directly ported because of the lack of do-while syntax in Go.
12
13syntax/parser.go (from RegexParser.cs and RegexOptions.cs): ported parser struct and associated methods as
14 literally as possible. Several language differences required changes. E.g. lack of pre/post-fix increments as
15 expressions, lack of do-while loops, lack of overloads, etc.
16
17syntax/prefix.go (from RegexFCD.cs and RegexBoyerMoore.cs): ported as literally as possible and added support
18 for unicode chars that are longer than the 16-bit char in C# for the 32-bit rune in Go.
19
20syntax/replacerdata.go (from RegexReplacement.cs): conceptually ported and re-organized to handle differences
21 in charclass implementation, and fix odd code layout between RegexParser.cs, Regex.cs, and RegexReplacement.cs.
22
23syntax/tree.go (from RegexTree.cs and RegexNode.cs): ported literally as possible.
24
25syntax/writer.go (from RegexWriter.cs): ported literally with minor changes to make it more Go-ish.
26
27match.go (from RegexMatch.cs): ported, simplified, and changed to handle Go's lack of inheritance.
28
29regexp.go (from Regex.cs and RegexOptions.cs): conceptually serves the same "starting point", but is simplified
30 and changed to handle differences in C# strings and Go strings/runes.
31
32replace.go (from RegexReplacement.cs): ported closely and then cleaned up to combine the MatchEvaluator and
33 simple string replace implementations.
34
35runner.go (from RegexRunner.cs): ported literally as possible.
36
37regexp_test.go (from CaptureTests.cs and GroupNamesAndNumbers.cs): conceptually ported, but the code was
38 manually structured like Go tests.
39
40replace_test.go (from RegexReplaceStringTest0.cs): conceptually ported
41
42rtl_test.go (from RightToLeft.cs): conceptually ported
43---
44dotnet/corefx was released under this license:
45
46The MIT License (MIT)
47
48Copyright (c) Microsoft Corporation
49
50Permission is hereby granted, free of charge, to any person obtaining a copy
51of this software and associated documentation files (the "Software"), to deal
52in the Software without restriction, including without limitation the rights
53to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
54copies of the Software, and to permit persons to whom the Software is
55furnished to do so, subject to the following conditions:
56
57The above copyright notice and this permission notice shall be included in all
58copies or substantial portions of the Software.
59
60THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
61IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
62FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
63AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
64LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
65OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
66SOFTWARE.
67
68============
69These pieces of code are copied from the Go framework:
70
71- The overall directory structure of regexp2 was inspired by the Go runtime regexp package.
72- The optimization in the escape method of syntax/escape.go is from the Go runtime QuoteMeta() func in regexp/regexp.go
73- The method signatures in regexp.go are designed to match the Go framework regexp methods closely
74- func regexp2.MustCompile and func quote are almost identical to the regexp package versions
75- BenchmarkMatch* and TestProgramTooLong* funcs in regexp_performance_test.go were copied from the framework
76 regexp/exec_test.go
77---
78The Go framework was released under this license:
79
80Copyright (c) 2012 The Go Authors. All rights reserved.
81
82Redistribution and use in source and binary forms, with or without
83modification, are permitted provided that the following conditions are
84met:
85
86 * Redistributions of source code must retain the above copyright
87notice, this list of conditions and the following disclaimer.
88 * Redistributions in binary form must reproduce the above
89copyright notice, this list of conditions and the following disclaimer
90in the documentation and/or other materials provided with the
91distribution.
92 * Neither the name of Google Inc. nor the names of its
93contributors may be used to endorse or promote products derived from
94this software without specific prior written permission.
95
96THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
97"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
98LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
99A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
100OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
101SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
102LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
103DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
104THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
105(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
106OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107
108============
109Some test data were gathered from the Mono project.
110
111regexp_mono_test.go: ported from https://github.com/mono/mono/blob/master/mcs/class/System/Test/System.Text.RegularExpressions/PerlTrials.cs
112---
113Mono tests released under this license:
114
115Permission is hereby granted, free of charge, to any person obtaining
116a copy of this software and associated documentation files (the
117"Software"), to deal in the Software without restriction, including
118without limitation the rights to use, copy, modify, merge, publish,
119distribute, sublicense, and/or sell copies of the Software, and to
120permit persons to whom the Software is furnished to do so, subject to
121the following conditions:
122
123The above copyright notice and this permission notice shall be
124included in all copies or substantial portions of the Software.
125
126THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
127EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
128MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
129NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
130LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
131OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
132WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
133
diff --git a/vendor/github.com/dlclark/regexp2/LICENSE b/vendor/github.com/dlclark/regexp2/LICENSE
new file mode 100644
index 0000000..fe83dfd
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/LICENSE
@@ -0,0 +1,21 @@
1The MIT License (MIT)
2
3Copyright (c) Doug Clark
4
5Permission is hereby granted, free of charge, to any person obtaining a copy
6of this software and associated documentation files (the "Software"), to deal
7in the Software without restriction, including without limitation the rights
8to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9copies of the Software, and to permit persons to whom the Software is
10furnished to do so, subject to the following conditions:
11
12The above copyright notice and this permission notice shall be included in all
13copies or substantial portions of the Software.
14
15THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21SOFTWARE.
diff --git a/vendor/github.com/dlclark/regexp2/README.md b/vendor/github.com/dlclark/regexp2/README.md
new file mode 100644
index 0000000..b404471
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/README.md
@@ -0,0 +1,107 @@
1# regexp2 - full featured regular expressions for Go
2Regexp2 is a feature-rich RegExp engine for Go. It doesn't have constant time guarantees like the built-in `regexp` package, but it allows backtracking and is compatible with Perl5 and .NET. You'll likely be better off with the RE2 engine from the `regexp` package and should only use this if you need to write very complex patterns or require compatibility with .NET.
3
4## Basis of the engine
5The engine is ported from the .NET framework's System.Text.RegularExpressions.Regex engine. That engine was open sourced in 2015 under the MIT license. There are some fundamental differences between .NET strings and Go strings that required a bit of borrowing from the Go framework regex engine as well. I cleaned up a couple of the dirtier bits during the port (regexcharclass.cs was terrible), but the parse tree, code emitted, and therefore patterns matched should be identical.
6
7## Installing
8This is a go-gettable library, so install is easy:
9
10 go get github.com/dlclark/regexp2/...
11
12## Usage
13Usage is similar to the Go `regexp` package. Just like in `regexp`, you start by converting a regex into a state machine via the `Compile` or `MustCompile` methods. They ultimately do the same thing, but `MustCompile` will panic if the regex is invalid. You can then use the provided `Regexp` struct to find matches repeatedly. A `Regexp` struct is safe to use across goroutines.
14
15```go
16re := regexp2.MustCompile(`Your pattern`, 0)
17if isMatch, _ := re.MatchString(`Something to match`); isMatch {
18 //do something
19}
20```
21
22The only error that the `*Match*` methods *should* return is a Timeout if you set the `re.MatchTimeout` field. Any other error is a bug in the `regexp2` package. If you need more details about capture groups in a match then use the `FindStringMatch` method, like so:
23
24```go
25if m, _ := re.FindStringMatch(`Something to match`); m != nil {
26 // the whole match is always group 0
27 fmt.Printf("Group 0: %v\n", m.String())
28
29 // you can get all the groups too
30 gps := m.Groups()
31
32 // a group can be captured multiple times, so each cap is separately addressable
33 fmt.Printf("Group 1, first capture: %v\n", gps[1].Captures[0].String())
34 fmt.Printf("Group 1, second capture: %v\n", gps[1].Captures[1].String())
35}
36```
37
38Group 0 is embedded in the Match. Group 0 is an automatically-assigned group that encompasses the whole pattern. This means that `m.String()` is the same as `m.Group.String()` and `m.Groups()[0].String()`
39
40The __last__ capture is embedded in each group, so `g.String()` will return the same thing as `g.Capture.String()` and `g.Captures[len(g.Captures)-1].String()`.
41
42If you want to find multiple matches from a single input string you should use the `FindNextMatch` method. For example, to implement a function similar to `regexp.FindAllString`:
43
44```go
45func regexp2FindAllString(re *regexp2.Regexp, s string) []string {
46 var matches []string
47 m, _ := re.FindStringMatch(s)
48 for m != nil {
49 matches = append(matches, m.String())
50 m, _ = re.FindNextMatch(m)
51 }
52 return matches
53}
54```
55
56`FindNextMatch` is optimized so that it re-uses the underlying string/rune slice.
57
58The internals of `regexp2` always operate on `[]rune` so `Index` and `Length` data in a `Match` always reference a position in `rune`s rather than `byte`s (even if the input was given as a string). This is a dramatic difference between `regexp` and `regexp2`. It's advisable to use the provided `String()` methods to avoid having to work with indices.
59
60## Compare `regexp` and `regexp2`
61| Category | regexp | regexp2 |
62| --- | --- | --- |
63| Catastrophic backtracking possible | no, constant execution time guarantees | yes, if your pattern is at risk you can use the `re.MatchTimeout` field |
64| Python-style capture groups `(?P<name>re)` | yes | no (yes in RE2 compat mode) |
65| .NET-style capture groups `(?<name>re)` or `(?'name're)` | no | yes |
66| comments `(?#comment)` | no | yes |
67| branch numbering reset `(?\|a\|b)` | no | no |
68| possessive match `(?>re)` | no | yes |
69| positive lookahead `(?=re)` | no | yes |
70| negative lookahead `(?!re)` | no | yes |
71| positive lookbehind `(?<=re)` | no | yes |
72| negative lookbehind `(?<!re)` | no | yes |
73| back reference `\1` | no | yes |
74| named back reference `\k'name'` | no | yes |
75| named ascii character class `[[:foo:]]`| yes | no (yes in RE2 compat mode) |
76| conditionals `(?(expr)yes\|no)` | no | yes |
77
78## RE2 compatibility mode
79The default behavior of `regexp2` is to match the .NET regexp engine, however the `RE2` option is provided to change the parsing to increase compatibility with RE2. Using the `RE2` option when compiling a regexp will not take away any features, but will change the following behaviors:
80* add support for named ascii character classes (e.g. `[[:foo:]]`)
81* add support for python-style capture groups (e.g. `(?P<name>re)`)
82* change singleline behavior for `$` to only match end of string (like RE2) (see [#24](https://github.com/dlclark/regexp2/issues/24))
83* change the character classes `\d` `\s` and `\w` to match the same characters as RE2. NOTE: if you also use the `ECMAScript` option then this will change the `\s` character class to match ECMAScript instead of RE2. ECMAScript allows more whitespace characters in `\s` than RE2 (but still fewer than the default behavior).
84* allow character escape sequences to have defaults. For example, by default `\_` isn't a known character escape and will fail to compile, but in RE2 mode it will match the literal character `_`
85
86```go
87re := regexp2.MustCompile(`Your RE2-compatible pattern`, regexp2.RE2)
88if isMatch, _ := re.MatchString(`Something to match`); isMatch {
89 //do something
90}
91```
92
93This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).
94
95## ECMAScript compatibility mode
96In this mode the engine provides compatibility with the [regex engine](https://tc39.es/ecma262/multipage/text-processing.html#sec-regexp-regular-expression-objects) described in the ECMAScript specification.
97
98Additionally, a `Unicode` mode is provided which allows parsing of `\u{CodePoint}` syntax; that syntax is only available when both the `ECMAScript` and `Unicode` options are provided.
99
100## Library features that I'm still working on
101- Regex split
102
103## Potential bugs
104I've run a battery of tests against regexp2 from various sources and found the debug output matches the .NET engine, but .NET and Go handle strings very differently. I've attempted to handle these differences, but most of my testing deals with basic ASCII with a little bit of multi-byte Unicode. There's a chance that there are bugs in the string handling related to character sets with supplementary Unicode chars. Right-to-Left support is coded, but not well tested either.
105
106## Find a bug?
107I'm open to new issues and pull requests with tests if you find something odd!
diff --git a/vendor/github.com/dlclark/regexp2/match.go b/vendor/github.com/dlclark/regexp2/match.go
new file mode 100644
index 0000000..1871cff
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/match.go
@@ -0,0 +1,347 @@
1package regexp2
2
3import (
4 "bytes"
5 "fmt"
6)
7
8// Match is a single regex result match that contains groups and repeated captures
9// -Groups
10// -Capture
11type Match struct {
12 Group //embeded group 0
13
14 regex *Regexp
15 otherGroups []Group
16
17 // input to the match
18 textpos int
19 textstart int
20
21 capcount int
22 caps []int
23 sparseCaps map[int]int
24
25 // output from the match
26 matches [][]int
27 matchcount []int
28
29 // whether we've done any balancing with this match. If we
30 // have done balancing, we'll need to do extra work in Tidy().
31 balancing bool
32}
33
34// Group is an explicit or implit (group 0) matched group within the pattern
35type Group struct {
36 Capture // the last capture of this group is embeded for ease of use
37
38 Name string // group name
39 Captures []Capture // captures of this group
40}
41
// Capture is a single capture of text within the larger original string.
// Index and Length are measured in runes, not bytes.
type Capture struct {
	// text is the original input as runes.
	text []rune
	// Index is the position in the original input where the first character
	// of the captured substring was found.
	Index int
	// Length is the length of the captured substring.
	Length int
}

// String returns the captured text as a string.
func (c *Capture) String() string {
	return string(c.Runes())
}

// Runes returns the captured text as a rune slice. The result is a sub-slice
// of the original input, not a copy.
func (c *Capture) Runes() []rune {
	start := c.Index
	end := start + c.Length
	return c.text[start:end]
}
62
63func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
64 m := Match{
65 regex: regex,
66 matchcount: make([]int, capcount),
67 matches: make([][]int, capcount),
68 textstart: startpos,
69 balancing: false,
70 }
71 m.Name = "0"
72 m.text = text
73 m.matches[0] = make([]int, 2)
74 return &m
75}
76
77func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
78 m := newMatch(regex, capcount, text, startpos)
79 m.sparseCaps = caps
80 return m
81}
82
83func (m *Match) reset(text []rune, textstart int) {
84 m.text = text
85 m.textstart = textstart
86 for i := 0; i < len(m.matchcount); i++ {
87 m.matchcount[i] = 0
88 }
89 m.balancing = false
90}
91
92func (m *Match) tidy(textpos int) {
93
94 interval := m.matches[0]
95 m.Index = interval[0]
96 m.Length = interval[1]
97 m.textpos = textpos
98 m.capcount = m.matchcount[0]
99 //copy our root capture to the list
100 m.Group.Captures = []Capture{m.Group.Capture}
101
102 if m.balancing {
103 // The idea here is that we want to compact all of our unbalanced captures. To do that we
104 // use j basically as a count of how many unbalanced captures we have at any given time
105 // (really j is an index, but j/2 is the count). First we skip past all of the real captures
106 // until we find a balance captures. Then we check each subsequent entry. If it's a balance
107 // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
108 // it down to the last free position.
109 for cap := 0; cap < len(m.matchcount); cap++ {
110 limit := m.matchcount[cap] * 2
111 matcharray := m.matches[cap]
112
113 var i, j int
114
115 for i = 0; i < limit; i++ {
116 if matcharray[i] < 0 {
117 break
118 }
119 }
120
121 for j = i; i < limit; i++ {
122 if matcharray[i] < 0 {
123 // skip negative values
124 j--
125 } else {
126 // but if we find something positive (an actual capture), copy it back to the last
127 // unbalanced position.
128 if i != j {
129 matcharray[j] = matcharray[i]
130 }
131 j++
132 }
133 }
134
135 m.matchcount[cap] = j / 2
136 }
137
138 m.balancing = false
139 }
140}
141
142// isMatched tells if a group was matched by capnum
143func (m *Match) isMatched(cap int) bool {
144 return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
145}
146
147// matchIndex returns the index of the last specified matched group by capnum
148func (m *Match) matchIndex(cap int) int {
149 i := m.matches[cap][m.matchcount[cap]*2-2]
150 if i >= 0 {
151 return i
152 }
153
154 return m.matches[cap][-3-i]
155}
156
157// matchLength returns the length of the last specified matched group by capnum
158func (m *Match) matchLength(cap int) int {
159 i := m.matches[cap][m.matchcount[cap]*2-1]
160 if i >= 0 {
161 return i
162 }
163
164 return m.matches[cap][-3-i]
165}
166
167// Nonpublic builder: add a capture to the group specified by "c"
168func (m *Match) addMatch(c, start, l int) {
169
170 if m.matches[c] == nil {
171 m.matches[c] = make([]int, 2)
172 }
173
174 capcount := m.matchcount[c]
175
176 if capcount*2+2 > len(m.matches[c]) {
177 oldmatches := m.matches[c]
178 newmatches := make([]int, capcount*8)
179 copy(newmatches, oldmatches[:capcount*2])
180 m.matches[c] = newmatches
181 }
182
183 m.matches[c][capcount*2] = start
184 m.matches[c][capcount*2+1] = l
185 m.matchcount[c] = capcount + 1
186 //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
187}
188
189// Nonpublic builder: Add a capture to balance the specified group. This is used by the
190// balanced match construct. (?<foo-foo2>...)
191//
192// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
193// However, since we have backtracking, we need to keep track of everything.
194func (m *Match) balanceMatch(c int) {
195 m.balancing = true
196
197 // we'll look at the last capture first
198 capcount := m.matchcount[c]
199 target := capcount*2 - 2
200
201 // first see if it is negative, and therefore is a reference to the next available
202 // capture group for balancing. If it is, we'll reset target to point to that capture.
203 if m.matches[c][target] < 0 {
204 target = -3 - m.matches[c][target]
205 }
206
207 // move back to the previous capture
208 target -= 2
209
210 // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
211 if target >= 0 && m.matches[c][target] < 0 {
212 m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
213 } else {
214 m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
215 }
216}
217
218// Nonpublic builder: removes a group match by capnum
219func (m *Match) removeMatch(c int) {
220 m.matchcount[c]--
221}
222
223// GroupCount returns the number of groups this match has matched
224func (m *Match) GroupCount() int {
225 return len(m.matchcount)
226}
227
228// GroupByName returns a group based on the name of the group, or nil if the group name does not exist
229func (m *Match) GroupByName(name string) *Group {
230 num := m.regex.GroupNumberFromName(name)
231 if num < 0 {
232 return nil
233 }
234 return m.GroupByNumber(num)
235}
236
237// GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
238func (m *Match) GroupByNumber(num int) *Group {
239 // check our sparse map
240 if m.sparseCaps != nil {
241 if newNum, ok := m.sparseCaps[num]; ok {
242 num = newNum
243 }
244 }
245 if num >= len(m.matchcount) || num < 0 {
246 return nil
247 }
248
249 if num == 0 {
250 return &m.Group
251 }
252
253 m.populateOtherGroups()
254
255 return &m.otherGroups[num-1]
256}
257
258// Groups returns all the capture groups, starting with group 0 (the full match)
259func (m *Match) Groups() []Group {
260 m.populateOtherGroups()
261 g := make([]Group, len(m.otherGroups)+1)
262 g[0] = m.Group
263 copy(g[1:], m.otherGroups)
264 return g
265}
266
267func (m *Match) populateOtherGroups() {
268 // Construct all the Group objects first time called
269 if m.otherGroups == nil {
270 m.otherGroups = make([]Group, len(m.matchcount)-1)
271 for i := 0; i < len(m.otherGroups); i++ {
272 m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
273 }
274 }
275}
276
277func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
278 c := m.matchcount[groupnum]
279 if c == 0 {
280 return
281 }
282
283 matches := m.matches[groupnum]
284
285 index := matches[(c-1)*2]
286 last := index + matches[(c*2)-1]
287
288 for ; index < last; index++ {
289 buf.WriteRune(m.text[index])
290 }
291}
292
293func newGroup(name string, text []rune, caps []int, capcount int) Group {
294 g := Group{}
295 g.text = text
296 if capcount > 0 {
297 g.Index = caps[(capcount-1)*2]
298 g.Length = caps[(capcount*2)-1]
299 }
300 g.Name = name
301 g.Captures = make([]Capture, capcount)
302 for i := 0; i < capcount; i++ {
303 g.Captures[i] = Capture{
304 text: text,
305 Index: caps[i*2],
306 Length: caps[i*2+1],
307 }
308 }
309 //log.Printf("newGroup! capcount %v, %+v", capcount, g)
310
311 return g
312}
313
314func (m *Match) dump() string {
315 buf := &bytes.Buffer{}
316 buf.WriteRune('\n')
317 if len(m.sparseCaps) > 0 {
318 for k, v := range m.sparseCaps {
319 fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
320 }
321 }
322
323 for i, g := range m.Groups() {
324 fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
325
326 for _, c := range g.Captures {
327 fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
328 }
329 }
330 /*
331 for i := 0; i < len(m.matchcount); i++ {
332 fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
333
334 for j := 0; j < m.matchcount[i]; j++ {
335 text := ""
336
337 if m.matches[i][j*2] >= 0 {
338 start := m.matches[i][j*2]
339 text = m.text[start : start+m.matches[i][j*2+1]]
340 }
341
342 fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
343 }
344 }
345 */
346 return buf.String()
347}
diff --git a/vendor/github.com/dlclark/regexp2/regexp.go b/vendor/github.com/dlclark/regexp2/regexp.go
new file mode 100644
index 0000000..818c766
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/regexp.go
@@ -0,0 +1,356 @@
1/*
2Package regexp2 is a regexp package that has an interface similar to Go's framework regexp engine but uses a
3more feature full regex engine behind the scenes.
4
5It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
6You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
7need to write very complex patterns or require compatibility with .NET.
8*/
9package regexp2
10
11import (
12 "errors"
13 "math"
14 "strconv"
15 "sync"
16 "time"
17
18 "github.com/dlclark/regexp2/syntax"
19)
20
// DefaultMatchTimeout is the timeout given to every Regexp created by
// Compile. Its value, the maximum time.Duration, means "forever".
var DefaultMatchTimeout = time.Duration(math.MaxInt64)
23
// Regexp is the representation of a compiled regular expression.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
	// MatchTimeout is the timeout used when trying to find matches.
	// Compile initializes it to DefaultMatchTimeout.
	MatchTimeout time.Duration

	// read-only after Compile
	pattern string       // as passed to Compile
	options RegexOptions // options

	caps     map[int]int    // capnum->index
	capnames map[string]int // capture group name -> index
	capslist []string       // sorted list of capture group names
	capsize  int            // size of the capture array

	code *syntax.Code // compiled program

	// cache of machines for running regexp
	// NOTE(review): muRun presumably guards runner; confirm in getRunner/putRunner.
	muRun  sync.Mutex
	runner []*runner
}
45
46// Compile parses a regular expression and returns, if successful,
47// a Regexp object that can be used to match against text.
48func Compile(expr string, opt RegexOptions) (*Regexp, error) {
49 // parse it
50 tree, err := syntax.Parse(expr, syntax.RegexOptions(opt))
51 if err != nil {
52 return nil, err
53 }
54
55 // translate it to code
56 code, err := syntax.Write(tree)
57 if err != nil {
58 return nil, err
59 }
60
61 // return it
62 return &Regexp{
63 pattern: expr,
64 options: opt,
65 caps: code.Caps,
66 capnames: tree.Capnames,
67 capslist: tree.Caplist,
68 capsize: code.Capsize,
69 code: code,
70 MatchTimeout: DefaultMatchTimeout,
71 }, nil
72}
73
74// MustCompile is like Compile but panics if the expression cannot be parsed.
75// It simplifies safe initialization of global variables holding compiled regular
76// expressions.
77func MustCompile(str string, opt RegexOptions) *Regexp {
78 regexp, error := Compile(str, opt)
79 if error != nil {
80 panic(`regexp2: Compile(` + quote(str) + `): ` + error.Error())
81 }
82 return regexp
83}
84
85// Escape adds backslashes to any special characters in the input string
86func Escape(input string) string {
87 return syntax.Escape(input)
88}
89
90// Unescape removes any backslashes from previously-escaped special characters in the input string
91func Unescape(input string) (string, error) {
92 return syntax.Unescape(input)
93}
94
95// String returns the source text used to compile the regular expression.
96func (re *Regexp) String() string {
97 return re.pattern
98}
99
// quote renders s as a Go string literal: a raw backquoted literal when s
// can be represented that way, otherwise an interpreted double-quoted one.
func quote(s string) string {
	if !strconv.CanBackquote(s) {
		return strconv.Quote(s)
	}
	return "`" + s + "`"
}
106
// RegexOptions impact the runtime and parsing behavior
// for each specific regex. They are settable in code as well
// as in the regex pattern itself.
type RegexOptions int32

// Option flags. Every value occupies its own bit, so flags can be combined
// with bitwise OR. The quoted letter next to a flag is presumably its inline
// pattern form -- TODO confirm against the parser.
// NOTE(review): only None carries the RegexOptions type; the other names are
// untyped constants.
const (
	None                    RegexOptions = 0x0
	IgnoreCase                           = 0x0001 // "i"
	Multiline                            = 0x0002 // "m"
	ExplicitCapture                      = 0x0004 // "n"
	Compiled                             = 0x0008 // "c"
	Singleline                           = 0x0010 // "s"
	IgnorePatternWhitespace              = 0x0020 // "x"
	RightToLeft                          = 0x0040 // "r"
	Debug                                = 0x0080 // "d"
	ECMAScript                           = 0x0100 // "e"
	RE2                                  = 0x0200 // RE2 (regexp package) compatibility mode
	Unicode                              = 0x0400 // "u"
)
126
127func (re *Regexp) RightToLeft() bool {
128 return re.options&RightToLeft != 0
129}
130
131func (re *Regexp) Debug() bool {
132 return re.options&Debug != 0
133}
134
135// Replace searches the input string and replaces each match found with the replacement text.
136// Count will limit the number of matches attempted and startAt will allow
137// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
138// Set startAt and count to -1 to go through the whole string
139func (re *Regexp) Replace(input, replacement string, startAt, count int) (string, error) {
140 data, err := syntax.NewReplacerData(replacement, re.caps, re.capsize, re.capnames, syntax.RegexOptions(re.options))
141 if err != nil {
142 return "", err
143 }
144 //TODO: cache ReplacerData
145
146 return replace(re, data, nil, input, startAt, count)
147}
148
149// ReplaceFunc searches the input string and replaces each match found using the string from the evaluator
150// Count will limit the number of matches attempted and startAt will allow
151// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
152// Set startAt and count to -1 to go through the whole string.
153func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, count int) (string, error) {
154 return replace(re, nil, evaluator, input, startAt, count)
155}
156
157// FindStringMatch searches the input string for a Regexp match
158func (re *Regexp) FindStringMatch(s string) (*Match, error) {
159 // convert string to runes
160 return re.run(false, -1, getRunes(s))
161}
162
163// FindRunesMatch searches the input rune slice for a Regexp match
164func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
165 return re.run(false, -1, r)
166}
167
168// FindStringMatchStartingAt searches the input string for a Regexp match starting at the startAt index
169func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, error) {
170 if startAt > len(s) {
171 return nil, errors.New("startAt must be less than the length of the input string")
172 }
173 r, startAt := re.getRunesAndStart(s, startAt)
174 if startAt == -1 {
175 // we didn't find our start index in the string -- that's a problem
176 return nil, errors.New("startAt must align to the start of a valid rune in the input string")
177 }
178
179 return re.run(false, startAt, r)
180}
181
182// FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index
183func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
184 return re.run(false, startAt, r)
185}
186
187// FindNextMatch returns the next match in the same input string as the match parameter.
188// Will return nil if there is no next match or if given a nil match.
189func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
190 if m == nil {
191 return nil, nil
192 }
193
194 // If previous match was empty, advance by one before matching to prevent
195 // infinite loop
196 startAt := m.textpos
197 if m.Length == 0 {
198 if m.textpos == len(m.text) {
199 return nil, nil
200 }
201
202 if re.RightToLeft() {
203 startAt--
204 } else {
205 startAt++
206 }
207 }
208 return re.run(false, startAt, m.text)
209}
210
211// MatchString return true if the string matches the regex
212// error will be set if a timeout occurs
213func (re *Regexp) MatchString(s string) (bool, error) {
214 m, err := re.run(true, -1, getRunes(s))
215 if err != nil {
216 return false, err
217 }
218 return m != nil, nil
219}
220
221func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
222 if startAt < 0 {
223 if re.RightToLeft() {
224 r := getRunes(s)
225 return r, len(r)
226 }
227 return getRunes(s), 0
228 }
229 ret := make([]rune, len(s))
230 i := 0
231 runeIdx := -1
232 for strIdx, r := range s {
233 if strIdx == startAt {
234 runeIdx = i
235 }
236 ret[i] = r
237 i++
238 }
239 if startAt == len(s) {
240 runeIdx = i
241 }
242 return ret[:i], runeIdx
243}
244
// getRunes decodes s into a slice of runes.
func getRunes(s string) []rune {
	runes := []rune(s)
	return runes
}
248
249// MatchRunes return true if the runes matches the regex
250// error will be set if a timeout occurs
251func (re *Regexp) MatchRunes(r []rune) (bool, error) {
252 m, err := re.run(true, -1, r)
253 if err != nil {
254 return false, err
255 }
256 return m != nil, nil
257}
258
259// GetGroupNames Returns the set of strings used to name capturing groups in the expression.
260func (re *Regexp) GetGroupNames() []string {
261 var result []string
262
263 if re.capslist == nil {
264 result = make([]string, re.capsize)
265
266 for i := 0; i < len(result); i++ {
267 result[i] = strconv.Itoa(i)
268 }
269 } else {
270 result = make([]string, len(re.capslist))
271 copy(result, re.capslist)
272 }
273
274 return result
275}
276
277// GetGroupNumbers returns the integer group numbers corresponding to a group name.
278func (re *Regexp) GetGroupNumbers() []int {
279 var result []int
280
281 if re.caps == nil {
282 result = make([]int, re.capsize)
283
284 for i := 0; i < len(result); i++ {
285 result[i] = i
286 }
287 } else {
288 result = make([]int, len(re.caps))
289
290 for k, v := range re.caps {
291 result[v] = k
292 }
293 }
294
295 return result
296}
297
298// GroupNameFromNumber retrieves a group name that corresponds to a group number.
299// It will return "" for and unknown group number. Unnamed groups automatically
300// receive a name that is the decimal string equivalent of its number.
301func (re *Regexp) GroupNameFromNumber(i int) string {
302 if re.capslist == nil {
303 if i >= 0 && i < re.capsize {
304 return strconv.Itoa(i)
305 }
306
307 return ""
308 }
309
310 if re.caps != nil {
311 var ok bool
312 if i, ok = re.caps[i]; !ok {
313 return ""
314 }
315 }
316
317 if i >= 0 && i < len(re.capslist) {
318 return re.capslist[i]
319 }
320
321 return ""
322}
323
324// GroupNumberFromName returns a group number that corresponds to a group name.
325// Returns -1 if the name is not a recognized group name. Numbered groups
326// automatically get a group name that is the decimal string equivalent of its number.
327func (re *Regexp) GroupNumberFromName(name string) int {
328 // look up name if we have a hashtable of names
329 if re.capnames != nil {
330 if k, ok := re.capnames[name]; ok {
331 return k
332 }
333
334 return -1
335 }
336
337 // convert to an int if it looks like a number
338 result := 0
339 for i := 0; i < len(name); i++ {
340 ch := name[i]
341
342 if ch > '9' || ch < '0' {
343 return -1
344 }
345
346 result *= 10
347 result += int(ch - '0')
348 }
349
350 // return int if it's in range
351 if result >= 0 && result < re.capsize {
352 return result
353 }
354
355 return -1
356}
diff --git a/vendor/github.com/dlclark/regexp2/replace.go b/vendor/github.com/dlclark/regexp2/replace.go
new file mode 100644
index 0000000..0376bd9
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/replace.go
@@ -0,0 +1,177 @@
1package regexp2
2
3import (
4 "bytes"
5 "errors"
6
7 "github.com/dlclark/regexp2/syntax"
8)
9
// Codes for the special (non-group) insertion rules of a replacement pattern.
// replacementImpl decodes a negative rule r as -replaceSpecials-1-r: rules
// below -replaceSpecials yield a group number, the rest yield one of the
// negative codes declared here.
const (
	replaceSpecials     = 4  // number of special codes
	replaceLeftPortion  = -1 // the input text before the match
	replaceRightPortion = -2 // the input text after the match
	replaceLastGroup    = -3 // the value of the last group
	replaceWholeString  = -4 // the entire input text
)
17
// MatchEvaluator is a function that takes a match and returns the replacement
// string to be used for it. The match is passed by value.
type MatchEvaluator func(Match) string
20
21// Three very similar algorithms appear below: replace (pattern),
22// replace (evaluator), and split.
23
24// Replace Replaces all occurrences of the regex in the string with the
25// replacement pattern.
26//
27// Note that the special case of no matches is handled on its own:
28// with no matches, the input string is returned unchanged.
29// The right-to-left case is split out because StringBuilder
30// doesn't handle right-to-left string building directly very well.
31func replace(regex *Regexp, data *syntax.ReplacerData, evaluator MatchEvaluator, input string, startAt, count int) (string, error) {
32 if count < -1 {
33 return "", errors.New("Count too small")
34 }
35 if count == 0 {
36 return "", nil
37 }
38
39 m, err := regex.FindStringMatchStartingAt(input, startAt)
40
41 if err != nil {
42 return "", err
43 }
44 if m == nil {
45 return input, nil
46 }
47
48 buf := &bytes.Buffer{}
49 text := m.text
50
51 if !regex.RightToLeft() {
52 prevat := 0
53 for m != nil {
54 if m.Index != prevat {
55 buf.WriteString(string(text[prevat:m.Index]))
56 }
57 prevat = m.Index + m.Length
58 if evaluator == nil {
59 replacementImpl(data, buf, m)
60 } else {
61 buf.WriteString(evaluator(*m))
62 }
63
64 count--
65 if count == 0 {
66 break
67 }
68 m, err = regex.FindNextMatch(m)
69 if err != nil {
70 return "", nil
71 }
72 }
73
74 if prevat < len(text) {
75 buf.WriteString(string(text[prevat:]))
76 }
77 } else {
78 prevat := len(text)
79 var al []string
80
81 for m != nil {
82 if m.Index+m.Length != prevat {
83 al = append(al, string(text[m.Index+m.Length:prevat]))
84 }
85 prevat = m.Index
86 if evaluator == nil {
87 replacementImplRTL(data, &al, m)
88 } else {
89 al = append(al, evaluator(*m))
90 }
91
92 count--
93 if count == 0 {
94 break
95 }
96 m, err = regex.FindNextMatch(m)
97 if err != nil {
98 return "", nil
99 }
100 }
101
102 if prevat > 0 {
103 buf.WriteString(string(text[:prevat]))
104 }
105
106 for i := len(al) - 1; i >= 0; i-- {
107 buf.WriteString(al[i])
108 }
109 }
110
111 return buf.String(), nil
112}
113
114// Given a Match, emits into the StringBuilder the evaluated
115// substitution pattern.
116func replacementImpl(data *syntax.ReplacerData, buf *bytes.Buffer, m *Match) {
117 for _, r := range data.Rules {
118
119 if r >= 0 { // string lookup
120 buf.WriteString(data.Strings[r])
121 } else if r < -replaceSpecials { // group lookup
122 m.groupValueAppendToBuf(-replaceSpecials-1-r, buf)
123 } else {
124 switch -replaceSpecials - 1 - r { // special insertion patterns
125 case replaceLeftPortion:
126 for i := 0; i < m.Index; i++ {
127 buf.WriteRune(m.text[i])
128 }
129 case replaceRightPortion:
130 for i := m.Index + m.Length; i < len(m.text); i++ {
131 buf.WriteRune(m.text[i])
132 }
133 case replaceLastGroup:
134 m.groupValueAppendToBuf(m.GroupCount()-1, buf)
135 case replaceWholeString:
136 for i := 0; i < len(m.text); i++ {
137 buf.WriteRune(m.text[i])
138 }
139 }
140 }
141 }
142}
143
144func replacementImplRTL(data *syntax.ReplacerData, al *[]string, m *Match) {
145 l := *al
146 buf := &bytes.Buffer{}
147
148 for _, r := range data.Rules {
149 buf.Reset()
150 if r >= 0 { // string lookup
151 l = append(l, data.Strings[r])
152 } else if r < -replaceSpecials { // group lookup
153 m.groupValueAppendToBuf(-replaceSpecials-1-r, buf)
154 l = append(l, buf.String())
155 } else {
156 switch -replaceSpecials - 1 - r { // special insertion patterns
157 case replaceLeftPortion:
158 for i := 0; i < m.Index; i++ {
159 buf.WriteRune(m.text[i])
160 }
161 case replaceRightPortion:
162 for i := m.Index + m.Length; i < len(m.text); i++ {
163 buf.WriteRune(m.text[i])
164 }
165 case replaceLastGroup:
166 m.groupValueAppendToBuf(m.GroupCount()-1, buf)
167 case replaceWholeString:
168 for i := 0; i < len(m.text); i++ {
169 buf.WriteRune(m.text[i])
170 }
171 }
172 l = append(l, buf.String())
173 }
174 }
175
176 *al = l
177}
diff --git a/vendor/github.com/dlclark/regexp2/runner.go b/vendor/github.com/dlclark/regexp2/runner.go
new file mode 100644
index 0000000..4d7f9b0
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/runner.go
@@ -0,0 +1,1634 @@
1package regexp2
2
3import (
4 "bytes"
5 "errors"
6 "fmt"
7 "math"
8 "strconv"
9 "strings"
10 "time"
11 "unicode"
12
13 "github.com/dlclark/regexp2/syntax"
14)
15
// runner holds the mutable state of one matching attempt: the text being
// searched, the three working stacks of the backtracking engine, the match
// under construction and the timeout bookkeeping.
type runner struct {
	re   *Regexp
	code *syntax.Code

	runtextstart int // starting point for search

	runtext    []rune // text to search
	runtextpos int    // current position in text
	runtextend int    // set to len(runtext) by scan

	// The backtracking stack. Opcodes use this to store data regarding
	// what they have matched and where to backtrack to. Each "frame" on
	// the stack takes the form of [CodePosition Data1 Data2...], where
	// CodePosition is the position of the current opcode and
	// the data values are all optional. The CodePosition can be negative, and
	// these values (also called "back2") are used by the BranchMark family of opcodes
	// to indicate whether they are backtracking after a successful or failed
	// match.
	// When we backtrack, we pop the CodePosition off the stack, set the current
	// instruction pointer to that code position, and mark the opcode
	// with a backtracking flag ("Back"). Each opcode then knows how to
	// handle its own data.
	runtrack    []int
	runtrackpos int

	// This stack is used to track text positions across different opcodes.
	// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
	// pair. SetMark records the text position before we match a*b. Then
	// CaptureMark uses that position to figure out where the capture starts.
	// Opcodes which push onto this stack are always paired with other opcodes
	// which will pop the value from it later. A successful match should mean
	// that this stack is empty.
	runstack    []int
	runstackpos int

	// The crawl stack is used to keep track of captures. Every time a group
	// has a capture, we push its group number onto the runcrawl stack. In
	// the case of a balanced match, we push BOTH groups onto the stack.
	runcrawl    []int
	runcrawlpos int

	runtrackcount int // count of states that may do backtracking

	runmatch *Match // result object

	ignoreTimeout       bool          // set by scan when the timeout equals the maximum time.Duration
	timeout             time.Duration // match timeout as passed to scan (a time.Duration, not a millisecond count)
	timeoutChecksToSkip int
	timeoutAt           time.Time

	operator        syntax.InstOp // opcode currently being executed (see execute)
	codepos         int
	rightToLeft     bool
	caseInsensitive bool
}
71
72// run searches for matches and can continue from the previous match
73//
74// quick is usually false, but can be true to not return matches, just put it in caches
75// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
76// input is the string to search for our regex pattern
77func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
78
79 // get a cached runner
80 runner := re.getRunner()
81 defer re.putRunner(runner)
82
83 if textstart < 0 {
84 if re.RightToLeft() {
85 textstart = len(input)
86 } else {
87 textstart = 0
88 }
89 }
90
91 return runner.scan(input, textstart, quick, re.MatchTimeout)
92}
93
// scan searches rt for the first match, beginning at textstart and moving
// in the pattern's direction (towards 0 for RightToLeft patterns, towards
// len(rt) otherwise). It returns the match, nil when no position matches,
// or an error from the timeout check or the engine.
//
// scan only loads the runner's fields and drives the search loop; the actual
// matching happens in execute. Before each attempt findFirstChar is consulted
// and the engine runs only when it reports true.
func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
	r.timeout = timeout
	// The maximum duration means "forever": remember that in ignoreTimeout.
	r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
	r.runtextstart = textstart
	r.runtext = rt
	r.runtextend = len(rt)

	// stoppos is the last position to try; bump is the step between attempts.
	stoppos := r.runtextend
	bump := 1

	if r.re.RightToLeft() {
		bump = -1
		stoppos = 0
	}

	r.runtextpos = textstart
	// The match object is initialized lazily, on the first engine run.
	initted := false

	r.startTimeoutWatch()
	for {
		if r.re.Debug() {
			//fmt.Printf("\nSearch content: %v\n", string(r.runtext))
			fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
			fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
		}

		if r.findFirstChar() {
			if err := r.checkTimeout(); err != nil {
				return nil, err
			}

			if !initted {
				r.initMatch()
				initted = true
			}

			if r.re.Debug() {
				fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
			}

			if err := r.execute(); err != nil {
				return nil, err
			}

			// Group 0 having a capture means the whole pattern matched.
			if r.runmatch.matchcount[0] > 0 {
				// We'll return a match even if it touches a previous empty match
				return r.tidyMatch(quick), nil
			}

			// reset state for another go
			r.runtrackpos = len(r.runtrack)
			r.runstackpos = len(r.runstack)
			r.runcrawlpos = len(r.runcrawl)
		}

		// failure!

		// Every position up to and including stoppos has been tried.
		if r.runtextpos == stoppos {
			r.tidyMatch(true)
			return nil, nil
		}

		// Recognize leading []* and various anchors, and bump on failure accordingly

		// r.bump by one and start again

		r.runtextpos += bump
	}
	// We never get here
}
174
175func (r *runner) execute() error {
176
177 r.goTo(0)
178
179 for {
180
181 if r.re.Debug() {
182 r.dumpState()
183 }
184
185 if err := r.checkTimeout(); err != nil {
186 return err
187 }
188
189 switch r.operator {
190 case syntax.Stop:
191 return nil
192
193 case syntax.Nothing:
194 break
195
196 case syntax.Goto:
197 r.goTo(r.operand(0))
198 continue
199
200 case syntax.Testref:
201 if !r.runmatch.isMatched(r.operand(0)) {
202 break
203 }
204 r.advance(1)
205 continue
206
207 case syntax.Lazybranch:
208 r.trackPush1(r.textPos())
209 r.advance(1)
210 continue
211
212 case syntax.Lazybranch | syntax.Back:
213 r.trackPop()
214 r.textto(r.trackPeek())
215 r.goTo(r.operand(0))
216 continue
217
218 case syntax.Setmark:
219 r.stackPush(r.textPos())
220 r.trackPush()
221 r.advance(0)
222 continue
223
224 case syntax.Nullmark:
225 r.stackPush(-1)
226 r.trackPush()
227 r.advance(0)
228 continue
229
230 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
231 r.stackPop()
232 break
233
234 case syntax.Getmark:
235 r.stackPop()
236 r.trackPush1(r.stackPeek())
237 r.textto(r.stackPeek())
238 r.advance(0)
239 continue
240
241 case syntax.Getmark | syntax.Back:
242 r.trackPop()
243 r.stackPush(r.trackPeek())
244 break
245
246 case syntax.Capturemark:
247 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
248 break
249 }
250 r.stackPop()
251 if r.operand(1) != -1 {
252 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
253 } else {
254 r.capture(r.operand(0), r.stackPeek(), r.textPos())
255 }
256 r.trackPush1(r.stackPeek())
257
258 r.advance(2)
259
260 continue
261
262 case syntax.Capturemark | syntax.Back:
263 r.trackPop()
264 r.stackPush(r.trackPeek())
265 r.uncapture()
266 if r.operand(0) != -1 && r.operand(1) != -1 {
267 r.uncapture()
268 }
269
270 break
271
272 case syntax.Branchmark:
273 r.stackPop()
274
275 matched := r.textPos() - r.stackPeek()
276
277 if matched != 0 { // Nonempty match -> loop now
278 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
279 r.stackPush(r.textPos()) // Make new mark
280 r.goTo(r.operand(0)) // Loop
281 } else { // Empty match -> straight now
282 r.trackPushNeg1(r.stackPeek()) // Save old mark
283 r.advance(1) // Straight
284 }
285 continue
286
287 case syntax.Branchmark | syntax.Back:
288 r.trackPopN(2)
289 r.stackPop()
290 r.textto(r.trackPeekN(1)) // Recall position
291 r.trackPushNeg1(r.trackPeek()) // Save old mark
292 r.advance(1) // Straight
293 continue
294
295 case syntax.Branchmark | syntax.Back2:
296 r.trackPop()
297 r.stackPush(r.trackPeek()) // Recall old mark
298 break // Backtrack
299
300 case syntax.Lazybranchmark:
301 {
302 // We hit this the first time through a lazy loop and after each
303 // successful match of the inner expression. It simply continues
304 // on and doesn't loop.
305 r.stackPop()
306
307 oldMarkPos := r.stackPeek()
308
309 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
310 if oldMarkPos != -1 {
311 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
312 } else {
313 r.trackPush2(r.textPos(), r.textPos())
314 }
315 } else {
316 // The inner expression found an empty match, so we'll go directly to 'back2' if we
317 // backtrack. In this case, we need to push something on the stack, since back2 pops.
318 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
319 // position associated with that empty match.
320 r.stackPush(oldMarkPos)
321
322 r.trackPushNeg1(r.stackPeek()) // Save old mark
323 }
324 r.advance(1)
325 continue
326 }
327
328 case syntax.Lazybranchmark | syntax.Back:
329
330 // After the first time, Lazybranchmark | syntax.Back occurs
331 // with each iteration of the loop, and therefore with every attempted
332 // match of the inner expression. We'll try to match the inner expression,
333 // then go back to Lazybranchmark if successful. If the inner expression
334 // fails, we go to Lazybranchmark | syntax.Back2
335
336 r.trackPopN(2)
337 pos := r.trackPeekN(1)
338 r.trackPushNeg1(r.trackPeek()) // Save old mark
339 r.stackPush(pos) // Make new mark
340 r.textto(pos) // Recall position
341 r.goTo(r.operand(0)) // Loop
342 continue
343
344 case syntax.Lazybranchmark | syntax.Back2:
345 // The lazy loop has failed. We'll do a true backtrack and
346 // start over before the lazy loop.
347 r.stackPop()
348 r.trackPop()
349 r.stackPush(r.trackPeek()) // Recall old mark
350 break
351
352 case syntax.Setcount:
353 r.stackPush2(r.textPos(), r.operand(0))
354 r.trackPush()
355 r.advance(1)
356 continue
357
358 case syntax.Nullcount:
359 r.stackPush2(-1, r.operand(0))
360 r.trackPush()
361 r.advance(1)
362 continue
363
364 case syntax.Setcount | syntax.Back:
365 r.stackPopN(2)
366 break
367
368 case syntax.Nullcount | syntax.Back:
369 r.stackPopN(2)
370 break
371
372 case syntax.Branchcount:
373 // r.stackPush:
374 // 0: Mark
375 // 1: Count
376
377 r.stackPopN(2)
378 mark := r.stackPeek()
379 count := r.stackPeekN(1)
380 matched := r.textPos() - mark
381
382 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
383 r.trackPushNeg2(mark, count) // Save old mark, count
384 r.advance(2) // Straight
385 } else { // Nonempty match -> count+loop now
386 r.trackPush1(mark) // remember mark
387 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
388 r.goTo(r.operand(0)) // Loop
389 }
390 continue
391
392 case syntax.Branchcount | syntax.Back:
393 // r.trackPush:
394 // 0: Previous mark
395 // r.stackPush:
396 // 0: Mark (= current pos, discarded)
397 // 1: Count
398 r.trackPop()
399 r.stackPopN(2)
400 if r.stackPeekN(1) > 0 { // Positive -> can go straight
401 r.textto(r.stackPeek()) // Zap to mark
402 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
403 r.advance(2) // Straight
404 continue
405 }
406 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
407 break
408
409 case syntax.Branchcount | syntax.Back2:
410 // r.trackPush:
411 // 0: Previous mark
412 // 1: Previous count
413 r.trackPopN(2)
414 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
415 break // Backtrack
416
417 case syntax.Lazybranchcount:
418 // r.stackPush:
419 // 0: Mark
420 // 1: Count
421
422 r.stackPopN(2)
423 mark := r.stackPeek()
424 count := r.stackPeekN(1)
425
426 if count < 0 { // Negative count -> loop now
427 r.trackPushNeg1(mark) // Save old mark
428 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
429 r.goTo(r.operand(0)) // Loop
430 } else { // Nonneg count -> straight now
431 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
432 r.advance(2) // Straight
433 }
434 continue
435
436 case syntax.Lazybranchcount | syntax.Back:
437 // r.trackPush:
438 // 0: Mark
439 // 1: Count
440 // 2: r.textPos
441
442 r.trackPopN(3)
443 mark := r.trackPeek()
444 textpos := r.trackPeekN(2)
445
446 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
447 r.textto(textpos) // Recall position
448 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
449 r.trackPushNeg1(mark) // Save old mark
450 r.goTo(r.operand(0)) // Loop
451 continue
452 } else { // Max loops or empty match -> backtrack
453 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
454 break // backtrack
455 }
456
457 case syntax.Lazybranchcount | syntax.Back2:
458 // r.trackPush:
459 // 0: Previous mark
460 // r.stackPush:
461 // 0: Mark (== current pos, discarded)
462 // 1: Count
463 r.trackPop()
464 r.stackPopN(2)
465 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
466 break // Backtrack
467
468 case syntax.Setjump:
469 r.stackPush2(r.trackpos(), r.crawlpos())
470 r.trackPush()
471 r.advance(0)
472 continue
473
474 case syntax.Setjump | syntax.Back:
475 r.stackPopN(2)
476 break
477
478 case syntax.Backjump:
479 // r.stackPush:
480 // 0: Saved trackpos
481 // 1: r.crawlpos
482 r.stackPopN(2)
483 r.trackto(r.stackPeek())
484
485 for r.crawlpos() != r.stackPeekN(1) {
486 r.uncapture()
487 }
488
489 break
490
491 case syntax.Forejump:
492 // r.stackPush:
493 // 0: Saved trackpos
494 // 1: r.crawlpos
495 r.stackPopN(2)
496 r.trackto(r.stackPeek())
497 r.trackPush1(r.stackPeekN(1))
498 r.advance(0)
499 continue
500
501 case syntax.Forejump | syntax.Back:
502 // r.trackPush:
503 // 0: r.crawlpos
504 r.trackPop()
505
506 for r.crawlpos() != r.trackPeek() {
507 r.uncapture()
508 }
509
510 break
511
512 case syntax.Bol:
513 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
514 break
515 }
516 r.advance(0)
517 continue
518
519 case syntax.Eol:
520 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
521 break
522 }
523 r.advance(0)
524 continue
525
526 case syntax.Boundary:
527 if !r.isBoundary(r.textPos(), 0, r.runtextend) {
528 break
529 }
530 r.advance(0)
531 continue
532
533 case syntax.Nonboundary:
534 if r.isBoundary(r.textPos(), 0, r.runtextend) {
535 break
536 }
537 r.advance(0)
538 continue
539
540 case syntax.ECMABoundary:
541 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
542 break
543 }
544 r.advance(0)
545 continue
546
547 case syntax.NonECMABoundary:
548 if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
549 break
550 }
551 r.advance(0)
552 continue
553
554 case syntax.Beginning:
555 if r.leftchars() > 0 {
556 break
557 }
558 r.advance(0)
559 continue
560
561 case syntax.Start:
562 if r.textPos() != r.textstart() {
563 break
564 }
565 r.advance(0)
566 continue
567
568 case syntax.EndZ:
569 rchars := r.rightchars()
570 if rchars > 1 {
571 break
572 }
573 // RE2 and EcmaScript define $ as "asserts position at the end of the string"
574 // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
575 if (r.re.options & (RE2 | ECMAScript)) != 0 {
576 // RE2/Ecmascript mode
577 if rchars > 0 {
578 break
579 }
580 } else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
581 // "regular" mode
582 break
583 }
584
585 r.advance(0)
586 continue
587
588 case syntax.End:
589 if r.rightchars() > 0 {
590 break
591 }
592 r.advance(0)
593 continue
594
595 case syntax.One:
596 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
597 break
598 }
599
600 r.advance(1)
601 continue
602
603 case syntax.Notone:
604 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
605 break
606 }
607
608 r.advance(1)
609 continue
610
611 case syntax.Set:
612
613 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
614 break
615 }
616
617 r.advance(1)
618 continue
619
620 case syntax.Multi:
621 if !r.runematch(r.code.Strings[r.operand(0)]) {
622 break
623 }
624
625 r.advance(1)
626 continue
627
628 case syntax.Ref:
629
630 capnum := r.operand(0)
631
632 if r.runmatch.isMatched(capnum) {
633 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
634 break
635 }
636 } else {
637 if (r.re.options & ECMAScript) == 0 {
638 break
639 }
640 }
641
642 r.advance(1)
643 continue
644
645 case syntax.Onerep:
646
647 c := r.operand(1)
648
649 if r.forwardchars() < c {
650 break
651 }
652
653 ch := rune(r.operand(0))
654
655 for c > 0 {
656 if r.forwardcharnext() != ch {
657 goto BreakBackward
658 }
659 c--
660 }
661
662 r.advance(2)
663 continue
664
665 case syntax.Notonerep:
666
667 c := r.operand(1)
668
669 if r.forwardchars() < c {
670 break
671 }
672 ch := rune(r.operand(0))
673
674 for c > 0 {
675 if r.forwardcharnext() == ch {
676 goto BreakBackward
677 }
678 c--
679 }
680
681 r.advance(2)
682 continue
683
684 case syntax.Setrep:
685
686 c := r.operand(1)
687
688 if r.forwardchars() < c {
689 break
690 }
691
692 set := r.code.Sets[r.operand(0)]
693
694 for c > 0 {
695 if !set.CharIn(r.forwardcharnext()) {
696 goto BreakBackward
697 }
698 c--
699 }
700
701 r.advance(2)
702 continue
703
704 case syntax.Oneloop:
705
706 c := r.operand(1)
707
708 if c > r.forwardchars() {
709 c = r.forwardchars()
710 }
711
712 ch := rune(r.operand(0))
713 i := c
714
715 for ; i > 0; i-- {
716 if r.forwardcharnext() != ch {
717 r.backwardnext()
718 break
719 }
720 }
721
722 if c > i {
723 r.trackPush2(c-i-1, r.textPos()-r.bump())
724 }
725
726 r.advance(2)
727 continue
728
729 case syntax.Notoneloop:
730
731 c := r.operand(1)
732
733 if c > r.forwardchars() {
734 c = r.forwardchars()
735 }
736
737 ch := rune(r.operand(0))
738 i := c
739
740 for ; i > 0; i-- {
741 if r.forwardcharnext() == ch {
742 r.backwardnext()
743 break
744 }
745 }
746
747 if c > i {
748 r.trackPush2(c-i-1, r.textPos()-r.bump())
749 }
750
751 r.advance(2)
752 continue
753
754 case syntax.Setloop:
755
756 c := r.operand(1)
757
758 if c > r.forwardchars() {
759 c = r.forwardchars()
760 }
761
762 set := r.code.Sets[r.operand(0)]
763 i := c
764
765 for ; i > 0; i-- {
766 if !set.CharIn(r.forwardcharnext()) {
767 r.backwardnext()
768 break
769 }
770 }
771
772 if c > i {
773 r.trackPush2(c-i-1, r.textPos()-r.bump())
774 }
775
776 r.advance(2)
777 continue
778
779 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
780
781 r.trackPopN(2)
782 i := r.trackPeek()
783 pos := r.trackPeekN(1)
784
785 r.textto(pos)
786
787 if i > 0 {
788 r.trackPush2(i-1, pos-r.bump())
789 }
790
791 r.advance(2)
792 continue
793
794 case syntax.Setloop | syntax.Back:
795
796 r.trackPopN(2)
797 i := r.trackPeek()
798 pos := r.trackPeekN(1)
799
800 r.textto(pos)
801
802 if i > 0 {
803 r.trackPush2(i-1, pos-r.bump())
804 }
805
806 r.advance(2)
807 continue
808
809 case syntax.Onelazy, syntax.Notonelazy:
810
811 c := r.operand(1)
812
813 if c > r.forwardchars() {
814 c = r.forwardchars()
815 }
816
817 if c > 0 {
818 r.trackPush2(c-1, r.textPos())
819 }
820
821 r.advance(2)
822 continue
823
824 case syntax.Setlazy:
825
826 c := r.operand(1)
827
828 if c > r.forwardchars() {
829 c = r.forwardchars()
830 }
831
832 if c > 0 {
833 r.trackPush2(c-1, r.textPos())
834 }
835
836 r.advance(2)
837 continue
838
839 case syntax.Onelazy | syntax.Back:
840
841 r.trackPopN(2)
842 pos := r.trackPeekN(1)
843 r.textto(pos)
844
845 if r.forwardcharnext() != rune(r.operand(0)) {
846 break
847 }
848
849 i := r.trackPeek()
850
851 if i > 0 {
852 r.trackPush2(i-1, pos+r.bump())
853 }
854
855 r.advance(2)
856 continue
857
858 case syntax.Notonelazy | syntax.Back:
859
860 r.trackPopN(2)
861 pos := r.trackPeekN(1)
862 r.textto(pos)
863
864 if r.forwardcharnext() == rune(r.operand(0)) {
865 break
866 }
867
868 i := r.trackPeek()
869
870 if i > 0 {
871 r.trackPush2(i-1, pos+r.bump())
872 }
873
874 r.advance(2)
875 continue
876
877 case syntax.Setlazy | syntax.Back:
878
879 r.trackPopN(2)
880 pos := r.trackPeekN(1)
881 r.textto(pos)
882
883 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
884 break
885 }
886
887 i := r.trackPeek()
888
889 if i > 0 {
890 r.trackPush2(i-1, pos+r.bump())
891 }
892
893 r.advance(2)
894 continue
895
896 default:
897 return errors.New("unknown state in regex runner")
898 }
899
900 BreakBackward:
901 ;
902
903 // "break Backward" comes here:
904 r.backtrack()
905 }
906}
907
908// increase the size of stack and track storage
909func (r *runner) ensureStorage() {
910 if r.runstackpos < r.runtrackcount*4 {
911 doubleIntSlice(&r.runstack, &r.runstackpos)
912 }
913 if r.runtrackpos < r.runtrackcount*4 {
914 doubleIntSlice(&r.runtrack, &r.runtrackpos)
915 }
916}
917
// doubleIntSlice replaces *s with a slice of twice the length, keeping the
// existing contents in the upper half, and shifts *pos by the number of slots
// that were added in front so it keeps pointing at the same element.
//
// The stacks in this file grow downward (a push decrements the position and
// then writes), which is why the new room is added at the low end.
//
// A zero-length slice grows to a single slot: doubling zero would leave the
// caller with no room at all and its next push would index -1.
func doubleIntSlice(s *[]int, pos *int) {
	oldLen := len(*s)
	newLen := oldLen * 2
	if newLen == 0 {
		newLen = 1
	}
	added := newLen - oldLen

	grown := make([]int, newLen)
	copy(grown[added:], *s)

	*pos += added
	*s = grown
}
926
927// Save a number on the longjump unrolling stack
928func (r *runner) crawl(i int) {
929 if r.runcrawlpos == 0 {
930 doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
931 }
932 r.runcrawlpos--
933 r.runcrawl[r.runcrawlpos] = i
934}
935
936// Remove a number from the longjump unrolling stack
937func (r *runner) popcrawl() int {
938 val := r.runcrawl[r.runcrawlpos]
939 r.runcrawlpos++
940 return val
941}
942
943// Get the height of the stack
944func (r *runner) crawlpos() int {
945 return len(r.runcrawl) - r.runcrawlpos
946}
947
948func (r *runner) advance(i int) {
949 r.codepos += (i + 1)
950 r.setOperator(r.code.Codes[r.codepos])
951}
952
953func (r *runner) goTo(newpos int) {
954 // when branching backward or in place, ensure storage
955 if newpos <= r.codepos {
956 r.ensureStorage()
957 }
958
959 r.setOperator(r.code.Codes[newpos])
960 r.codepos = newpos
961}
962
963func (r *runner) textto(newpos int) {
964 r.runtextpos = newpos
965}
966
967func (r *runner) trackto(newpos int) {
968 r.runtrackpos = len(r.runtrack) - newpos
969}
970
971func (r *runner) textstart() int {
972 return r.runtextstart
973}
974
975func (r *runner) textPos() int {
976 return r.runtextpos
977}
978
979// push onto the backtracking stack
980func (r *runner) trackpos() int {
981 return len(r.runtrack) - r.runtrackpos
982}
983
984func (r *runner) trackPush() {
985 r.runtrackpos--
986 r.runtrack[r.runtrackpos] = r.codepos
987}
988
989func (r *runner) trackPush1(I1 int) {
990 r.runtrackpos--
991 r.runtrack[r.runtrackpos] = I1
992 r.runtrackpos--
993 r.runtrack[r.runtrackpos] = r.codepos
994}
995
996func (r *runner) trackPush2(I1, I2 int) {
997 r.runtrackpos--
998 r.runtrack[r.runtrackpos] = I1
999 r.runtrackpos--
1000 r.runtrack[r.runtrackpos] = I2
1001 r.runtrackpos--
1002 r.runtrack[r.runtrackpos] = r.codepos
1003}
1004
1005func (r *runner) trackPush3(I1, I2, I3 int) {
1006 r.runtrackpos--
1007 r.runtrack[r.runtrackpos] = I1
1008 r.runtrackpos--
1009 r.runtrack[r.runtrackpos] = I2
1010 r.runtrackpos--
1011 r.runtrack[r.runtrackpos] = I3
1012 r.runtrackpos--
1013 r.runtrack[r.runtrackpos] = r.codepos
1014}
1015
1016func (r *runner) trackPushNeg1(I1 int) {
1017 r.runtrackpos--
1018 r.runtrack[r.runtrackpos] = I1
1019 r.runtrackpos--
1020 r.runtrack[r.runtrackpos] = -r.codepos
1021}
1022
1023func (r *runner) trackPushNeg2(I1, I2 int) {
1024 r.runtrackpos--
1025 r.runtrack[r.runtrackpos] = I1
1026 r.runtrackpos--
1027 r.runtrack[r.runtrackpos] = I2
1028 r.runtrackpos--
1029 r.runtrack[r.runtrackpos] = -r.codepos
1030}
1031
1032func (r *runner) backtrack() {
1033 newpos := r.runtrack[r.runtrackpos]
1034 r.runtrackpos++
1035
1036 if r.re.Debug() {
1037 if newpos < 0 {
1038 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
1039 } else {
1040 fmt.Printf(" Backtracking to code position %v\n", newpos)
1041 }
1042 }
1043
1044 if newpos < 0 {
1045 newpos = -newpos
1046 r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1047 } else {
1048 r.setOperator(r.code.Codes[newpos] | syntax.Back)
1049 }
1050
1051 // When branching backward, ensure storage
1052 if newpos < r.codepos {
1053 r.ensureStorage()
1054 }
1055
1056 r.codepos = newpos
1057}
1058
1059func (r *runner) setOperator(op int) {
1060 r.caseInsensitive = (0 != (op & syntax.Ci))
1061 r.rightToLeft = (0 != (op & syntax.Rtl))
1062 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1063}
1064
1065func (r *runner) trackPop() {
1066 r.runtrackpos++
1067}
1068
1069// pop framesize items from the backtracking stack
1070func (r *runner) trackPopN(framesize int) {
1071 r.runtrackpos += framesize
1072}
1073
1074// Technically we are actually peeking at items already popped. So if you want to
1075// get and pop the top item from the stack, you do
1076// r.trackPop();
1077// r.trackPeek();
1078func (r *runner) trackPeek() int {
1079 return r.runtrack[r.runtrackpos-1]
1080}
1081
1082// get the ith element down on the backtracking stack
1083func (r *runner) trackPeekN(i int) int {
1084 return r.runtrack[r.runtrackpos-i-1]
1085}
1086
1087// Push onto the grouping stack
1088func (r *runner) stackPush(I1 int) {
1089 r.runstackpos--
1090 r.runstack[r.runstackpos] = I1
1091}
1092
1093func (r *runner) stackPush2(I1, I2 int) {
1094 r.runstackpos--
1095 r.runstack[r.runstackpos] = I1
1096 r.runstackpos--
1097 r.runstack[r.runstackpos] = I2
1098}
1099
1100func (r *runner) stackPop() {
1101 r.runstackpos++
1102}
1103
1104// pop framesize items from the grouping stack
1105func (r *runner) stackPopN(framesize int) {
1106 r.runstackpos += framesize
1107}
1108
1109// Technically we are actually peeking at items already popped. So if you want to
1110// get and pop the top item from the stack, you do
1111// r.stackPop();
1112// r.stackPeek();
1113func (r *runner) stackPeek() int {
1114 return r.runstack[r.runstackpos-1]
1115}
1116
1117// get the ith element down on the grouping stack
1118func (r *runner) stackPeekN(i int) int {
1119 return r.runstack[r.runstackpos-i-1]
1120}
1121
1122func (r *runner) operand(i int) int {
1123 return r.code.Codes[r.codepos+i+1]
1124}
1125
1126func (r *runner) leftchars() int {
1127 return r.runtextpos
1128}
1129
1130func (r *runner) rightchars() int {
1131 return r.runtextend - r.runtextpos
1132}
1133
1134func (r *runner) bump() int {
1135 if r.rightToLeft {
1136 return -1
1137 }
1138 return 1
1139}
1140
1141func (r *runner) forwardchars() int {
1142 if r.rightToLeft {
1143 return r.runtextpos
1144 }
1145 return r.runtextend - r.runtextpos
1146}
1147
1148func (r *runner) forwardcharnext() rune {
1149 var ch rune
1150 if r.rightToLeft {
1151 r.runtextpos--
1152 ch = r.runtext[r.runtextpos]
1153 } else {
1154 ch = r.runtext[r.runtextpos]
1155 r.runtextpos++
1156 }
1157
1158 if r.caseInsensitive {
1159 return unicode.ToLower(ch)
1160 }
1161 return ch
1162}
1163
1164func (r *runner) runematch(str []rune) bool {
1165 var pos int
1166
1167 c := len(str)
1168 if !r.rightToLeft {
1169 if r.runtextend-r.runtextpos < c {
1170 return false
1171 }
1172
1173 pos = r.runtextpos + c
1174 } else {
1175 if r.runtextpos-0 < c {
1176 return false
1177 }
1178
1179 pos = r.runtextpos
1180 }
1181
1182 if !r.caseInsensitive {
1183 for c != 0 {
1184 c--
1185 pos--
1186 if str[c] != r.runtext[pos] {
1187 return false
1188 }
1189 }
1190 } else {
1191 for c != 0 {
1192 c--
1193 pos--
1194 if str[c] != unicode.ToLower(r.runtext[pos]) {
1195 return false
1196 }
1197 }
1198 }
1199
1200 if !r.rightToLeft {
1201 pos += len(str)
1202 }
1203
1204 r.runtextpos = pos
1205
1206 return true
1207}
1208
1209func (r *runner) refmatch(index, len int) bool {
1210 var c, pos, cmpos int
1211
1212 if !r.rightToLeft {
1213 if r.runtextend-r.runtextpos < len {
1214 return false
1215 }
1216
1217 pos = r.runtextpos + len
1218 } else {
1219 if r.runtextpos-0 < len {
1220 return false
1221 }
1222
1223 pos = r.runtextpos
1224 }
1225 cmpos = index + len
1226
1227 c = len
1228
1229 if !r.caseInsensitive {
1230 for c != 0 {
1231 c--
1232 cmpos--
1233 pos--
1234 if r.runtext[cmpos] != r.runtext[pos] {
1235 return false
1236 }
1237
1238 }
1239 } else {
1240 for c != 0 {
1241 c--
1242 cmpos--
1243 pos--
1244
1245 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1246 return false
1247 }
1248 }
1249 }
1250
1251 if !r.rightToLeft {
1252 pos += len
1253 }
1254
1255 r.runtextpos = pos
1256
1257 return true
1258}
1259
1260func (r *runner) backwardnext() {
1261 if r.rightToLeft {
1262 r.runtextpos++
1263 } else {
1264 r.runtextpos--
1265 }
1266}
1267
1268func (r *runner) charAt(j int) rune {
1269 return r.runtext[j]
1270}
1271
1272func (r *runner) findFirstChar() bool {
1273
1274 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1275 if !r.code.RightToLeft {
1276 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1277 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1278 r.runtextpos = r.runtextend
1279 return false
1280 }
1281 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1282 r.runtextpos = r.runtextend - 1
1283 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1284 r.runtextpos = r.runtextend
1285 }
1286 } else {
1287 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1288 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1289 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1290 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1291 r.runtextpos = 0
1292 return false
1293 }
1294 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1295 r.runtextpos = 0
1296 }
1297 }
1298
1299 if r.code.BmPrefix != nil {
1300 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1301 }
1302
1303 return true // found a valid start or end anchor
1304 } else if r.code.BmPrefix != nil {
1305 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1306
1307 if r.runtextpos == -1 {
1308 if r.code.RightToLeft {
1309 r.runtextpos = 0
1310 } else {
1311 r.runtextpos = r.runtextend
1312 }
1313 return false
1314 }
1315
1316 return true
1317 } else if r.code.FcPrefix == nil {
1318 return true
1319 }
1320
1321 r.rightToLeft = r.code.RightToLeft
1322 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1323
1324 set := r.code.FcPrefix.PrefixSet
1325 if set.IsSingleton() {
1326 ch := set.SingletonChar()
1327 for i := r.forwardchars(); i > 0; i-- {
1328 if ch == r.forwardcharnext() {
1329 r.backwardnext()
1330 return true
1331 }
1332 }
1333 } else {
1334 for i := r.forwardchars(); i > 0; i-- {
1335 n := r.forwardcharnext()
1336 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1337 if set.CharIn(n) {
1338 r.backwardnext()
1339 return true
1340 }
1341 }
1342 }
1343
1344 return false
1345}
1346
1347func (r *runner) initMatch() {
1348 // Use a hashtable'ed Match object if the capture numbers are sparse
1349
1350 if r.runmatch == nil {
1351 if r.re.caps != nil {
1352 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1353 } else {
1354 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1355 }
1356 } else {
1357 r.runmatch.reset(r.runtext, r.runtextstart)
1358 }
1359
1360 // note we test runcrawl, because it is the last one to be allocated
1361 // If there is an alloc failure in the middle of the three allocations,
1362 // we may still return to reuse this instance, and we want to behave
1363 // as if the allocations didn't occur. (we used to test _trackcount != 0)
1364
1365 if r.runcrawl != nil {
1366 r.runtrackpos = len(r.runtrack)
1367 r.runstackpos = len(r.runstack)
1368 r.runcrawlpos = len(r.runcrawl)
1369 return
1370 }
1371
1372 r.initTrackCount()
1373
1374 tracksize := r.runtrackcount * 8
1375 stacksize := r.runtrackcount * 8
1376
1377 if tracksize < 32 {
1378 tracksize = 32
1379 }
1380 if stacksize < 16 {
1381 stacksize = 16
1382 }
1383
1384 r.runtrack = make([]int, tracksize)
1385 r.runtrackpos = tracksize
1386
1387 r.runstack = make([]int, stacksize)
1388 r.runstackpos = stacksize
1389
1390 r.runcrawl = make([]int, 32)
1391 r.runcrawlpos = 32
1392}
1393
1394func (r *runner) tidyMatch(quick bool) *Match {
1395 if !quick {
1396 match := r.runmatch
1397
1398 r.runmatch = nil
1399
1400 match.tidy(r.runtextpos)
1401 return match
1402 } else {
1403 // send back our match -- it's not leaving the package, so it's safe to not clean it up
1404 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1405 return r.runmatch
1406 }
1407}
1408
1409// capture captures a subexpression. Note that the
1410// capnum used here has already been mapped to a non-sparse
1411// index (by the code generator RegexWriter).
1412func (r *runner) capture(capnum, start, end int) {
1413 if end < start {
1414 T := end
1415 end = start
1416 start = T
1417 }
1418
1419 r.crawl(capnum)
1420 r.runmatch.addMatch(capnum, start, end-start)
1421}
1422
1423// transferCapture captures a subexpression. Note that the
1424// capnum used here has already been mapped to a non-sparse
1425// index (by the code generator RegexWriter).
1426func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1427 var start2, end2 int
1428
1429 // these are the two intervals that are cancelling each other
1430
1431 if end < start {
1432 T := end
1433 end = start
1434 start = T
1435 }
1436
1437 start2 = r.runmatch.matchIndex(uncapnum)
1438 end2 = start2 + r.runmatch.matchLength(uncapnum)
1439
1440 // The new capture gets the innermost defined interval
1441
1442 if start >= end2 {
1443 end = start
1444 start = end2
1445 } else if end <= start2 {
1446 start = start2
1447 } else {
1448 if end > end2 {
1449 end = end2
1450 }
1451 if start2 > start {
1452 start = start2
1453 }
1454 }
1455
1456 r.crawl(uncapnum)
1457 r.runmatch.balanceMatch(uncapnum)
1458
1459 if capnum != -1 {
1460 r.crawl(capnum)
1461 r.runmatch.addMatch(capnum, start, end-start)
1462 }
1463}
1464
1465// revert the last capture
1466func (r *runner) uncapture() {
1467 capnum := r.popcrawl()
1468 r.runmatch.removeMatch(capnum)
1469}
1470
1471//debug
1472
1473func (r *runner) dumpState() {
1474 back := ""
1475 if r.operator&syntax.Back != 0 {
1476 back = " Back"
1477 }
1478 if r.operator&syntax.Back2 != 0 {
1479 back += " Back2"
1480 }
1481 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
1482 r.textposDescription(),
1483 r.stackDescription(r.runtrack, r.runtrackpos),
1484 r.stackDescription(r.runstack, r.runstackpos),
1485 r.code.OpcodeDescription(r.codepos),
1486 back)
1487}
1488
1489func (r *runner) stackDescription(a []int, index int) string {
1490 buf := &bytes.Buffer{}
1491
1492 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1493 if buf.Len() < 8 {
1494 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1495 }
1496
1497 buf.WriteRune('(')
1498 for i := index; i < len(a); i++ {
1499 if i > index {
1500 buf.WriteRune(' ')
1501 }
1502
1503 buf.WriteString(strconv.Itoa(a[i]))
1504 }
1505
1506 buf.WriteRune(')')
1507
1508 return buf.String()
1509}
1510
1511func (r *runner) textposDescription() string {
1512 buf := &bytes.Buffer{}
1513
1514 buf.WriteString(strconv.Itoa(r.runtextpos))
1515
1516 if buf.Len() < 8 {
1517 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1518 }
1519
1520 if r.runtextpos > 0 {
1521 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1522 } else {
1523 buf.WriteRune('^')
1524 }
1525
1526 buf.WriteRune('>')
1527
1528 for i := r.runtextpos; i < r.runtextend; i++ {
1529 buf.WriteString(syntax.CharDescription(r.runtext[i]))
1530 }
1531 if buf.Len() >= 64 {
1532 buf.Truncate(61)
1533 buf.WriteString("...")
1534 } else {
1535 buf.WriteRune('$')
1536 }
1537
1538 return buf.String()
1539}
1540
1541// decide whether the pos
1542// at the specified index is a boundary or not. It's just not worth
1543// emitting inline code for this logic.
1544func (r *runner) isBoundary(index, startpos, endpos int) bool {
1545 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1546 (index < endpos && syntax.IsWordChar(r.runtext[index]))
1547}
1548
1549func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1550 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1551 (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1552}
1553
// timeoutCheckFrequency is the number of checkTimeout calls between two
// actual clock reads.
//
// Rationale carried over with the value: it was determined in a series of
// experiments where optimized x86 builds were run on different
// pattern/input pairs. Larger values did not tend to increase performance;
// smaller values tended to slow down execution.
const timeoutCheckFrequency int = 1000
1560
1561func (r *runner) startTimeoutWatch() {
1562 if r.ignoreTimeout {
1563 return
1564 }
1565
1566 r.timeoutChecksToSkip = timeoutCheckFrequency
1567 r.timeoutAt = time.Now().Add(r.timeout)
1568}
1569
1570func (r *runner) checkTimeout() error {
1571 if r.ignoreTimeout {
1572 return nil
1573 }
1574 r.timeoutChecksToSkip--
1575 if r.timeoutChecksToSkip != 0 {
1576 return nil
1577 }
1578
1579 r.timeoutChecksToSkip = timeoutCheckFrequency
1580 return r.doCheckTimeout()
1581}
1582
1583func (r *runner) doCheckTimeout() error {
1584 current := time.Now()
1585
1586 if current.Before(r.timeoutAt) {
1587 return nil
1588 }
1589
1590 if r.re.Debug() {
1591 //Debug.WriteLine("")
1592 //Debug.WriteLine("RegEx match timeout occurred!")
1593 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
1594 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1595 //Debug.WriteLine("Search pattern: " + _runregex._pattern)
1596 //Debug.WriteLine("Input: " + r.runtext)
1597 //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1598 }
1599
1600 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1601}
1602
1603func (r *runner) initTrackCount() {
1604 r.runtrackcount = r.code.TrackCount
1605}
1606
1607// getRunner returns a run to use for matching re.
1608// It uses the re's runner cache if possible, to avoid
1609// unnecessary allocation.
1610func (re *Regexp) getRunner() *runner {
1611 re.muRun.Lock()
1612 if n := len(re.runner); n > 0 {
1613 z := re.runner[n-1]
1614 re.runner = re.runner[:n-1]
1615 re.muRun.Unlock()
1616 return z
1617 }
1618 re.muRun.Unlock()
1619 z := &runner{
1620 re: re,
1621 code: re.code,
1622 }
1623 return z
1624}
1625
1626// putRunner returns a runner to the re's cache.
1627// There is no attempt to limit the size of the cache, so it will
1628// grow to the maximum number of simultaneous matches
1629// run using re. (The cache empties when re gets garbage collected.)
1630func (re *Regexp) putRunner(r *runner) {
1631 re.muRun.Lock()
1632 re.runner = append(re.runner, r)
1633 re.muRun.Unlock()
1634}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/charclass.go b/vendor/github.com/dlclark/regexp2/syntax/charclass.go
new file mode 100644
index 0000000..6881a0e
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/charclass.go
@@ -0,0 +1,865 @@
1package syntax
2
3import (
4 "bytes"
5 "encoding/binary"
6 "fmt"
7 "sort"
8 "unicode"
9 "unicode/utf8"
10)
11
12// CharSet combines start-end rune ranges and unicode categories representing a set of characters
13type CharSet struct {
14 ranges []singleRange
15 categories []category
16 sub *CharSet //optional subtractor
17 negate bool
18 anything bool
19}
20
// category names a character category inside a CharSet, together with a
// flag saying whether the category itself is negated.
type category struct {
	// negate selects the characters NOT in the category.
	negate bool
	// cat is either one of the special names spaceCategoryText or
	// wordCategoryText, or a key of unicodeCategories.
	cat string
}
25
// singleRange is an inclusive range of runes, first through last.
type singleRange struct {
	// first is the lowest rune in the range.
	first rune
	// last is the highest rune in the range.
	last rune
}
30
// Names of the two special categories that CharIn resolves itself instead
// of looking them up in unicodeCategories.
const (
	// spaceCategoryText is tested with unicode.IsSpace.
	spaceCategoryText = " "
	// wordCategoryText is tested with IsWordChar.
	wordCategoryText = "W"
)
35
// Boundary lists consumed by getCharSetFromOldString. The values alternate
// between the first rune of a range and the first rune after it, so
// {0x0030, 0x003a} means '0' through '9'.
var (
	// ecmaSpace holds the ranges used for whitespace in ECMAScript mode.
	ecmaSpace = []rune{
		0x0009, 0x000e,
		0x0020, 0x0021,
		0x00a0, 0x00a1,
		0x1680, 0x1681,
		0x2000, 0x200b,
		0x2028, 0x202a,
		0x202f, 0x2030,
		0x205f, 0x2060,
		0x3000, 0x3001,
		0xfeff, 0xff00,
	}
	// ecmaWord holds 0-9, A-Z, '_' and a-z.
	ecmaWord = []rune{
		0x0030, 0x003a,
		0x0041, 0x005b,
		0x005f, 0x0060,
		0x0061, 0x007b,
	}
	// ecmaDigit holds 0-9.
	ecmaDigit = []rune{0x0030, 0x003a}

	// re2Space holds the ranges used for whitespace in RE2 mode.
	re2Space = []rune{
		0x0009, 0x000b,
		0x000c, 0x000e,
		0x0020, 0x0021,
	}
)
43
44var (
45 AnyClass = getCharSetFromOldString([]rune{0}, false)
46 ECMAAnyClass = getCharSetFromOldString([]rune{0, 0x000a, 0x000b, 0x000d, 0x000e}, false)
47 NoneClass = getCharSetFromOldString(nil, false)
48 ECMAWordClass = getCharSetFromOldString(ecmaWord, false)
49 NotECMAWordClass = getCharSetFromOldString(ecmaWord, true)
50 ECMASpaceClass = getCharSetFromOldString(ecmaSpace, false)
51 NotECMASpaceClass = getCharSetFromOldString(ecmaSpace, true)
52 ECMADigitClass = getCharSetFromOldString(ecmaDigit, false)
53 NotECMADigitClass = getCharSetFromOldString(ecmaDigit, true)
54
55 WordClass = getCharSetFromCategoryString(false, false, wordCategoryText)
56 NotWordClass = getCharSetFromCategoryString(true, false, wordCategoryText)
57 SpaceClass = getCharSetFromCategoryString(false, false, spaceCategoryText)
58 NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText)
59 DigitClass = getCharSetFromCategoryString(false, false, "Nd")
60 NotDigitClass = getCharSetFromCategoryString(false, true, "Nd")
61
62 RE2SpaceClass = getCharSetFromOldString(re2Space, false)
63 NotRE2SpaceClass = getCharSetFromOldString(re2Space, true)
64)
65
// unicodeCategories maps every name known to the unicode package - scripts,
// general categories and properties - to its range table. The sources are
// merged in that order, so on a name clash the later source wins.
var unicodeCategories = func() map[string]*unicode.RangeTable {
	sources := []map[string]*unicode.RangeTable{
		unicode.Scripts,
		unicode.Categories,
		unicode.Properties,
	}

	merged := make(map[string]*unicode.RangeTable)
	for _, src := range sources {
		for name, table := range src {
			merged[name] = table
		}
	}
	return merged
}()
79
80func getCharSetFromCategoryString(negateSet bool, negateCat bool, cats ...string) func() *CharSet {
81 if negateCat && negateSet {
82 panic("BUG! You should only negate the set OR the category in a constant setup, but not both")
83 }
84
85 c := CharSet{negate: negateSet}
86
87 c.categories = make([]category, len(cats))
88 for i, cat := range cats {
89 c.categories[i] = category{cat: cat, negate: negateCat}
90 }
91 return func() *CharSet {
92 //make a copy each time
93 local := c
94 //return that address
95 return &local
96 }
97}
98
99func getCharSetFromOldString(setText []rune, negate bool) func() *CharSet {
100 c := CharSet{}
101 if len(setText) > 0 {
102 fillFirst := false
103 l := len(setText)
104 if negate {
105 if setText[0] == 0 {
106 setText = setText[1:]
107 } else {
108 l++
109 fillFirst = true
110 }
111 }
112
113 if l%2 == 0 {
114 c.ranges = make([]singleRange, l/2)
115 } else {
116 c.ranges = make([]singleRange, l/2+1)
117 }
118
119 first := true
120 if fillFirst {
121 c.ranges[0] = singleRange{first: 0}
122 first = false
123 }
124
125 i := 0
126 for _, r := range setText {
127 if first {
128 // lower bound in a new range
129 c.ranges[i] = singleRange{first: r}
130 first = false
131 } else {
132 c.ranges[i].last = r - 1
133 i++
134 first = true
135 }
136 }
137 if !first {
138 c.ranges[i].last = utf8.MaxRune
139 }
140 }
141
142 return func() *CharSet {
143 local := c
144 return &local
145 }
146}
147
148// Copy makes a deep copy to prevent accidental mutation of a set
149func (c CharSet) Copy() CharSet {
150 ret := CharSet{
151 anything: c.anything,
152 negate: c.negate,
153 }
154
155 ret.ranges = append(ret.ranges, c.ranges...)
156 ret.categories = append(ret.categories, c.categories...)
157
158 if c.sub != nil {
159 sub := c.sub.Copy()
160 ret.sub = &sub
161 }
162
163 return ret
164}
165
166// gets a human-readable description for a set string
167func (c CharSet) String() string {
168 buf := &bytes.Buffer{}
169 buf.WriteRune('[')
170
171 if c.IsNegated() {
172 buf.WriteRune('^')
173 }
174
175 for _, r := range c.ranges {
176
177 buf.WriteString(CharDescription(r.first))
178 if r.first != r.last {
179 if r.last-r.first != 1 {
180 //groups that are 1 char apart skip the dash
181 buf.WriteRune('-')
182 }
183 buf.WriteString(CharDescription(r.last))
184 }
185 }
186
187 for _, c := range c.categories {
188 buf.WriteString(c.String())
189 }
190
191 if c.sub != nil {
192 buf.WriteRune('-')
193 buf.WriteString(c.sub.String())
194 }
195
196 buf.WriteRune(']')
197
198 return buf.String()
199}
200
201// mapHashFill converts a charset into a buffer for use in maps
202func (c CharSet) mapHashFill(buf *bytes.Buffer) {
203 if c.negate {
204 buf.WriteByte(0)
205 } else {
206 buf.WriteByte(1)
207 }
208
209 binary.Write(buf, binary.LittleEndian, len(c.ranges))
210 binary.Write(buf, binary.LittleEndian, len(c.categories))
211 for _, r := range c.ranges {
212 buf.WriteRune(r.first)
213 buf.WriteRune(r.last)
214 }
215 for _, ct := range c.categories {
216 buf.WriteString(ct.cat)
217 if ct.negate {
218 buf.WriteByte(1)
219 } else {
220 buf.WriteByte(0)
221 }
222 }
223
224 if c.sub != nil {
225 c.sub.mapHashFill(buf)
226 }
227}
228
229// CharIn returns true if the rune is in our character set (either ranges or categories).
230// It handles negations and subtracted sub-charsets.
231func (c CharSet) CharIn(ch rune) bool {
232 val := false
233 // in s && !s.subtracted
234
235 //check ranges
236 for _, r := range c.ranges {
237 if ch < r.first {
238 continue
239 }
240 if ch <= r.last {
241 val = true
242 break
243 }
244 }
245
246 //check categories if we haven't already found a range
247 if !val && len(c.categories) > 0 {
248 for _, ct := range c.categories {
249 // special categories...then unicode
250 if ct.cat == spaceCategoryText {
251 if unicode.IsSpace(ch) {
252 // we found a space so we're done
253 // negate means this is a "bad" thing
254 val = !ct.negate
255 break
256 } else if ct.negate {
257 val = true
258 break
259 }
260 } else if ct.cat == wordCategoryText {
261 if IsWordChar(ch) {
262 val = !ct.negate
263 break
264 } else if ct.negate {
265 val = true
266 break
267 }
268 } else if unicode.Is(unicodeCategories[ct.cat], ch) {
269 // if we're in this unicode category then we're done
270 // if negate=true on this category then we "failed" our test
271 // otherwise we're good that we found it
272 val = !ct.negate
273 break
274 } else if ct.negate {
275 val = true
276 break
277 }
278 }
279 }
280
281 // negate the whole char set
282 if c.negate {
283 val = !val
284 }
285
286 // get subtracted recurse
287 if val && c.sub != nil {
288 val = !c.sub.CharIn(ch)
289 }
290
291 //log.Printf("Char '%v' in %v == %v", string(ch), c.String(), val)
292 return val
293}
294
295func (c category) String() string {
296 switch c.cat {
297 case spaceCategoryText:
298 if c.negate {
299 return "\\S"
300 }
301 return "\\s"
302 case wordCategoryText:
303 if c.negate {
304 return "\\W"
305 }
306 return "\\w"
307 }
308 if _, ok := unicodeCategories[c.cat]; ok {
309
310 if c.negate {
311 return "\\P{" + c.cat + "}"
312 }
313 return "\\p{" + c.cat + "}"
314 }
315 return "Unknown category: " + c.cat
316}
317
318// CharDescription Produces a human-readable description for a single character.
319func CharDescription(ch rune) string {
320 /*if ch == '\\' {
321 return "\\\\"
322 }
323
324 if ch > ' ' && ch <= '~' {
325 return string(ch)
326 } else if ch == '\n' {
327 return "\\n"
328 } else if ch == ' ' {
329 return "\\ "
330 }*/
331
332 b := &bytes.Buffer{}
333 escape(b, ch, false) //fmt.Sprintf("%U", ch)
334 return b.String()
335}
336
// IsWordChar reports whether r is a word character: a member of one of the
// unicode categories L (letter), Mn (nonspacing mark), Nd (decimal digit)
// or Pc (connector punctuation), or one of U+200C ZERO WIDTH NON-JOINER and
// U+200D ZERO WIDTH JOINER.
//
// Background: UTS#18 Unicode Regular Expressions
// (http://www.unicode.org/reports/tr18/), RL 1.4 Simple Word Boundaries,
// asks for the joiners to be part of the word character class.
//
// The tables are referenced through the unicode package variables, which
// are the same tables as unicode.Categories["L"] and so on, to avoid four
// map lookups on every call.
func IsWordChar(r rune) bool {
	if r == '\u200D' || r == '\u200C' {
		return true
	}
	return unicode.In(r, unicode.L, unicode.Mn, unicode.Nd, unicode.Pc)
}
348
// IsECMAWordChar reports whether r is a member of one of the unicode
// categories L (letter), Mn (nonspacing mark), Nd (decimal digit) or Pc
// (connector punctuation). Unlike IsWordChar it does not accept the
// zero-width joiner characters.
//
// NOTE(review): this is wider than the ASCII-only ecmaWord table used for
// the ECMAScript word class - confirm that the difference is intended.
//
// The tables are referenced through the unicode package variables, which
// are the same tables as unicode.Categories["L"] and so on, to avoid four
// map lookups on every call.
func IsECMAWordChar(r rune) bool {
	return unicode.In(r, unicode.L, unicode.Mn, unicode.Nd, unicode.Pc)
}
356
357// SingletonChar will return the char from the first range without validation.
358// It assumes you have checked for IsSingleton or IsSingletonInverse and will panic given bad input
359func (c CharSet) SingletonChar() rune {
360 return c.ranges[0].first
361}
362
363func (c CharSet) IsSingleton() bool {
364 return !c.negate && //negated is multiple chars
365 len(c.categories) == 0 && len(c.ranges) == 1 && // multiple ranges and unicode classes represent multiple chars
366 c.sub == nil && // subtraction means we've got multiple chars
367 c.ranges[0].first == c.ranges[0].last // first and last equal means we're just 1 char
368}
369
370func (c CharSet) IsSingletonInverse() bool {
371 return c.negate && //same as above, but requires negated
372 len(c.categories) == 0 && len(c.ranges) == 1 && // multiple ranges and unicode classes represent multiple chars
373 c.sub == nil && // subtraction means we've got multiple chars
374 c.ranges[0].first == c.ranges[0].last // first and last equal means we're just 1 char
375}
376
377func (c CharSet) IsMergeable() bool {
378 return !c.IsNegated() && !c.HasSubtraction()
379}
380
// IsNegated reports whether the set's negate flag is set.
func (c CharSet) IsNegated() bool {
	return c.negate
}
384
// HasSubtraction reports whether a subtraction set has been attached to c.
func (c CharSet) HasSubtraction() bool {
	return c.sub != nil
}
388
389func (c CharSet) IsEmpty() bool {
390 return len(c.ranges) == 0 && len(c.categories) == 0 && c.sub == nil
391}
392
393func (c *CharSet) addDigit(ecma, negate bool, pattern string) {
394 if ecma {
395 if negate {
396 c.addRanges(NotECMADigitClass().ranges)
397 } else {
398 c.addRanges(ECMADigitClass().ranges)
399 }
400 } else {
401 c.addCategories(category{cat: "Nd", negate: negate})
402 }
403}
404
// addChar adds the single character ch to the set as a one-element range.
func (c *CharSet) addChar(ch rune) {
	c.addRange(ch, ch)
}
408
409func (c *CharSet) addSpace(ecma, re2, negate bool) {
410 if ecma {
411 if negate {
412 c.addRanges(NotECMASpaceClass().ranges)
413 } else {
414 c.addRanges(ECMASpaceClass().ranges)
415 }
416 } else if re2 {
417 if negate {
418 c.addRanges(NotRE2SpaceClass().ranges)
419 } else {
420 c.addRanges(RE2SpaceClass().ranges)
421 }
422 } else {
423 c.addCategories(category{cat: spaceCategoryText, negate: negate})
424 }
425}
426
427func (c *CharSet) addWord(ecma, negate bool) {
428 if ecma {
429 if negate {
430 c.addRanges(NotECMAWordClass().ranges)
431 } else {
432 c.addRanges(ECMAWordClass().ranges)
433 }
434 } else {
435 c.addCategories(category{cat: wordCategoryText, negate: negate})
436 }
437}
438
439// Add set ranges and categories into ours -- no deduping or anything
440func (c *CharSet) addSet(set CharSet) {
441 if c.anything {
442 return
443 }
444 if set.anything {
445 c.makeAnything()
446 return
447 }
448 // just append here to prevent double-canon
449 c.ranges = append(c.ranges, set.ranges...)
450 c.addCategories(set.categories...)
451 c.canonicalize()
452}
453
// makeAnything turns the set into one flagged as matching anything: the
// anything flag is set, the categories are cleared, and the ranges are
// replaced with those of AnyClass().
func (c *CharSet) makeAnything() {
	c.anything = true
	c.categories = []category{}
	c.ranges = AnyClass().ranges
}
459
460func (c *CharSet) addCategories(cats ...category) {
461 // don't add dupes and remove positive+negative
462 if c.anything {
463 // if we've had a previous positive+negative group then
464 // just return, we're as broad as we can get
465 return
466 }
467
468 for _, ct := range cats {
469 found := false
470 for _, ct2 := range c.categories {
471 if ct.cat == ct2.cat {
472 if ct.negate != ct2.negate {
473 // oposite negations...this mean we just
474 // take us as anything and move on
475 c.makeAnything()
476 return
477 }
478 found = true
479 break
480 }
481 }
482
483 if !found {
484 c.categories = append(c.categories, ct)
485 }
486 }
487}
488
489// Merges new ranges to our own
490func (c *CharSet) addRanges(ranges []singleRange) {
491 if c.anything {
492 return
493 }
494 c.ranges = append(c.ranges, ranges...)
495 c.canonicalize()
496}
497
498// Merges everything but the new ranges into our own
499func (c *CharSet) addNegativeRanges(ranges []singleRange) {
500 if c.anything {
501 return
502 }
503
504 var hi rune
505
506 // convert incoming ranges into opposites, assume they are in order
507 for _, r := range ranges {
508 if hi < r.first {
509 c.ranges = append(c.ranges, singleRange{hi, r.first - 1})
510 }
511 hi = r.last + 1
512 }
513
514 if hi < utf8.MaxRune {
515 c.ranges = append(c.ranges, singleRange{hi, utf8.MaxRune})
516 }
517
518 c.canonicalize()
519}
520
521func isValidUnicodeCat(catName string) bool {
522 _, ok := unicodeCategories[catName]
523 return ok
524}
525
526func (c *CharSet) addCategory(categoryName string, negate, caseInsensitive bool, pattern string) {
527 if !isValidUnicodeCat(categoryName) {
528 // unknown unicode category, script, or property "blah"
529 panic(fmt.Errorf("Unknown unicode category, script, or property '%v'", categoryName))
530
531 }
532
533 if caseInsensitive && (categoryName == "Ll" || categoryName == "Lu" || categoryName == "Lt") {
534 // when RegexOptions.IgnoreCase is specified then {Ll} {Lu} and {Lt} cases should all match
535 c.addCategories(
536 category{cat: "Ll", negate: negate},
537 category{cat: "Lu", negate: negate},
538 category{cat: "Lt", negate: negate})
539 }
540 c.addCategories(category{cat: categoryName, negate: negate})
541}
542
// addSubtraction stores sub as the set's subtraction, replacing any previous
// one.
func (c *CharSet) addSubtraction(sub *CharSet) {
	c.sub = sub
}
546
// addRange appends the inclusive range [chMin, chMax] to the set and
// re-canonicalizes it.
func (c *CharSet) addRange(chMin, chMax rune) {
	c.ranges = append(c.ranges, singleRange{first: chMin, last: chMax})
	c.canonicalize()
}
551
552func (c *CharSet) addNamedASCII(name string, negate bool) bool {
553 var rs []singleRange
554
555 switch name {
556 case "alnum":
557 rs = []singleRange{singleRange{'0', '9'}, singleRange{'A', 'Z'}, singleRange{'a', 'z'}}
558 case "alpha":
559 rs = []singleRange{singleRange{'A', 'Z'}, singleRange{'a', 'z'}}
560 case "ascii":
561 rs = []singleRange{singleRange{0, 0x7f}}
562 case "blank":
563 rs = []singleRange{singleRange{'\t', '\t'}, singleRange{' ', ' '}}
564 case "cntrl":
565 rs = []singleRange{singleRange{0, 0x1f}, singleRange{0x7f, 0x7f}}
566 case "digit":
567 c.addDigit(false, negate, "")
568 case "graph":
569 rs = []singleRange{singleRange{'!', '~'}}
570 case "lower":
571 rs = []singleRange{singleRange{'a', 'z'}}
572 case "print":
573 rs = []singleRange{singleRange{' ', '~'}}
574 case "punct": //[!-/:-@[-`{-~]
575 rs = []singleRange{singleRange{'!', '/'}, singleRange{':', '@'}, singleRange{'[', '`'}, singleRange{'{', '~'}}
576 case "space":
577 c.addSpace(true, false, negate)
578 case "upper":
579 rs = []singleRange{singleRange{'A', 'Z'}}
580 case "word":
581 c.addWord(true, negate)
582 case "xdigit":
583 rs = []singleRange{singleRange{'0', '9'}, singleRange{'A', 'F'}, singleRange{'a', 'f'}}
584 default:
585 return false
586 }
587
588 if len(rs) > 0 {
589 if negate {
590 c.addNegativeRanges(rs)
591 } else {
592 c.addRanges(rs)
593 }
594 }
595
596 return true
597}
598
// singleRangeSorter implements sort.Interface over a slice of singleRange,
// ordering the ranges by their first character only.
type singleRangeSorter []singleRange

// Len returns the number of ranges.
func (p singleRangeSorter) Len() int { return len(p) }

// Less orders ranges by ascending first character.
func (p singleRangeSorter) Less(i, j int) bool { return p[i].first < p[j].first }

// Swap exchanges the ranges at positions i and j.
func (p singleRangeSorter) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
604
605// Logic to reduce a character class to a unique, sorted form.
606func (c *CharSet) canonicalize() {
607 var i, j int
608 var last rune
609
610 //
611 // Find and eliminate overlapping or abutting ranges
612 //
613
614 if len(c.ranges) > 1 {
615 sort.Sort(singleRangeSorter(c.ranges))
616
617 done := false
618
619 for i, j = 1, 0; ; i++ {
620 for last = c.ranges[j].last; ; i++ {
621 if i == len(c.ranges) || last == utf8.MaxRune {
622 done = true
623 break
624 }
625
626 CurrentRange := c.ranges[i]
627 if CurrentRange.first > last+1 {
628 break
629 }
630
631 if last < CurrentRange.last {
632 last = CurrentRange.last
633 }
634 }
635
636 c.ranges[j] = singleRange{first: c.ranges[j].first, last: last}
637
638 j++
639
640 if done {
641 break
642 }
643
644 if j < i {
645 c.ranges[j] = c.ranges[i]
646 }
647 }
648
649 c.ranges = append(c.ranges[:j], c.ranges[len(c.ranges):]...)
650 }
651}
652
653// Adds to the class any lowercase versions of characters already
654// in the class. Used for case-insensitivity.
655func (c *CharSet) addLowercase() {
656 if c.anything {
657 return
658 }
659 toAdd := []singleRange{}
660 for i := 0; i < len(c.ranges); i++ {
661 r := c.ranges[i]
662 if r.first == r.last {
663 lower := unicode.ToLower(r.first)
664 c.ranges[i] = singleRange{first: lower, last: lower}
665 } else {
666 toAdd = append(toAdd, r)
667 }
668 }
669
670 for _, r := range toAdd {
671 c.addLowercaseRange(r.first, r.last)
672 }
673 c.canonicalize()
674}
675
676/**************************************************************************
677 Let U be the set of Unicode character values and let L be the lowercase
678 function, mapping from U to U. To perform case insensitive matching of
679 character sets, we need to be able to map an interval I in U, say
680
681 I = [chMin, chMax] = { ch : chMin <= ch <= chMax }
682
683 to a set A such that A contains L(I) and A is contained in the union of
684 I and L(I).
685
686 The table below partitions U into intervals on which L is non-decreasing.
687 Thus, for any interval J = [a, b] contained in one of these intervals,
688 L(J) is contained in [L(a), L(b)].
689
690 It is also true that for any such J, [L(a), L(b)] is contained in the
691 union of J and L(J). This does not follow from L being non-decreasing on
692 these intervals. It follows from the nature of the L on each interval.
693 On each interval, L has one of the following forms:
694
695 (1) L(ch) = constant (LowercaseSet)
696 (2) L(ch) = ch + offset (LowercaseAdd)
697 (3) L(ch) = ch | 1 (LowercaseBor)
698 (4) L(ch) = ch + (ch & 1) (LowercaseBad)
699
700 It is easy to verify that for any of these forms [L(a), L(b)] is
701 contained in the union of [a, b] and L([a, b]).
702***************************************************************************/
703
// Operations an lcTable entry can apply to map an interval to lowercase.
const (
	LowercaseSet = 0 // Set to arg.
	LowercaseAdd = 1 // Add arg.
	LowercaseBor = 2 // Bitwise or with 1.
	LowercaseBad = 3 // Bitwise and with 1 and add original.
)

// lcMap describes how to lowercase the inclusive interval [chMin, chMax]:
// op is one of the Lowercase* operations and data is its argument (the target
// character for LowercaseSet, the offset for LowercaseAdd, unused otherwise).
type lcMap struct {
	chMin, chMax rune
	op, data     int32
}

// lcTable lists the intervals that have a lowercase mapping. Entries are
// sorted by chMin and do not overlap; addLowercaseRange binary-searches the
// table and relies on that ordering.
//
// Two intervals are narrowed compared to the table this was ported from:
//   - U+00C0..U+00DE is split around U+00D7 MULTIPLICATION SIGN, which is not
//     a letter and must not map to U+00F7 DIVISION SIGN.
//   - The circled capital letters end at U+24CF; U+24D0 is already the
//     lowercase circled 'a' and must not map to U+24EA.
var lcTable = []lcMap{
	{'\u0041', '\u005A', LowercaseAdd, 32},
	{'\u00C0', '\u00D6', LowercaseAdd, 32},
	{'\u00D8', '\u00DE', LowercaseAdd, 32},
	{'\u0100', '\u012E', LowercaseBor, 0},
	{'\u0130', '\u0130', LowercaseSet, 0x0069},
	{'\u0132', '\u0136', LowercaseBor, 0},
	{'\u0139', '\u0147', LowercaseBad, 0},
	{'\u014A', '\u0176', LowercaseBor, 0},
	{'\u0178', '\u0178', LowercaseSet, 0x00FF},
	{'\u0179', '\u017D', LowercaseBad, 0},
	{'\u0181', '\u0181', LowercaseSet, 0x0253},
	{'\u0182', '\u0184', LowercaseBor, 0},
	{'\u0186', '\u0186', LowercaseSet, 0x0254},
	{'\u0187', '\u0187', LowercaseSet, 0x0188},
	{'\u0189', '\u018A', LowercaseAdd, 205},
	{'\u018B', '\u018B', LowercaseSet, 0x018C},
	{'\u018E', '\u018E', LowercaseSet, 0x01DD},
	{'\u018F', '\u018F', LowercaseSet, 0x0259},
	{'\u0190', '\u0190', LowercaseSet, 0x025B},
	{'\u0191', '\u0191', LowercaseSet, 0x0192},
	{'\u0193', '\u0193', LowercaseSet, 0x0260},
	{'\u0194', '\u0194', LowercaseSet, 0x0263},
	{'\u0196', '\u0196', LowercaseSet, 0x0269},
	{'\u0197', '\u0197', LowercaseSet, 0x0268},
	{'\u0198', '\u0198', LowercaseSet, 0x0199},
	{'\u019C', '\u019C', LowercaseSet, 0x026F},
	{'\u019D', '\u019D', LowercaseSet, 0x0272},
	{'\u019F', '\u019F', LowercaseSet, 0x0275},
	{'\u01A0', '\u01A4', LowercaseBor, 0},
	{'\u01A7', '\u01A7', LowercaseSet, 0x01A8},
	{'\u01A9', '\u01A9', LowercaseSet, 0x0283},
	{'\u01AC', '\u01AC', LowercaseSet, 0x01AD},
	{'\u01AE', '\u01AE', LowercaseSet, 0x0288},
	{'\u01AF', '\u01AF', LowercaseSet, 0x01B0},
	{'\u01B1', '\u01B2', LowercaseAdd, 217},
	{'\u01B3', '\u01B5', LowercaseBad, 0},
	{'\u01B7', '\u01B7', LowercaseSet, 0x0292},
	{'\u01B8', '\u01B8', LowercaseSet, 0x01B9},
	{'\u01BC', '\u01BC', LowercaseSet, 0x01BD},
	{'\u01C4', '\u01C5', LowercaseSet, 0x01C6},
	{'\u01C7', '\u01C8', LowercaseSet, 0x01C9},
	{'\u01CA', '\u01CB', LowercaseSet, 0x01CC},
	{'\u01CD', '\u01DB', LowercaseBad, 0},
	{'\u01DE', '\u01EE', LowercaseBor, 0},
	{'\u01F1', '\u01F2', LowercaseSet, 0x01F3},
	{'\u01F4', '\u01F4', LowercaseSet, 0x01F5},
	{'\u01FA', '\u0216', LowercaseBor, 0},
	{'\u0386', '\u0386', LowercaseSet, 0x03AC},
	{'\u0388', '\u038A', LowercaseAdd, 37},
	{'\u038C', '\u038C', LowercaseSet, 0x03CC},
	{'\u038E', '\u038F', LowercaseAdd, 63},
	{'\u0391', '\u03AB', LowercaseAdd, 32},
	{'\u03E2', '\u03EE', LowercaseBor, 0},
	{'\u0401', '\u040F', LowercaseAdd, 80},
	{'\u0410', '\u042F', LowercaseAdd, 32},
	{'\u0460', '\u0480', LowercaseBor, 0},
	{'\u0490', '\u04BE', LowercaseBor, 0},
	{'\u04C1', '\u04C3', LowercaseBad, 0},
	{'\u04C7', '\u04C7', LowercaseSet, 0x04C8},
	{'\u04CB', '\u04CB', LowercaseSet, 0x04CC},
	{'\u04D0', '\u04EA', LowercaseBor, 0},
	{'\u04EE', '\u04F4', LowercaseBor, 0},
	{'\u04F8', '\u04F8', LowercaseSet, 0x04F9},
	{'\u0531', '\u0556', LowercaseAdd, 48},
	{'\u10A0', '\u10C5', LowercaseAdd, 48},
	{'\u1E00', '\u1EF8', LowercaseBor, 0},
	{'\u1F08', '\u1F0F', LowercaseAdd, -8},
	{'\u1F18', '\u1F1F', LowercaseAdd, -8},
	{'\u1F28', '\u1F2F', LowercaseAdd, -8},
	{'\u1F38', '\u1F3F', LowercaseAdd, -8},
	{'\u1F48', '\u1F4D', LowercaseAdd, -8},
	{'\u1F59', '\u1F59', LowercaseSet, 0x1F51},
	{'\u1F5B', '\u1F5B', LowercaseSet, 0x1F53},
	{'\u1F5D', '\u1F5D', LowercaseSet, 0x1F55},
	{'\u1F5F', '\u1F5F', LowercaseSet, 0x1F57},
	{'\u1F68', '\u1F6F', LowercaseAdd, -8},
	{'\u1F88', '\u1F8F', LowercaseAdd, -8},
	{'\u1F98', '\u1F9F', LowercaseAdd, -8},
	{'\u1FA8', '\u1FAF', LowercaseAdd, -8},
	{'\u1FB8', '\u1FB9', LowercaseAdd, -8},
	{'\u1FBA', '\u1FBB', LowercaseAdd, -74},
	{'\u1FBC', '\u1FBC', LowercaseSet, 0x1FB3},
	{'\u1FC8', '\u1FCB', LowercaseAdd, -86},
	{'\u1FCC', '\u1FCC', LowercaseSet, 0x1FC3},
	{'\u1FD8', '\u1FD9', LowercaseAdd, -8},
	{'\u1FDA', '\u1FDB', LowercaseAdd, -100},
	{'\u1FE8', '\u1FE9', LowercaseAdd, -8},
	{'\u1FEA', '\u1FEB', LowercaseAdd, -112},
	{'\u1FEC', '\u1FEC', LowercaseSet, 0x1FE5},
	{'\u1FF8', '\u1FF9', LowercaseAdd, -128},
	{'\u1FFA', '\u1FFB', LowercaseAdd, -126},
	{'\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3},
	{'\u2160', '\u216F', LowercaseAdd, 16},
	{'\u24B6', '\u24CF', LowercaseAdd, 26},
	{'\uFF21', '\uFF3A', LowercaseAdd, 32},
}
812
813func (c *CharSet) addLowercaseRange(chMin, chMax rune) {
814 var i, iMax, iMid int
815 var chMinT, chMaxT rune
816 var lc lcMap
817
818 for i, iMax = 0, len(lcTable); i < iMax; {
819 iMid = (i + iMax) / 2
820 if lcTable[iMid].chMax < chMin {
821 i = iMid + 1
822 } else {
823 iMax = iMid
824 }
825 }
826
827 for ; i < len(lcTable); i++ {
828 lc = lcTable[i]
829 if lc.chMin > chMax {
830 return
831 }
832 chMinT = lc.chMin
833 if chMinT < chMin {
834 chMinT = chMin
835 }
836
837 chMaxT = lc.chMax
838 if chMaxT > chMax {
839 chMaxT = chMax
840 }
841
842 switch lc.op {
843 case LowercaseSet:
844 chMinT = rune(lc.data)
845 chMaxT = rune(lc.data)
846 break
847 case LowercaseAdd:
848 chMinT += lc.data
849 chMaxT += lc.data
850 break
851 case LowercaseBor:
852 chMinT |= 1
853 chMaxT |= 1
854 break
855 case LowercaseBad:
856 chMinT += (chMinT & 1)
857 chMaxT += (chMaxT & 1)
858 break
859 }
860
861 if chMinT < chMin || chMaxT > chMax {
862 c.addRange(chMinT, chMaxT)
863 }
864 }
865}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/code.go b/vendor/github.com/dlclark/regexp2/syntax/code.go
new file mode 100644
index 0000000..686e822
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/code.go
@@ -0,0 +1,274 @@
1package syntax
2
3import (
4 "bytes"
5 "fmt"
6 "math"
7)
8
9// similar to prog.go in the go regex package...also with comment 'may not belong in this package'
10
11// File provides operator constants for use by the Builder and the Machine.
12
13// Implementation notes:
14//
15// Regexps are built into RegexCodes, which contain an operation array,
16// a string table, and some constants.
17//
18// Each operation is one of the codes below, followed by the integer
19// operands specified for each op.
20//
21// Strings and sets are indices into a string table.
22
// InstOp is a regex instruction opcode, optionally combined with the modifier
// bits Rtl, Back, Back2 and Ci declared below.
type InstOp int

// Opcode values. Only Onerep is declared with the InstOp type; the remaining
// names are untyped integer constants that convert to InstOp (or int) where
// they are used.
const (
	// 					left/back operands		description

	Onerep    InstOp = 0 // left,back char,min,max	a {n}
	Notonerep        = 1 // left,back char,min,max	.{n}
	Setrep           = 2 // left,back set,min,max	[\d]{n}

	Oneloop    = 3 // left,back char,min,max	a {,n}
	Notoneloop = 4 // left,back char,min,max	.{,n}
	Setloop    = 5 // left,back set,min,max	[\d]{,n}

	Onelazy    = 6 // left,back char,min,max	a {,n}?
	Notonelazy = 7 // left,back char,min,max	.{,n}?
	Setlazy    = 8 // left,back set,min,max	[\d]{,n}?

	One    = 9  // left		char		a
	Notone = 10 // left		char		[^a]
	Set    = 11 // left		set		[a-z\s]  \w \s \d

	Multi = 12 // left		string		abcd
	Ref   = 13 // left		group		\#

	Bol         = 14 //				^
	Eol         = 15 //				$
	Boundary    = 16 //				\b
	Nonboundary = 17 //				\B
	Beginning   = 18 //				\A
	Start       = 19 //				\G
	EndZ        = 20 //				\Z
	End         = 21 //				\z

	Nothing = 22 //				Reject!

	// Primitive control structures

	Lazybranch      = 23 // back		jump		straight first
	Branchmark      = 24 // back		jump		branch first for loop
	Lazybranchmark  = 25 // back		jump		straight first for loop
	Nullcount       = 26 // back		val		set counter, null mark
	Setcount        = 27 // back		val		set counter, make mark
	Branchcount     = 28 // back		jump,limit	branch++ if zero<=c<limit
	Lazybranchcount = 29 // back		jump,limit	same, but straight first
	Nullmark        = 30 // back				save position
	Setmark         = 31 // back				save position
	Capturemark     = 32 // back		group		define group
	Getmark         = 33 // back				recall position
	Setjump         = 34 // back				save backtrack state
	Backjump        = 35 //				zap back to saved state
	Forejump        = 36 //				zap backtracking state
	Testref         = 37 //				backtrack if ref undefined
	Goto            = 38 //		jump		just go

	Prune = 39 //				prune it baby
	Stop  = 40 //				done!

	ECMABoundary    = 41 //				\b
	NonECMABoundary = 42 //				\B

	// Modifiers for alternate modes

	Mask  = 63  // Mask to get unmodified ordinary operator
	Rtl   = 64  // bit to indicate that we're reverse scanning.
	Back  = 128 // bit to indicate that we're backtracking.
	Back2 = 256 // bit to indicate that we're backtracking on a second branch.
	Ci    = 512 // bit to indicate that we're case-insensitive.
)
91
// Code is a compiled regex program: the flat instruction array together with
// the string and set tables its operands index into, plus capture, prefix and
// anchor information.
type Code struct {
	Codes       []int       // the code: opcodes followed by their integer operands
	Strings     [][]rune    // string table
	Sets        []*CharSet  // character set table
	TrackCount  int         // how many instructions use backtracking
	Caps        map[int]int // mapping of user group numbers -> impl group slots
	Capsize     int         // number of impl group slots
	FcPrefix    *Prefix     // the set of candidate first characters (may be null)
	BmPrefix    *BmPrefix   // the fixed prefix string as a Boyer-Moore machine (may be null)
	Anchors     AnchorLoc   // the set of zero-length start anchors (RegexFCD.Bol, etc)
	RightToLeft bool        // true if right to left
}
104
105func opcodeBacktracks(op InstOp) bool {
106 op &= Mask
107
108 switch op {
109 case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark,
110 Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump,
111 Forejump, Goto:
112 return true
113
114 default:
115 return false
116 }
117}
118
119func opcodeSize(op InstOp) int {
120 op &= Mask
121
122 switch op {
123 case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ,
124 End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop:
125 return 1
126
127 case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark,
128 Prune, Set:
129 return 2
130
131 case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy,
132 Setlazy, Setrep, Setloop:
133 return 3
134
135 default:
136 panic(fmt.Errorf("Unexpected op code: %v", op))
137 }
138}
139
// codeStr holds the display name of each opcode, indexed by opcode value
// (Onerep = 0 through NonECMABoundary = 42). operatorDescription indexes it
// with the masked opcode.
var codeStr = []string{
	"Onerep", "Notonerep", "Setrep",
	"Oneloop", "Notoneloop", "Setloop",
	"Onelazy", "Notonelazy", "Setlazy",
	"One", "Notone", "Set",
	"Multi", "Ref",
	"Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
	"Nothing",
	"Lazybranch", "Branchmark", "Lazybranchmark",
	"Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
	"Nullmark", "Setmark", "Capturemark", "Getmark",
	"Setjump", "Backjump", "Forejump", "Testref", "Goto",
	"Prune", "Stop",
	"ECMABoundary", "NonECMABoundary",
}
155
156func operatorDescription(op InstOp) string {
157 desc := codeStr[op&Mask]
158 if (op & Ci) != 0 {
159 desc += "-Ci"
160 }
161 if (op & Rtl) != 0 {
162 desc += "-Rtl"
163 }
164 if (op & Back) != 0 {
165 desc += "-Back"
166 }
167 if (op & Back2) != 0 {
168 desc += "-Back2"
169 }
170
171 return desc
172}
173
174// OpcodeDescription is a humman readable string of the specific offset
175func (c *Code) OpcodeDescription(offset int) string {
176 buf := &bytes.Buffer{}
177
178 op := InstOp(c.Codes[offset])
179 fmt.Fprintf(buf, "%06d ", offset)
180
181 if opcodeBacktracks(op & Mask) {
182 buf.WriteString("*")
183 } else {
184 buf.WriteString(" ")
185 }
186 buf.WriteString(operatorDescription(op))
187 buf.WriteString("(")
188 op &= Mask
189
190 switch op {
191 case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
192 buf.WriteString("Ch = ")
193 buf.WriteString(CharDescription(rune(c.Codes[offset+1])))
194
195 case Set, Setrep, Setloop, Setlazy:
196 buf.WriteString("Set = ")
197 buf.WriteString(c.Sets[c.Codes[offset+1]].String())
198
199 case Multi:
200 fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]]))
201
202 case Ref, Testref:
203 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
204
205 case Capturemark:
206 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
207 if c.Codes[offset+2] != -1 {
208 fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2])
209 }
210
211 case Nullcount, Setcount:
212 fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1])
213
214 case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
215 fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1])
216 }
217
218 switch op {
219 case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
220 buf.WriteString(", Rep = ")
221 if c.Codes[offset+2] == math.MaxInt32 {
222 buf.WriteString("inf")
223 } else {
224 fmt.Fprintf(buf, "%d", c.Codes[offset+2])
225 }
226
227 case Branchcount, Lazybranchcount:
228 buf.WriteString(", Limit = ")
229 if c.Codes[offset+2] == math.MaxInt32 {
230 buf.WriteString("inf")
231 } else {
232 fmt.Fprintf(buf, "%d", c.Codes[offset+2])
233 }
234
235 }
236
237 buf.WriteString(")")
238
239 return buf.String()
240}
241
242func (c *Code) Dump() string {
243 buf := &bytes.Buffer{}
244
245 if c.RightToLeft {
246 fmt.Fprintln(buf, "Direction: right-to-left")
247 } else {
248 fmt.Fprintln(buf, "Direction: left-to-right")
249 }
250 if c.FcPrefix == nil {
251 fmt.Fprintln(buf, "Firstchars: n/a")
252 } else {
253 fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
254 }
255
256 if c.BmPrefix == nil {
257 fmt.Fprintln(buf, "Prefix: n/a")
258 } else {
259 fmt.Fprintf(buf, "Prefix: %v\n", Escape(c.BmPrefix.String()))
260 }
261
262 fmt.Fprintf(buf, "Anchors: %v\n", c.Anchors)
263 fmt.Fprintln(buf)
264
265 if c.BmPrefix != nil {
266 fmt.Fprintln(buf, "BoyerMoore:")
267 fmt.Fprintln(buf, c.BmPrefix.Dump(" "))
268 }
269 for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) {
270 fmt.Fprintln(buf, c.OpcodeDescription(i))
271 }
272
273 return buf.String()
274}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/escape.go b/vendor/github.com/dlclark/regexp2/syntax/escape.go
new file mode 100644
index 0000000..609df10
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/escape.go
@@ -0,0 +1,94 @@
1package syntax
2
3import (
4 "bytes"
5 "strconv"
6 "strings"
7 "unicode"
8)
9
// Escape returns input with every rune escaped as needed so it can be used
// literally inside a pattern: printable metacharacters get a leading
// backslash and non-printable runes are written as escape sequences.
func Escape(input string) string {
	b := &bytes.Buffer{}
	for _, r := range input {
		escape(b, r, false)
	}
	return b.String()
}

// meta lists the printable characters that escape prefixes with a backslash.
const meta = `\.+*?()|[]{}^$# `

// escape writes the escaped form of r to b.
//
// Printable runes are written as-is, preceded by a backslash when they appear
// in meta or when force is true. Non-printable runes use the named escapes
// \a \f \n \r \t \v where available, otherwise \xHH for values below 0x100
// and \uHHHH for larger values.
func escape(b *bytes.Buffer, r rune, force bool) {
	if unicode.IsPrint(r) {
		if force || strings.ContainsRune(meta, r) {
			b.WriteRune('\\')
		}
		b.WriteRune(r)
		return
	}

	switch r {
	case '\a':
		b.WriteString(`\a`)
	case '\f':
		b.WriteString(`\f`)
	case '\n':
		b.WriteString(`\n`)
	case '\r':
		b.WriteString(`\r`)
	case '\t':
		b.WriteString(`\t`)
	case '\v':
		b.WriteString(`\v`)
	default:
		hex := strconv.FormatInt(int64(r), 16)
		if r < 0x100 {
			b.WriteString(`\x`)
			if len(hex) == 1 {
				b.WriteByte('0')
			}
		} else {
			b.WriteString(`\u`)
			// Zero-pad to four digits: U+0600 must be written as \u0600,
			// not \u600. NOTE(review): runes above U+FFFF still produce more
			// than four digits, as before; confirm how the parser should
			// represent them.
			for n := len(hex); n < 4; n++ {
				b.WriteByte('0')
			}
		}
		b.WriteString(hex)
	}
}
56
57func Unescape(input string) (string, error) {
58 idx := strings.IndexRune(input, '\\')
59 // no slashes means no unescape needed
60 if idx == -1 {
61 return input, nil
62 }
63
64 buf := bytes.NewBufferString(input[:idx])
65 // get the runes for the rest of the string -- we're going full parser scan on this
66
67 p := parser{}
68 p.setPattern(input[idx+1:])
69 for {
70 if p.rightMost() {
71 return "", p.getErr(ErrIllegalEndEscape)
72 }
73 r, err := p.scanCharEscape()
74 if err != nil {
75 return "", err
76 }
77 buf.WriteRune(r)
78 // are we done?
79 if p.rightMost() {
80 return buf.String(), nil
81 }
82
83 r = p.moveRightGetChar()
84 for r != '\\' {
85 buf.WriteRune(r)
86 if p.rightMost() {
87 // we're done, no more slashes
88 return buf.String(), nil
89 }
90 // keep scanning until we get another slash
91 r = p.moveRightGetChar()
92 }
93 }
94}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/fuzz.go b/vendor/github.com/dlclark/regexp2/syntax/fuzz.go
new file mode 100644
index 0000000..ee86386
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/fuzz.go
@@ -0,0 +1,20 @@
1// +build gofuzz
2
3package syntax
4
5// Fuzz is the input point for go-fuzz
6func Fuzz(data []byte) int {
7 sdata := string(data)
8 tree, err := Parse(sdata, RegexOptions(0))
9 if err != nil {
10 return 0
11 }
12
13 // translate it to code
14 _, err = Write(tree)
15 if err != nil {
16 panic(err)
17 }
18
19 return 1
20}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/parser.go b/vendor/github.com/dlclark/regexp2/syntax/parser.go
new file mode 100644
index 0000000..9dc6e31
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/parser.go
@@ -0,0 +1,2251 @@
1package syntax
2
3import (
4 "fmt"
5 "math"
6 "os"
7 "sort"
8 "strconv"
9 "unicode"
10)
11
// RegexOptions is a set of bit flags that adjust how a pattern is parsed.
type RegexOptions int32

// Option flags, one bit each so they can be combined with |. The trailing
// comment on each line is the option letter accepted by optionFromCode
// (where one exists).
//
// Only IgnoreCase is declared with the RegexOptions type; the other names
// are untyped integer constants.
const (
	IgnoreCase              RegexOptions = 1 << 0  // "i"
	Multiline                            = 1 << 1  // "m"
	ExplicitCapture                      = 1 << 2  // "n"
	Compiled                             = 1 << 3  // "c"
	Singleline                           = 1 << 4  // "s"
	IgnorePatternWhitespace              = 1 << 5  // "x"
	RightToLeft                          = 1 << 6  // "r"
	Debug                                = 1 << 7  // "d"
	ECMAScript                           = 1 << 8  // "e"
	RE2                                  = 1 << 9  // RE2 compat mode
	Unicode                              = 1 << 10 // "u"
)
27
28func optionFromCode(ch rune) RegexOptions {
29 // case-insensitive
30 switch ch {
31 case 'i', 'I':
32 return IgnoreCase
33 case 'r', 'R':
34 return RightToLeft
35 case 'm', 'M':
36 return Multiline
37 case 'n', 'N':
38 return ExplicitCapture
39 case 's', 'S':
40 return Singleline
41 case 'x', 'X':
42 return IgnorePatternWhitespace
43 case 'd', 'D':
44 return Debug
45 case 'e', 'E':
46 return ECMAScript
47 case 'u', 'U':
48 return Unicode
49 default:
50 return 0
51 }
52}
53
54// An Error describes a failure to parse a regular expression
55// and gives the offending expression.
56type Error struct {
57 Code ErrorCode
58 Expr string
59 Args []interface{}
60}
61
62func (e *Error) Error() string {
63 if len(e.Args) == 0 {
64 return "error parsing regexp: " + e.Code.String() + " in `" + e.Expr + "`"
65 }
66 return "error parsing regexp: " + fmt.Sprintf(e.Code.String(), e.Args...) + " in `" + e.Expr + "`"
67}
68
// An ErrorCode describes a failure to parse a regular expression.
// Its string value is the human-readable message; some messages contain
// fmt verbs that are filled in from Error.Args.
type ErrorCode string

// Messages for every failure the parser reports. Only ErrInternalError is
// declared with the ErrorCode type; the remaining names are untyped string
// constants.
const (
	// internal issue
	ErrInternalError ErrorCode = "regexp/syntax: internal error"
	// Parser errors
	ErrUnterminatedComment        = "unterminated comment"
	ErrInvalidCharRange           = "invalid character class range"
	ErrInvalidRepeatSize          = "invalid repeat count"
	ErrInvalidUTF8                = "invalid UTF-8"
	ErrCaptureGroupOutOfRange     = "capture group number out of range"
	ErrUnexpectedParen            = "unexpected )"
	ErrMissingParen               = "missing closing )"
	ErrMissingBrace               = "missing closing }"
	ErrInvalidRepeatOp            = "invalid nested repetition operator"
	ErrMissingRepeatArgument      = "missing argument to repetition operator"
	ErrConditionalExpression      = "illegal conditional (?(...)) expression"
	ErrTooManyAlternates          = "too many | in (?()|)"
	ErrUnrecognizedGrouping       = "unrecognized grouping construct: (%v"
	ErrInvalidGroupName           = "invalid group name: group names must begin with a word character and have a matching terminator"
	ErrCapNumNotZero              = "capture number cannot be zero"
	ErrUndefinedBackRef           = "reference to undefined group number %v"
	ErrUndefinedNameRef           = "reference to undefined group name %v"
	ErrAlternationCantCapture     = "alternation conditions do not capture and cannot be named"
	ErrAlternationCantHaveComment = "alternation conditions cannot be comments"
	ErrMalformedReference         = "(?(%v) ) malformed"
	ErrUndefinedReference         = "(?(%v) ) reference to undefined group"
	ErrIllegalEndEscape           = "illegal \\ at end of pattern"
	ErrMalformedSlashP            = "malformed \\p{X} character escape"
	ErrIncompleteSlashP           = "incomplete \\p{X} character escape"
	ErrUnknownSlashP              = "unknown unicode category, script, or property '%v'"
	ErrUnrecognizedEscape         = "unrecognized escape sequence \\%v"
	ErrMissingControl             = "missing control character"
	ErrUnrecognizedControl        = "unrecognized control character"
	ErrTooFewHex                  = "insufficient hexadecimal digits"
	ErrInvalidHex                 = "hex values may not be larger than 0x10FFFF"
	ErrMalformedNameRef           = "malformed \\k<...> named back reference"
	ErrBadClassInCharRange        = "cannot include class \\%v in character range"
	ErrUnterminatedBracket        = "unterminated [] set"
	ErrSubtractionMustBeLast      = "a subtraction must be the last element in a character class"
	ErrReversedCharRange          = "[%c-%c] range in reverse order"
)

// String returns the message text of the code.
func (e ErrorCode) String() string {
	return string(e)
}
116
117type parser struct {
118 stack *regexNode
119 group *regexNode
120 alternation *regexNode
121 concatenation *regexNode
122 unit *regexNode
123
124 patternRaw string
125 pattern []rune
126
127 currentPos int
128 specialCase *unicode.SpecialCase
129
130 autocap int
131 capcount int
132 captop int
133 capsize int
134
135 caps map[int]int
136 capnames map[string]int
137
138 capnumlist []int
139 capnamelist []string
140
141 options RegexOptions
142 optionsStack []RegexOptions
143 ignoreNextParen bool
144}
145
// Limits used to detect overflow while accumulating a decimal number one
// digit at a time: a value may take another digit only if it is below
// maxValueDiv10, or equal to it with a digit no greater than maxValueMod10.
const (
	maxValueDiv10 int = math.MaxInt32 / 10
	maxValueMod10     = math.MaxInt32 % 10
)
150
151// Parse converts a regex string into a parse tree
152func Parse(re string, op RegexOptions) (*RegexTree, error) {
153 p := parser{
154 options: op,
155 caps: make(map[int]int),
156 }
157 p.setPattern(re)
158
159 if err := p.countCaptures(); err != nil {
160 return nil, err
161 }
162
163 p.reset(op)
164 root, err := p.scanRegex()
165
166 if err != nil {
167 return nil, err
168 }
169 tree := &RegexTree{
170 root: root,
171 caps: p.caps,
172 capnumlist: p.capnumlist,
173 captop: p.captop,
174 Capnames: p.capnames,
175 Caplist: p.capnamelist,
176 options: op,
177 }
178
179 if tree.options&Debug > 0 {
180 os.Stdout.WriteString(tree.Dump())
181 }
182
183 return tree, nil
184}
185
186func (p *parser) setPattern(pattern string) {
187 p.patternRaw = pattern
188 p.pattern = make([]rune, 0, len(pattern))
189
190 //populate our rune array to handle utf8 encoding
191 for _, r := range pattern {
192 p.pattern = append(p.pattern, r)
193 }
194}
195func (p *parser) getErr(code ErrorCode, args ...interface{}) error {
196 return &Error{Code: code, Expr: p.patternRaw, Args: args}
197}
198
199func (p *parser) noteCaptureSlot(i, pos int) {
200 if _, ok := p.caps[i]; !ok {
201 // the rhs of the hashtable isn't used in the parser
202 p.caps[i] = pos
203 p.capcount++
204
205 if p.captop <= i {
206 if i == math.MaxInt32 {
207 p.captop = i
208 } else {
209 p.captop = i + 1
210 }
211 }
212 }
213}
214
215func (p *parser) noteCaptureName(name string, pos int) {
216 if p.capnames == nil {
217 p.capnames = make(map[string]int)
218 }
219
220 if _, ok := p.capnames[name]; !ok {
221 p.capnames[name] = pos
222 p.capnamelist = append(p.capnamelist, name)
223 }
224}
225
226func (p *parser) assignNameSlots() {
227 if p.capnames != nil {
228 for _, name := range p.capnamelist {
229 for p.isCaptureSlot(p.autocap) {
230 p.autocap++
231 }
232 pos := p.capnames[name]
233 p.capnames[name] = p.autocap
234 p.noteCaptureSlot(p.autocap, pos)
235
236 p.autocap++
237 }
238 }
239
240 // if the caps array has at least one gap, construct the list of used slots
241 if p.capcount < p.captop {
242 p.capnumlist = make([]int, p.capcount)
243 i := 0
244
245 for k := range p.caps {
246 p.capnumlist[i] = k
247 i++
248 }
249
250 sort.Ints(p.capnumlist)
251 }
252
253 // merge capsnumlist into capnamelist
254 if p.capnames != nil || p.capnumlist != nil {
255 var oldcapnamelist []string
256 var next int
257 var k int
258
259 if p.capnames == nil {
260 oldcapnamelist = nil
261 p.capnames = make(map[string]int)
262 p.capnamelist = []string{}
263 next = -1
264 } else {
265 oldcapnamelist = p.capnamelist
266 p.capnamelist = []string{}
267 next = p.capnames[oldcapnamelist[0]]
268 }
269
270 for i := 0; i < p.capcount; i++ {
271 j := i
272 if p.capnumlist != nil {
273 j = p.capnumlist[i]
274 }
275
276 if next == j {
277 p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
278 k++
279
280 if k == len(oldcapnamelist) {
281 next = -1
282 } else {
283 next = p.capnames[oldcapnamelist[k]]
284 }
285
286 } else {
287 //feature: culture?
288 str := strconv.Itoa(j)
289 p.capnamelist = append(p.capnamelist, str)
290 p.capnames[str] = j
291 }
292 }
293 }
294}
295
296func (p *parser) consumeAutocap() int {
297 r := p.autocap
298 p.autocap++
299 return r
300}
301
302// CountCaptures is a prescanner for deducing the slots used for
303// captures by doing a partial tokenization of the pattern.
304func (p *parser) countCaptures() error {
305 var ch rune
306
307 p.noteCaptureSlot(0, 0)
308
309 p.autocap = 1
310
311 for p.charsRight() > 0 {
312 pos := p.textpos()
313 ch = p.moveRightGetChar()
314 switch ch {
315 case '\\':
316 if p.charsRight() > 0 {
317 p.scanBackslash(true)
318 }
319
320 case '#':
321 if p.useOptionX() {
322 p.moveLeft()
323 p.scanBlank()
324 }
325
326 case '[':
327 p.scanCharSet(false, true)
328
329 case ')':
330 if !p.emptyOptionsStack() {
331 p.popOptions()
332 }
333
334 case '(':
335 if p.charsRight() >= 2 && p.rightChar(1) == '#' && p.rightChar(0) == '?' {
336 p.moveLeft()
337 p.scanBlank()
338 } else {
339 p.pushOptions()
340 if p.charsRight() > 0 && p.rightChar(0) == '?' {
341 // we have (?...
342 p.moveRight(1)
343
344 if p.charsRight() > 1 && (p.rightChar(0) == '<' || p.rightChar(0) == '\'') {
345 // named group: (?<... or (?'...
346
347 p.moveRight(1)
348 ch = p.rightChar(0)
349
350 if ch != '0' && IsWordChar(ch) {
351 if ch >= '1' && ch <= '9' {
352 dec, err := p.scanDecimal()
353 if err != nil {
354 return err
355 }
356 p.noteCaptureSlot(dec, pos)
357 } else {
358 p.noteCaptureName(p.scanCapname(), pos)
359 }
360 }
361 } else if p.useRE2() && p.charsRight() > 2 && (p.rightChar(0) == 'P' && p.rightChar(1) == '<') {
362 // RE2-compat (?P<)
363 p.moveRight(2)
364 ch = p.rightChar(0)
365 if IsWordChar(ch) {
366 p.noteCaptureName(p.scanCapname(), pos)
367 }
368
369 } else {
370 // (?...
371
372 // get the options if it's an option construct (?cimsx-cimsx...)
373 p.scanOptions()
374
375 if p.charsRight() > 0 {
376 if p.rightChar(0) == ')' {
377 // (?cimsx-cimsx)
378 p.moveRight(1)
379 p.popKeepOptions()
380 } else if p.rightChar(0) == '(' {
381 // alternation construct: (?(foo)yes|no)
382 // ignore the next paren so we don't capture the condition
383 p.ignoreNextParen = true
384
385 // break from here so we don't reset ignoreNextParen
386 continue
387 }
388 }
389 }
390 } else {
391 if !p.useOptionN() && !p.ignoreNextParen {
392 p.noteCaptureSlot(p.consumeAutocap(), pos)
393 }
394 }
395 }
396
397 p.ignoreNextParen = false
398
399 }
400 }
401
402 p.assignNameSlots()
403 return nil
404}
405
406func (p *parser) reset(topopts RegexOptions) {
407 p.currentPos = 0
408 p.autocap = 1
409 p.ignoreNextParen = false
410
411 if len(p.optionsStack) > 0 {
412 p.optionsStack = p.optionsStack[:0]
413 }
414
415 p.options = topopts
416 p.stack = nil
417}
418
419func (p *parser) scanRegex() (*regexNode, error) {
420 ch := '@' // nonspecial ch, means at beginning
421 isQuant := false
422
423 p.startGroup(newRegexNodeMN(ntCapture, p.options, 0, -1))
424
425 for p.charsRight() > 0 {
426 wasPrevQuantifier := isQuant
427 isQuant = false
428
429 if err := p.scanBlank(); err != nil {
430 return nil, err
431 }
432
433 startpos := p.textpos()
434
435 // move past all of the normal characters. We'll stop when we hit some kind of control character,
436 // or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace.
437 if p.useOptionX() {
438 for p.charsRight() > 0 {
439 ch = p.rightChar(0)
440 //UGLY: clean up, this is ugly
441 if !(!isStopperX(ch) || (ch == '{' && !p.isTrueQuantifier())) {
442 break
443 }
444 p.moveRight(1)
445 }
446 } else {
447 for p.charsRight() > 0 {
448 ch = p.rightChar(0)
449 if !(!isSpecial(ch) || ch == '{' && !p.isTrueQuantifier()) {
450 break
451 }
452 p.moveRight(1)
453 }
454 }
455
456 endpos := p.textpos()
457
458 p.scanBlank()
459
460 if p.charsRight() == 0 {
461 ch = '!' // nonspecial, means at end
462 } else if ch = p.rightChar(0); isSpecial(ch) {
463 isQuant = isQuantifier(ch)
464 p.moveRight(1)
465 } else {
466 ch = ' ' // nonspecial, means at ordinary char
467 }
468
469 if startpos < endpos {
470 cchUnquantified := endpos - startpos
471 if isQuant {
472 cchUnquantified--
473 }
474 wasPrevQuantifier = false
475
476 if cchUnquantified > 0 {
477 p.addToConcatenate(startpos, cchUnquantified, false)
478 }
479
480 if isQuant {
481 p.addUnitOne(p.charAt(endpos - 1))
482 }
483 }
484
485 switch ch {
486 case '!':
487 goto BreakOuterScan
488
489 case ' ':
490 goto ContinueOuterScan
491
492 case '[':
493 cc, err := p.scanCharSet(p.useOptionI(), false)
494 if err != nil {
495 return nil, err
496 }
497 p.addUnitSet(cc)
498
499 case '(':
500 p.pushOptions()
501
502 if grouper, err := p.scanGroupOpen(); err != nil {
503 return nil, err
504 } else if grouper == nil {
505 p.popKeepOptions()
506 } else {
507 p.pushGroup()
508 p.startGroup(grouper)
509 }
510
511 continue
512
513 case '|':
514 p.addAlternate()
515 goto ContinueOuterScan
516
517 case ')':
518 if p.emptyStack() {
519 return nil, p.getErr(ErrUnexpectedParen)
520 }
521
522 if err := p.addGroup(); err != nil {
523 return nil, err
524 }
525 if err := p.popGroup(); err != nil {
526 return nil, err
527 }
528 p.popOptions()
529
530 if p.unit == nil {
531 goto ContinueOuterScan
532 }
533
534 case '\\':
535 n, err := p.scanBackslash(false)
536 if err != nil {
537 return nil, err
538 }
539 p.addUnitNode(n)
540
541 case '^':
542 if p.useOptionM() {
543 p.addUnitType(ntBol)
544 } else {
545 p.addUnitType(ntBeginning)
546 }
547
548 case '$':
549 if p.useOptionM() {
550 p.addUnitType(ntEol)
551 } else {
552 p.addUnitType(ntEndZ)
553 }
554
555 case '.':
556 if p.useOptionE() {
557 p.addUnitSet(ECMAAnyClass())
558 } else if p.useOptionS() {
559 p.addUnitSet(AnyClass())
560 } else {
561 p.addUnitNotone('\n')
562 }
563
564 case '{', '*', '+', '?':
565 if p.unit == nil {
566 if wasPrevQuantifier {
567 return nil, p.getErr(ErrInvalidRepeatOp)
568 } else {
569 return nil, p.getErr(ErrMissingRepeatArgument)
570 }
571 }
572 p.moveLeft()
573
574 default:
575 return nil, p.getErr(ErrInternalError)
576 }
577
578 if err := p.scanBlank(); err != nil {
579 return nil, err
580 }
581
582 if p.charsRight() > 0 {
583 isQuant = p.isTrueQuantifier()
584 }
585 if p.charsRight() == 0 || !isQuant {
586 //maintain odd C# assignment order -- not sure if required, could clean up?
587 p.addConcatenate()
588 goto ContinueOuterScan
589 }
590
591 ch = p.moveRightGetChar()
592
593 // Handle quantifiers
594 for p.unit != nil {
595 var min, max int
596 var lazy bool
597
598 switch ch {
599 case '*':
600 min = 0
601 max = math.MaxInt32
602
603 case '?':
604 min = 0
605 max = 1
606
607 case '+':
608 min = 1
609 max = math.MaxInt32
610
611 case '{':
612 {
613 var err error
614 startpos = p.textpos()
615 if min, err = p.scanDecimal(); err != nil {
616 return nil, err
617 }
618 max = min
619 if startpos < p.textpos() {
620 if p.charsRight() > 0 && p.rightChar(0) == ',' {
621 p.moveRight(1)
622 if p.charsRight() == 0 || p.rightChar(0) == '}' {
623 max = math.MaxInt32
624 } else {
625 if max, err = p.scanDecimal(); err != nil {
626 return nil, err
627 }
628 }
629 }
630 }
631
632 if startpos == p.textpos() || p.charsRight() == 0 || p.moveRightGetChar() != '}' {
633 p.addConcatenate()
634 p.textto(startpos - 1)
635 goto ContinueOuterScan
636 }
637 }
638
639 default:
640 return nil, p.getErr(ErrInternalError)
641 }
642
643 if err := p.scanBlank(); err != nil {
644 return nil, err
645 }
646
647 if p.charsRight() == 0 || p.rightChar(0) != '?' {
648 lazy = false
649 } else {
650 p.moveRight(1)
651 lazy = true
652 }
653
654 if min > max {
655 return nil, p.getErr(ErrInvalidRepeatSize)
656 }
657
658 p.addConcatenate3(lazy, min, max)
659 }
660
661 ContinueOuterScan:
662 }
663
664BreakOuterScan:
665 ;
666
667 if !p.emptyStack() {
668 return nil, p.getErr(ErrMissingParen)
669 }
670
671 if err := p.addGroup(); err != nil {
672 return nil, err
673 }
674
675 return p.unit, nil
676
677}
678
679/*
680 * Simple parsing for replacement patterns
681 */
682func (p *parser) scanReplacement() (*regexNode, error) {
683 var c, startpos int
684
685 p.concatenation = newRegexNode(ntConcatenate, p.options)
686
687 for {
688 c = p.charsRight()
689 if c == 0 {
690 break
691 }
692
693 startpos = p.textpos()
694
695 for c > 0 && p.rightChar(0) != '$' {
696 p.moveRight(1)
697 c--
698 }
699
700 p.addToConcatenate(startpos, p.textpos()-startpos, true)
701
702 if c > 0 {
703 if p.moveRightGetChar() == '$' {
704 n, err := p.scanDollar()
705 if err != nil {
706 return nil, err
707 }
708 p.addUnitNode(n)
709 }
710 p.addConcatenate()
711 }
712 }
713
714 return p.concatenation, nil
715}
716
717/*
718 * Scans $ patterns recognized within replacement patterns
719 */
720func (p *parser) scanDollar() (*regexNode, error) {
721 if p.charsRight() == 0 {
722 return newRegexNodeCh(ntOne, p.options, '$'), nil
723 }
724
725 ch := p.rightChar(0)
726 angled := false
727 backpos := p.textpos()
728 lastEndPos := backpos
729
730 // Note angle
731
732 if ch == '{' && p.charsRight() > 1 {
733 angled = true
734 p.moveRight(1)
735 ch = p.rightChar(0)
736 }
737
738 // Try to parse backreference: \1 or \{1} or \{cap}
739
740 if ch >= '0' && ch <= '9' {
741 if !angled && p.useOptionE() {
742 capnum := -1
743 newcapnum := int(ch - '0')
744 p.moveRight(1)
745 if p.isCaptureSlot(newcapnum) {
746 capnum = newcapnum
747 lastEndPos = p.textpos()
748 }
749
750 for p.charsRight() > 0 {
751 ch = p.rightChar(0)
752 if ch < '0' || ch > '9' {
753 break
754 }
755 digit := int(ch - '0')
756 if newcapnum > maxValueDiv10 || (newcapnum == maxValueDiv10 && digit > maxValueMod10) {
757 return nil, p.getErr(ErrCaptureGroupOutOfRange)
758 }
759
760 newcapnum = newcapnum*10 + digit
761
762 p.moveRight(1)
763 if p.isCaptureSlot(newcapnum) {
764 capnum = newcapnum
765 lastEndPos = p.textpos()
766 }
767 }
768 p.textto(lastEndPos)
769 if capnum >= 0 {
770 return newRegexNodeM(ntRef, p.options, capnum), nil
771 }
772 } else {
773 capnum, err := p.scanDecimal()
774 if err != nil {
775 return nil, err
776 }
777 if !angled || p.charsRight() > 0 && p.moveRightGetChar() == '}' {
778 if p.isCaptureSlot(capnum) {
779 return newRegexNodeM(ntRef, p.options, capnum), nil
780 }
781 }
782 }
783 } else if angled && IsWordChar(ch) {
784 capname := p.scanCapname()
785
786 if p.charsRight() > 0 && p.moveRightGetChar() == '}' {
787 if p.isCaptureName(capname) {
788 return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
789 }
790 }
791 } else if !angled {
792 capnum := 1
793
794 switch ch {
795 case '$':
796 p.moveRight(1)
797 return newRegexNodeCh(ntOne, p.options, '$'), nil
798 case '&':
799 capnum = 0
800 case '`':
801 capnum = replaceLeftPortion
802 case '\'':
803 capnum = replaceRightPortion
804 case '+':
805 capnum = replaceLastGroup
806 case '_':
807 capnum = replaceWholeString
808 }
809
810 if capnum != 1 {
811 p.moveRight(1)
812 return newRegexNodeM(ntRef, p.options, capnum), nil
813 }
814 }
815
816 // unrecognized $: literalize
817
818 p.textto(backpos)
819 return newRegexNodeCh(ntOne, p.options, '$'), nil
820}
821
822// scanGroupOpen scans chars following a '(' (not counting the '('), and returns
823// a RegexNode for the type of group scanned, or nil if the group
824// simply changed options (?cimsx-cimsx) or was a comment (#...).
825func (p *parser) scanGroupOpen() (*regexNode, error) {
826 var ch rune
827 var nt nodeType
828 var err error
829 close := '>'
830 start := p.textpos()
831
832 // just return a RegexNode if we have:
833 // 1. "(" followed by nothing
834 // 2. "(x" where x != ?
835 // 3. "(?)"
836 if p.charsRight() == 0 || p.rightChar(0) != '?' || (p.rightChar(0) == '?' && (p.charsRight() > 1 && p.rightChar(1) == ')')) {
837 if p.useOptionN() || p.ignoreNextParen {
838 p.ignoreNextParen = false
839 return newRegexNode(ntGroup, p.options), nil
840 }
841 return newRegexNodeMN(ntCapture, p.options, p.consumeAutocap(), -1), nil
842 }
843
844 p.moveRight(1)
845
846 for {
847 if p.charsRight() == 0 {
848 break
849 }
850
851 switch ch = p.moveRightGetChar(); ch {
852 case ':':
853 nt = ntGroup
854
855 case '=':
856 p.options &= ^RightToLeft
857 nt = ntRequire
858
859 case '!':
860 p.options &= ^RightToLeft
861 nt = ntPrevent
862
863 case '>':
864 nt = ntGreedy
865
866 case '\'':
867 close = '\''
868 fallthrough
869
870 case '<':
871 if p.charsRight() == 0 {
872 goto BreakRecognize
873 }
874
875 switch ch = p.moveRightGetChar(); ch {
876 case '=':
877 if close == '\'' {
878 goto BreakRecognize
879 }
880
881 p.options |= RightToLeft
882 nt = ntRequire
883
884 case '!':
885 if close == '\'' {
886 goto BreakRecognize
887 }
888
889 p.options |= RightToLeft
890 nt = ntPrevent
891
892 default:
893 p.moveLeft()
894 capnum := -1
895 uncapnum := -1
896 proceed := false
897
898 // grab part before -
899
900 if ch >= '0' && ch <= '9' {
901 if capnum, err = p.scanDecimal(); err != nil {
902 return nil, err
903 }
904
905 if !p.isCaptureSlot(capnum) {
906 capnum = -1
907 }
908
909 // check if we have bogus characters after the number
910 if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
911 return nil, p.getErr(ErrInvalidGroupName)
912 }
913 if capnum == 0 {
914 return nil, p.getErr(ErrCapNumNotZero)
915 }
916 } else if IsWordChar(ch) {
917 capname := p.scanCapname()
918
919 if p.isCaptureName(capname) {
920 capnum = p.captureSlotFromName(capname)
921 }
922
923 // check if we have bogus character after the name
924 if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
925 return nil, p.getErr(ErrInvalidGroupName)
926 }
927 } else if ch == '-' {
928 proceed = true
929 } else {
930 // bad group name - starts with something other than a word character and isn't a number
931 return nil, p.getErr(ErrInvalidGroupName)
932 }
933
934 // grab part after - if any
935
936 if (capnum != -1 || proceed == true) && p.charsRight() > 0 && p.rightChar(0) == '-' {
937 p.moveRight(1)
938
939 //no more chars left, no closing char, etc
940 if p.charsRight() == 0 {
941 return nil, p.getErr(ErrInvalidGroupName)
942 }
943
944 ch = p.rightChar(0)
945 if ch >= '0' && ch <= '9' {
946 if uncapnum, err = p.scanDecimal(); err != nil {
947 return nil, err
948 }
949
950 if !p.isCaptureSlot(uncapnum) {
951 return nil, p.getErr(ErrUndefinedBackRef, uncapnum)
952 }
953
954 // check if we have bogus characters after the number
955 if p.charsRight() > 0 && p.rightChar(0) != close {
956 return nil, p.getErr(ErrInvalidGroupName)
957 }
958 } else if IsWordChar(ch) {
959 uncapname := p.scanCapname()
960
961 if !p.isCaptureName(uncapname) {
962 return nil, p.getErr(ErrUndefinedNameRef, uncapname)
963 }
964 uncapnum = p.captureSlotFromName(uncapname)
965
966 // check if we have bogus character after the name
967 if p.charsRight() > 0 && p.rightChar(0) != close {
968 return nil, p.getErr(ErrInvalidGroupName)
969 }
970 } else {
971 // bad group name - starts with something other than a word character and isn't a number
972 return nil, p.getErr(ErrInvalidGroupName)
973 }
974 }
975
976 // actually make the node
977
978 if (capnum != -1 || uncapnum != -1) && p.charsRight() > 0 && p.moveRightGetChar() == close {
979 return newRegexNodeMN(ntCapture, p.options, capnum, uncapnum), nil
980 }
981 goto BreakRecognize
982 }
983
984 case '(':
985 // alternation construct (?(...) | )
986
987 parenPos := p.textpos()
988 if p.charsRight() > 0 {
989 ch = p.rightChar(0)
990
991 // check if the alternation condition is a backref
992 if ch >= '0' && ch <= '9' {
993 var capnum int
994 if capnum, err = p.scanDecimal(); err != nil {
995 return nil, err
996 }
997 if p.charsRight() > 0 && p.moveRightGetChar() == ')' {
998 if p.isCaptureSlot(capnum) {
999 return newRegexNodeM(ntTestref, p.options, capnum), nil
1000 }
1001 return nil, p.getErr(ErrUndefinedReference, capnum)
1002 }
1003
1004 return nil, p.getErr(ErrMalformedReference, capnum)
1005
1006 } else if IsWordChar(ch) {
1007 capname := p.scanCapname()
1008
1009 if p.isCaptureName(capname) && p.charsRight() > 0 && p.moveRightGetChar() == ')' {
1010 return newRegexNodeM(ntTestref, p.options, p.captureSlotFromName(capname)), nil
1011 }
1012 }
1013 }
1014 // not a backref
1015 nt = ntTestgroup
1016 p.textto(parenPos - 1) // jump to the start of the parentheses
1017 p.ignoreNextParen = true // but make sure we don't try to capture the insides
1018
1019 charsRight := p.charsRight()
1020 if charsRight >= 3 && p.rightChar(1) == '?' {
1021 rightchar2 := p.rightChar(2)
1022 // disallow comments in the condition
1023 if rightchar2 == '#' {
1024 return nil, p.getErr(ErrAlternationCantHaveComment)
1025 }
1026
1027 // disallow named capture group (?<..>..) in the condition
1028 if rightchar2 == '\'' {
1029 return nil, p.getErr(ErrAlternationCantCapture)
1030 }
1031
1032 if charsRight >= 4 && (rightchar2 == '<' && p.rightChar(3) != '!' && p.rightChar(3) != '=') {
1033 return nil, p.getErr(ErrAlternationCantCapture)
1034 }
1035 }
1036
1037 case 'P':
1038 if p.useRE2() {
1039 // support for P<name> syntax
1040 if p.charsRight() < 3 {
1041 goto BreakRecognize
1042 }
1043
1044 ch = p.moveRightGetChar()
1045 if ch != '<' {
1046 goto BreakRecognize
1047 }
1048
1049 ch = p.moveRightGetChar()
1050 p.moveLeft()
1051
1052 if IsWordChar(ch) {
1053 capnum := -1
1054 capname := p.scanCapname()
1055
1056 if p.isCaptureName(capname) {
1057 capnum = p.captureSlotFromName(capname)
1058 }
1059
1060 // check if we have bogus character after the name
1061 if p.charsRight() > 0 && p.rightChar(0) != '>' {
1062 return nil, p.getErr(ErrInvalidGroupName)
1063 }
1064
1065 // actually make the node
1066
1067 if capnum != -1 && p.charsRight() > 0 && p.moveRightGetChar() == '>' {
1068 return newRegexNodeMN(ntCapture, p.options, capnum, -1), nil
1069 }
1070 goto BreakRecognize
1071
1072 } else {
1073 // bad group name - starts with something other than a word character and isn't a number
1074 return nil, p.getErr(ErrInvalidGroupName)
1075 }
1076 }
1077 // if we're not using RE2 compat mode then
1078 // we just behave like normal
1079 fallthrough
1080
1081 default:
1082 p.moveLeft()
1083
1084 nt = ntGroup
1085 // disallow options in the children of a testgroup node
1086 if p.group.t != ntTestgroup {
1087 p.scanOptions()
1088 }
1089 if p.charsRight() == 0 {
1090 goto BreakRecognize
1091 }
1092
1093 if ch = p.moveRightGetChar(); ch == ')' {
1094 return nil, nil
1095 }
1096
1097 if ch != ':' {
1098 goto BreakRecognize
1099 }
1100
1101 }
1102
1103 return newRegexNode(nt, p.options), nil
1104 }
1105
1106BreakRecognize:
1107
1108 // break Recognize comes here
1109
1110 return nil, p.getErr(ErrUnrecognizedGrouping, string(p.pattern[start:p.textpos()]))
1111}
1112
1113// scans backslash specials and basics
1114func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
1115
1116 if p.charsRight() == 0 {
1117 return nil, p.getErr(ErrIllegalEndEscape)
1118 }
1119
1120 switch ch := p.rightChar(0); ch {
1121 case 'b', 'B', 'A', 'G', 'Z', 'z':
1122 p.moveRight(1)
1123 return newRegexNode(p.typeFromCode(ch), p.options), nil
1124
1125 case 'w':
1126 p.moveRight(1)
1127 if p.useOptionE() || p.useRE2() {
1128 return newRegexNodeSet(ntSet, p.options, ECMAWordClass()), nil
1129 }
1130 return newRegexNodeSet(ntSet, p.options, WordClass()), nil
1131
1132 case 'W':
1133 p.moveRight(1)
1134 if p.useOptionE() || p.useRE2() {
1135 return newRegexNodeSet(ntSet, p.options, NotECMAWordClass()), nil
1136 }
1137 return newRegexNodeSet(ntSet, p.options, NotWordClass()), nil
1138
1139 case 's':
1140 p.moveRight(1)
1141 if p.useOptionE() {
1142 return newRegexNodeSet(ntSet, p.options, ECMASpaceClass()), nil
1143 } else if p.useRE2() {
1144 return newRegexNodeSet(ntSet, p.options, RE2SpaceClass()), nil
1145 }
1146 return newRegexNodeSet(ntSet, p.options, SpaceClass()), nil
1147
1148 case 'S':
1149 p.moveRight(1)
1150 if p.useOptionE() {
1151 return newRegexNodeSet(ntSet, p.options, NotECMASpaceClass()), nil
1152 } else if p.useRE2() {
1153 return newRegexNodeSet(ntSet, p.options, NotRE2SpaceClass()), nil
1154 }
1155 return newRegexNodeSet(ntSet, p.options, NotSpaceClass()), nil
1156
1157 case 'd':
1158 p.moveRight(1)
1159 if p.useOptionE() || p.useRE2() {
1160 return newRegexNodeSet(ntSet, p.options, ECMADigitClass()), nil
1161 }
1162 return newRegexNodeSet(ntSet, p.options, DigitClass()), nil
1163
1164 case 'D':
1165 p.moveRight(1)
1166 if p.useOptionE() || p.useRE2() {
1167 return newRegexNodeSet(ntSet, p.options, NotECMADigitClass()), nil
1168 }
1169 return newRegexNodeSet(ntSet, p.options, NotDigitClass()), nil
1170
1171 case 'p', 'P':
1172 p.moveRight(1)
1173 prop, err := p.parseProperty()
1174 if err != nil {
1175 return nil, err
1176 }
1177 cc := &CharSet{}
1178 cc.addCategory(prop, (ch != 'p'), p.useOptionI(), p.patternRaw)
1179 if p.useOptionI() {
1180 cc.addLowercase()
1181 }
1182
1183 return newRegexNodeSet(ntSet, p.options, cc), nil
1184
1185 default:
1186 return p.scanBasicBackslash(scanOnly)
1187 }
1188}
1189
1190// Scans \-style backreferences and character escapes
1191func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
1192 if p.charsRight() == 0 {
1193 return nil, p.getErr(ErrIllegalEndEscape)
1194 }
1195 angled := false
1196 k := false
1197 close := '\x00'
1198
1199 backpos := p.textpos()
1200 ch := p.rightChar(0)
1201
1202 // Allow \k<foo> instead of \<foo>, which is now deprecated.
1203
1204 // According to ECMAScript specification, \k<name> is only parsed as a named group reference if
1205 // there is at least one group name in the regexp.
1206 // See https://www.ecma-international.org/ecma-262/#sec-isvalidregularexpressionliteral, step 7.
1207 // Note, during the first (scanOnly) run we may not have all group names scanned, but that's ok.
1208 if ch == 'k' && (!p.useOptionE() || len(p.capnames) > 0) {
1209 if p.charsRight() >= 2 {
1210 p.moveRight(1)
1211 ch = p.moveRightGetChar()
1212
1213 if ch == '<' || (!p.useOptionE() && ch == '\'') { // No support for \k'name' in ECMAScript
1214 angled = true
1215 if ch == '\'' {
1216 close = '\''
1217 } else {
1218 close = '>'
1219 }
1220 }
1221 }
1222
1223 if !angled || p.charsRight() <= 0 {
1224 return nil, p.getErr(ErrMalformedNameRef)
1225 }
1226
1227 ch = p.rightChar(0)
1228 k = true
1229
1230 } else if !p.useOptionE() && (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
1231 angled = true
1232 if ch == '\'' {
1233 close = '\''
1234 } else {
1235 close = '>'
1236 }
1237
1238 p.moveRight(1)
1239 ch = p.rightChar(0)
1240 }
1241
1242 // Try to parse backreference: \<1> or \<cap>
1243
1244 if angled && ch >= '0' && ch <= '9' {
1245 capnum, err := p.scanDecimal()
1246 if err != nil {
1247 return nil, err
1248 }
1249
1250 if p.charsRight() > 0 && p.moveRightGetChar() == close {
1251 if p.isCaptureSlot(capnum) {
1252 return newRegexNodeM(ntRef, p.options, capnum), nil
1253 }
1254 return nil, p.getErr(ErrUndefinedBackRef, capnum)
1255 }
1256 } else if !angled && ch >= '1' && ch <= '9' { // Try to parse backreference or octal: \1
1257 capnum, err := p.scanDecimal()
1258 if err != nil {
1259 return nil, err
1260 }
1261
1262 if scanOnly {
1263 return nil, nil
1264 }
1265
1266 if p.isCaptureSlot(capnum) {
1267 return newRegexNodeM(ntRef, p.options, capnum), nil
1268 }
1269 if capnum <= 9 && !p.useOptionE() {
1270 return nil, p.getErr(ErrUndefinedBackRef, capnum)
1271 }
1272
1273 } else if angled {
1274 capname := p.scanCapname()
1275
1276 if capname != "" && p.charsRight() > 0 && p.moveRightGetChar() == close {
1277
1278 if scanOnly {
1279 return nil, nil
1280 }
1281
1282 if p.isCaptureName(capname) {
1283 return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
1284 }
1285 return nil, p.getErr(ErrUndefinedNameRef, capname)
1286 } else {
1287 if k {
1288 return nil, p.getErr(ErrMalformedNameRef)
1289 }
1290 }
1291 }
1292
1293 // Not backreference: must be char code
1294
1295 p.textto(backpos)
1296 ch, err := p.scanCharEscape()
1297 if err != nil {
1298 return nil, err
1299 }
1300
1301 if scanOnly {
1302 return nil, nil
1303 }
1304
1305 if p.useOptionI() {
1306 ch = unicode.ToLower(ch)
1307 }
1308
1309 return newRegexNodeCh(ntOne, p.options, ch), nil
1310}
1311
1312// Scans X for \p{X} or \P{X}
1313func (p *parser) parseProperty() (string, error) {
1314 if p.charsRight() < 3 {
1315 return "", p.getErr(ErrIncompleteSlashP)
1316 }
1317 ch := p.moveRightGetChar()
1318 if ch != '{' {
1319 return "", p.getErr(ErrMalformedSlashP)
1320 }
1321
1322 startpos := p.textpos()
1323 for p.charsRight() > 0 {
1324 ch = p.moveRightGetChar()
1325 if !(IsWordChar(ch) || ch == '-') {
1326 p.moveLeft()
1327 break
1328 }
1329 }
1330 capname := string(p.pattern[startpos:p.textpos()])
1331
1332 if p.charsRight() == 0 || p.moveRightGetChar() != '}' {
1333 return "", p.getErr(ErrIncompleteSlashP)
1334 }
1335
1336 if !isValidUnicodeCat(capname) {
1337 return "", p.getErr(ErrUnknownSlashP, capname)
1338 }
1339
1340 return capname, nil
1341}
1342
1343// Returns ReNode type for zero-length assertions with a \ code.
1344func (p *parser) typeFromCode(ch rune) nodeType {
1345 switch ch {
1346 case 'b':
1347 if p.useOptionE() {
1348 return ntECMABoundary
1349 }
1350 return ntBoundary
1351 case 'B':
1352 if p.useOptionE() {
1353 return ntNonECMABoundary
1354 }
1355 return ntNonboundary
1356 case 'A':
1357 return ntBeginning
1358 case 'G':
1359 return ntStart
1360 case 'Z':
1361 return ntEndZ
1362 case 'z':
1363 return ntEnd
1364 default:
1365 return ntNothing
1366 }
1367}
1368
1369// Scans whitespace or x-mode comments.
1370func (p *parser) scanBlank() error {
1371 if p.useOptionX() {
1372 for {
1373 for p.charsRight() > 0 && isSpace(p.rightChar(0)) {
1374 p.moveRight(1)
1375 }
1376
1377 if p.charsRight() == 0 {
1378 break
1379 }
1380
1381 if p.rightChar(0) == '#' {
1382 for p.charsRight() > 0 && p.rightChar(0) != '\n' {
1383 p.moveRight(1)
1384 }
1385 } else if p.charsRight() >= 3 && p.rightChar(2) == '#' &&
1386 p.rightChar(1) == '?' && p.rightChar(0) == '(' {
1387 for p.charsRight() > 0 && p.rightChar(0) != ')' {
1388 p.moveRight(1)
1389 }
1390 if p.charsRight() == 0 {
1391 return p.getErr(ErrUnterminatedComment)
1392 }
1393 p.moveRight(1)
1394 } else {
1395 break
1396 }
1397 }
1398 } else {
1399 for {
1400 if p.charsRight() < 3 || p.rightChar(2) != '#' ||
1401 p.rightChar(1) != '?' || p.rightChar(0) != '(' {
1402 return nil
1403 }
1404
1405 for p.charsRight() > 0 && p.rightChar(0) != ')' {
1406 p.moveRight(1)
1407 }
1408 if p.charsRight() == 0 {
1409 return p.getErr(ErrUnterminatedComment)
1410 }
1411 p.moveRight(1)
1412 }
1413 }
1414 return nil
1415}
1416
1417func (p *parser) scanCapname() string {
1418 startpos := p.textpos()
1419
1420 for p.charsRight() > 0 {
1421 if !IsWordChar(p.moveRightGetChar()) {
1422 p.moveLeft()
1423 break
1424 }
1425 }
1426
1427 return string(p.pattern[startpos:p.textpos()])
1428}
1429
1430//Scans contents of [] (not including []'s), and converts to a set.
//
// The cursor must be positioned just after the opening '['. On success the
// closing ']' has been consumed.
//
// caseInsensitive: when true (and not scanOnly) addLowercase is applied to the
// finished set, and the flag is forwarded to addCategory for \p / \P.
// scanOnly: when true no CharSet is built (cc stays nil and nil is returned);
// the text is only walked so the cursor ends up after the class.
//
// Returns ErrUnterminatedBracket if the pattern ends before a closing ']'.
// Most other validation errors are raised only when scanOnly is false; errors
// from scanCharEscape are returned in both modes.
1431func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
1432 ch := '\x00'
1433 chPrev := '\x00'
1434 inRange := false
1435 firstChar := true
1436 closed := false
1437
1438 var cc *CharSet
1439 if !scanOnly {
1440 cc = &CharSet{}
1441 }
1442
// A leading '^' negates the whole class.
1443 if p.charsRight() > 0 && p.rightChar(0) == '^' {
1444 p.moveRight(1)
1445 if !scanOnly {
1446 cc.negate = true
1447 }
1448 }
1449
// Main loop: one iteration per class member. firstChar is cleared by the loop's
// post statement, so it is true only for the first member after the optional '^'.
1450 for ; p.charsRight() > 0; firstChar = false {
1451 fTranslatedChar := false
1452 ch = p.moveRightGetChar()
1453 if ch == ']' {
// A ']' in first position does not close the class unless ECMAScript mode is
// on, where "[]" is an empty class (NoneClass ranges). Otherwise it falls
// through to the range/literal handling below and is treated as a literal.
1454 if !firstChar {
1455 closed = true
1456 break
1457 } else if p.useOptionE() {
1458 if !scanOnly {
1459 cc.addRanges(NoneClass().ranges)
1460 }
1461 closed = true
1462 break
1463 }
1464
1465 } else if ch == '\\' && p.charsRight() > 0 {
// Escape inside the class. The shorthand classes (\d \s \w \p and their
// negations) cannot be a range endpoint; that check only runs when building.
1466 switch ch = p.moveRightGetChar(); ch {
1467 case 'D', 'd':
1468 if !scanOnly {
1469 if inRange {
1470 return nil, p.getErr(ErrBadClassInCharRange, ch)
1471 }
1472 cc.addDigit(p.useOptionE() || p.useRE2(), ch == 'D', p.patternRaw)
1473 }
1474 continue
1475
1476 case 'S', 's':
1477 if !scanOnly {
1478 if inRange {
1479 return nil, p.getErr(ErrBadClassInCharRange, ch)
1480 }
1481 cc.addSpace(p.useOptionE(), p.useRE2(), ch == 'S')
1482 }
1483 continue
1484
1485 case 'W', 'w':
1486 if !scanOnly {
1487 if inRange {
1488 return nil, p.getErr(ErrBadClassInCharRange, ch)
1489 }
1490
1491 cc.addWord(p.useOptionE() || p.useRE2(), ch == 'W')
1492 }
1493 continue
1494
1495 case 'p', 'P':
1496 if !scanOnly {
1497 if inRange {
1498 return nil, p.getErr(ErrBadClassInCharRange, ch)
1499 }
1500 prop, err := p.parseProperty()
1501 if err != nil {
1502 return nil, err
1503 }
1504 cc.addCategory(prop, (ch != 'p'), caseInsensitive, p.patternRaw)
1505 } else {
// NOTE(review): in scan-only mode the parseProperty error is discarded; the
// call is made only to advance the cursor.
1506 p.parseProperty()
1507 }
1508
1509 continue
1510
1511 case '-':
// An escaped dash is always a literal '-'.
1512 if !scanOnly {
1513 cc.addRange(ch, ch)
1514 }
1515 continue
1516
1517 default:
// Any other escape resolves to a single rune; back up so scanCharEscape
// re-reads the escape letter. fTranslatedChar marks ch as coming from an
// escape so it is not mistaken for a literal '[' or '-' below.
1518 p.moveLeft()
1519 var err error
1520 ch, err = p.scanCharEscape() // non-literal character
1521 if err != nil {
1522 return nil, err
1523 }
1524 fTranslatedChar = true
1525 break // this break will only break out of the switch
1526 }
1527 } else if ch == '[' {
1528 // This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
1529 // It currently doesn't do anything other than skip the whole thing!
// Exception visible below: in RE2 mode the name is looked up with
// addNamedASCII and an unknown name is reported as ErrInvalidCharRange.
1530 if p.charsRight() > 0 && p.rightChar(0) == ':' && !inRange {
1531 savePos := p.textpos()
1532
1533 p.moveRight(1)
1534 negate := false
1535 if p.charsRight() > 1 && p.rightChar(0) == '^' {
1536 negate = true
1537 p.moveRight(1)
1538 }
1539
1540 nm := p.scanCapname() // snag the name
1541 if !scanOnly && p.useRE2() {
1542 // look up the name since these are valid for RE2
1543 // add the group based on the name
1544 if ok := cc.addNamedASCII(nm, negate); !ok {
1545 return nil, p.getErr(ErrInvalidCharRange)
1546 }
1547 }
// If the name is not followed by ":]" rewind to just after the '[' so it is
// handled as an ordinary character by the code below.
1548 if p.charsRight() < 2 || p.moveRightGetChar() != ':' || p.moveRightGetChar() != ']' {
1549 p.textto(savePos)
1550 } else if p.useRE2() {
1551 // move on
1552 continue
1553 }
1554 }
1555 }
1556
// At this point ch is a single candidate character: either the end of a
// pending range, the start of a new range, a subtraction marker, or a literal.
1557 if inRange {
1558 inRange = false
1559 if !scanOnly {
1560 if ch == '[' && !fTranslatedChar && !firstChar {
1561 // We thought we were in a range, but we're actually starting a subtraction.
1562 // In that case, we'll add chPrev to our char class, skip the opening [, and
1563 // scan the new character class recursively.
1564 cc.addChar(chPrev)
1565 sub, err := p.scanCharSet(caseInsensitive, false)
1566 if err != nil {
1567 return nil, err
1568 }
1569 cc.addSubtraction(sub)
1570
1571 if p.charsRight() > 0 && p.rightChar(0) != ']' {
1572 return nil, p.getErr(ErrSubtractionMustBeLast)
1573 }
1574 } else {
1575 // a regular range, like a-z
1576 if chPrev > ch {
1577 return nil, p.getErr(ErrReversedCharRange, chPrev, ch)
1578 }
1579 cc.addRange(chPrev, ch)
1580 }
1581 }
1582 } else if p.charsRight() >= 2 && p.rightChar(0) == '-' && p.rightChar(1) != ']' {
1583 // this could be the start of a range
1584 chPrev = ch
1585 inRange = true
1586 p.moveRight(1)
1587 } else if p.charsRight() >= 1 && ch == '-' && !fTranslatedChar && p.rightChar(0) == '[' && !firstChar {
1588 // we aren't in a range, and now there is a subtraction. Usually this happens
1589 // only when a subtraction follows a range, like [a-z-[b]]
1590 if !scanOnly {
1591 p.moveRight(1)
1592 sub, err := p.scanCharSet(caseInsensitive, false)
1593 if err != nil {
1594 return nil, err
1595 }
1596 cc.addSubtraction(sub)
1597
1598 if p.charsRight() > 0 && p.rightChar(0) != ']' {
1599 return nil, p.getErr(ErrSubtractionMustBeLast)
1600 }
1601 } else {
// NOTE(review): the nested scan's error is discarded in scan-only mode.
1602 p.moveRight(1)
1603 p.scanCharSet(caseInsensitive, true)
1604 }
1605 } else {
// Plain single character.
1606 if !scanOnly {
1607 cc.addRange(ch, ch)
1608 }
1609 }
1610 }
1611
1612 if !closed {
1613 return nil, p.getErr(ErrUnterminatedBracket)
1614 }
1615
1616 if !scanOnly && caseInsensitive {
1617 cc.addLowercase()
1618 }
1619
1620 return cc, nil
1621}
1622
1623// Scans any number of decimal digits (pegs value at 2^31-1 if too large)
1624func (p *parser) scanDecimal() (int, error) {
1625 i := 0
1626 var d int
1627
1628 for p.charsRight() > 0 {
1629 d = int(p.rightChar(0) - '0')
1630 if d < 0 || d > 9 {
1631 break
1632 }
1633 p.moveRight(1)
1634
1635 if i > maxValueDiv10 || (i == maxValueDiv10 && d > maxValueMod10) {
1636 return 0, p.getErr(ErrCaptureGroupOutOfRange)
1637 }
1638
1639 i *= 10
1640 i += d
1641 }
1642
1643 return int(i), nil
1644}
1645
1646// Returns true for options allowed only at the top level
1647func isOnlyTopOption(option RegexOptions) bool {
1648 return option == RightToLeft || option == ECMAScript || option == RE2
1649}
1650
1651// Scans cimsx-cimsx option string, stops at the first unrecognized char.
1652func (p *parser) scanOptions() {
1653
1654 for off := false; p.charsRight() > 0; p.moveRight(1) {
1655 ch := p.rightChar(0)
1656
1657 if ch == '-' {
1658 off = true
1659 } else if ch == '+' {
1660 off = false
1661 } else {
1662 option := optionFromCode(ch)
1663 if option == 0 || isOnlyTopOption(option) {
1664 return
1665 }
1666
1667 if off {
1668 p.options &= ^option
1669 } else {
1670 p.options |= option
1671 }
1672 }
1673 }
1674}
1675
1676// Scans \ code for escape codes that map to single unicode chars.
1677func (p *parser) scanCharEscape() (r rune, err error) {
1678
1679 ch := p.moveRightGetChar()
1680
1681 if ch >= '0' && ch <= '7' {
1682 p.moveLeft()
1683 return p.scanOctal(), nil
1684 }
1685
1686 pos := p.textpos()
1687
1688 switch ch {
1689 case 'x':
1690 // support for \x{HEX} syntax from Perl and PCRE
1691 if p.charsRight() > 0 && p.rightChar(0) == '{' {
1692 if p.useOptionE() {
1693 return ch, nil
1694 }
1695 p.moveRight(1)
1696 return p.scanHexUntilBrace()
1697 } else {
1698 r, err = p.scanHex(2)
1699 }
1700 case 'u':
1701 // ECMAscript suppot \u{HEX} only if `u` is also set
1702 if p.useOptionE() && p.useOptionU() && p.charsRight() > 0 && p.rightChar(0) == '{' {
1703 p.moveRight(1)
1704 return p.scanHexUntilBrace()
1705 } else {
1706 r, err = p.scanHex(4)
1707 }
1708 case 'a':
1709 return '\u0007', nil
1710 case 'b':
1711 return '\b', nil
1712 case 'e':
1713 return '\u001B', nil
1714 case 'f':
1715 return '\f', nil
1716 case 'n':
1717 return '\n', nil
1718 case 'r':
1719 return '\r', nil
1720 case 't':
1721 return '\t', nil
1722 case 'v':
1723 return '\u000B', nil
1724 case 'c':
1725 r, err = p.scanControl()
1726 default:
1727 if !p.useOptionE() && !p.useRE2() && IsWordChar(ch) {
1728 return 0, p.getErr(ErrUnrecognizedEscape, string(ch))
1729 }
1730 return ch, nil
1731 }
1732 if err != nil && p.useOptionE() {
1733 p.textto(pos)
1734 return ch, nil
1735 }
1736 return
1737}
1738
1739// Grabs and converts an ascii control character
1740func (p *parser) scanControl() (rune, error) {
1741 if p.charsRight() <= 0 {
1742 return 0, p.getErr(ErrMissingControl)
1743 }
1744
1745 ch := p.moveRightGetChar()
1746
1747 // \ca interpreted as \cA
1748
1749 if ch >= 'a' && ch <= 'z' {
1750 ch = (ch - ('a' - 'A'))
1751 }
1752 ch = (ch - '@')
1753 if ch >= 0 && ch < ' ' {
1754 return ch, nil
1755 }
1756
1757 return 0, p.getErr(ErrUnrecognizedControl)
1758
1759}
1760
1761// Scan hex digits until we hit a closing brace.
1762// Non-hex digits, hex value too large for UTF-8, or running out of chars are errors
1763func (p *parser) scanHexUntilBrace() (rune, error) {
1764 // PCRE spec reads like unlimited hex digits are allowed, but unicode has a limit
1765 // so we can enforce that
1766 i := 0
1767 hasContent := false
1768
1769 for p.charsRight() > 0 {
1770 ch := p.moveRightGetChar()
1771 if ch == '}' {
1772 // hit our close brace, we're done here
1773 // prevent \x{}
1774 if !hasContent {
1775 return 0, p.getErr(ErrTooFewHex)
1776 }
1777 return rune(i), nil
1778 }
1779 hasContent = true
1780 // no brace needs to be hex digit
1781 d := hexDigit(ch)
1782 if d < 0 {
1783 return 0, p.getErr(ErrMissingBrace)
1784 }
1785
1786 i *= 0x10
1787 i += d
1788
1789 if i > unicode.MaxRune {
1790 return 0, p.getErr(ErrInvalidHex)
1791 }
1792 }
1793
1794 // we only make it here if we run out of digits without finding the brace
1795 return 0, p.getErr(ErrMissingBrace)
1796}
1797
1798// Scans exactly c hex digits (c=2 for \xFF, c=4 for \uFFFF)
1799func (p *parser) scanHex(c int) (rune, error) {
1800
1801 i := 0
1802
1803 if p.charsRight() >= c {
1804 for c > 0 {
1805 d := hexDigit(p.moveRightGetChar())
1806 if d < 0 {
1807 break
1808 }
1809 i *= 0x10
1810 i += d
1811 c--
1812 }
1813 }
1814
1815 if c > 0 {
1816 return 0, p.getErr(ErrTooFewHex)
1817 }
1818
1819 return rune(i), nil
1820}
1821
// hexDigit returns the value (0..15) of the hexadecimal digit ch, accepting
// both upper- and lower-case letters, or -1 if ch is not a hex digit.
func hexDigit(ch rune) int {
	switch {
	case ch >= '0' && ch <= '9':
		return int(ch - '0')
	case ch >= 'a' && ch <= 'f':
		return int(ch-'a') + 0xa
	case ch >= 'A' && ch <= 'F':
		return int(ch-'A') + 0xa
	}
	return -1
}
1839
1840// Scans up to three octal digits (stops before exceeding 0377).
1841func (p *parser) scanOctal() rune {
1842 // Consume octal chars only up to 3 digits and value 0377
1843
1844 c := 3
1845
1846 if c > p.charsRight() {
1847 c = p.charsRight()
1848 }
1849
1850 //we know the first char is good because the caller had to check
1851 i := 0
1852 d := int(p.rightChar(0) - '0')
1853 for c > 0 && d <= 7 && d >= 0 {
1854 if i >= 0x20 && p.useOptionE() {
1855 break
1856 }
1857 i *= 8
1858 i += d
1859 c--
1860
1861 p.moveRight(1)
1862 if !p.rightMost() {
1863 d = int(p.rightChar(0) - '0')
1864 }
1865 }
1866
1867 // Octal codes only go up to 255. Any larger and the behavior that Perl follows
1868 // is simply to truncate the high bits.
1869 i &= 0xFF
1870
1871 return rune(i)
1872}
1873
1874// Returns the current parsing position.
1875func (p *parser) textpos() int {
1876 return p.currentPos
1877}
1878
1879// Zaps to a specific parsing position.
1880func (p *parser) textto(pos int) {
1881 p.currentPos = pos
1882}
1883
1884// Returns the char at the right of the current parsing position and advances to the right.
1885func (p *parser) moveRightGetChar() rune {
1886 ch := p.pattern[p.currentPos]
1887 p.currentPos++
1888 return ch
1889}
1890
1891// Moves the current position to the right.
1892func (p *parser) moveRight(i int) {
1893 // default would be 1
1894 p.currentPos += i
1895}
1896
1897// Moves the current parsing position one to the left.
1898func (p *parser) moveLeft() {
1899 p.currentPos--
1900}
1901
1902// Returns the char left of the current parsing position.
1903func (p *parser) charAt(i int) rune {
1904 return p.pattern[i]
1905}
1906
1907// Returns the char i chars right of the current parsing position.
1908func (p *parser) rightChar(i int) rune {
1909 // default would be 0
1910 return p.pattern[p.currentPos+i]
1911}
1912
1913// Number of characters to the right of the current parsing position.
1914func (p *parser) charsRight() int {
1915 return len(p.pattern) - p.currentPos
1916}
1917
1918func (p *parser) rightMost() bool {
1919 return p.currentPos == len(p.pattern)
1920}
1921
1922// Looks up the slot number for a given name
1923func (p *parser) captureSlotFromName(capname string) int {
1924 return p.capnames[capname]
1925}
1926
1927// True if the capture slot was noted
1928func (p *parser) isCaptureSlot(i int) bool {
1929 if p.caps != nil {
1930 _, ok := p.caps[i]
1931 return ok
1932 }
1933
1934 return (i >= 0 && i < p.capsize)
1935}
1936
1937// Looks up the slot number for a given name
1938func (p *parser) isCaptureName(capname string) bool {
1939 if p.capnames == nil {
1940 return false
1941 }
1942
1943 _, ok := p.capnames[capname]
1944 return ok
1945}
1946
1947// option shortcuts
1948
1949// True if N option disabling '(' autocapture is on.
1950func (p *parser) useOptionN() bool {
1951 return (p.options & ExplicitCapture) != 0
1952}
1953
1954// True if I option enabling case-insensitivity is on.
1955func (p *parser) useOptionI() bool {
1956 return (p.options & IgnoreCase) != 0
1957}
1958
1959// True if M option altering meaning of $ and ^ is on.
1960func (p *parser) useOptionM() bool {
1961 return (p.options & Multiline) != 0
1962}
1963
1964// True if S option altering meaning of . is on.
1965func (p *parser) useOptionS() bool {
1966 return (p.options & Singleline) != 0
1967}
1968
1969// True if X option enabling whitespace/comment mode is on.
1970func (p *parser) useOptionX() bool {
1971 return (p.options & IgnorePatternWhitespace) != 0
1972}
1973
1974// True if E option enabling ECMAScript behavior on.
1975func (p *parser) useOptionE() bool {
1976 return (p.options & ECMAScript) != 0
1977}
1978
1979// true to use RE2 compatibility parsing behavior.
1980func (p *parser) useRE2() bool {
1981 return (p.options & RE2) != 0
1982}
1983
1984// True if U option enabling ECMAScript's Unicode behavior on.
1985func (p *parser) useOptionU() bool {
1986 return (p.options & Unicode) != 0
1987}
1988
1989// True if options stack is empty.
1990func (p *parser) emptyOptionsStack() bool {
1991 return len(p.optionsStack) == 0
1992}
1993
1994// Finish the current quantifiable (when a quantifier is not found or is not possible)
1995func (p *parser) addConcatenate() {
1996 // The first (| inside a Testgroup group goes directly to the group
1997 p.concatenation.addChild(p.unit)
1998 p.unit = nil
1999}
2000
2001// Finish the current quantifiable (when a quantifier is found)
2002func (p *parser) addConcatenate3(lazy bool, min, max int) {
2003 p.concatenation.addChild(p.unit.makeQuantifier(lazy, min, max))
2004 p.unit = nil
2005}
2006
2007// Sets the current unit to a single char node
2008func (p *parser) addUnitOne(ch rune) {
2009 if p.useOptionI() {
2010 ch = unicode.ToLower(ch)
2011 }
2012
2013 p.unit = newRegexNodeCh(ntOne, p.options, ch)
2014}
2015
2016// Sets the current unit to a single inverse-char node
2017func (p *parser) addUnitNotone(ch rune) {
2018 if p.useOptionI() {
2019 ch = unicode.ToLower(ch)
2020 }
2021
2022 p.unit = newRegexNodeCh(ntNotone, p.options, ch)
2023}
2024
2025// Sets the current unit to a single set node
2026func (p *parser) addUnitSet(set *CharSet) {
2027 p.unit = newRegexNodeSet(ntSet, p.options, set)
2028}
2029
2030// Sets the current unit to a subtree
2031func (p *parser) addUnitNode(node *regexNode) {
2032 p.unit = node
2033}
2034
2035// Sets the current unit to an assertion of the specified type
2036func (p *parser) addUnitType(t nodeType) {
2037 p.unit = newRegexNode(t, p.options)
2038}
2039
2040// Finish the current group (in response to a ')' or end)
2041func (p *parser) addGroup() error {
2042 if p.group.t == ntTestgroup || p.group.t == ntTestref {
2043 p.group.addChild(p.concatenation.reverseLeft())
2044 if (p.group.t == ntTestref && len(p.group.children) > 2) || len(p.group.children) > 3 {
2045 return p.getErr(ErrTooManyAlternates)
2046 }
2047 } else {
2048 p.alternation.addChild(p.concatenation.reverseLeft())
2049 p.group.addChild(p.alternation)
2050 }
2051
2052 p.unit = p.group
2053 return nil
2054}
2055
2056// Pops the option stack, but keeps the current options unchanged.
2057func (p *parser) popKeepOptions() {
2058 lastIdx := len(p.optionsStack) - 1
2059 p.optionsStack = p.optionsStack[:lastIdx]
2060}
2061
2062// Recalls options from the stack.
2063func (p *parser) popOptions() {
2064 lastIdx := len(p.optionsStack) - 1
2065 // get the last item on the stack and then remove it by reslicing
2066 p.options = p.optionsStack[lastIdx]
2067 p.optionsStack = p.optionsStack[:lastIdx]
2068}
2069
2070// Saves options on a stack.
2071func (p *parser) pushOptions() {
2072 p.optionsStack = append(p.optionsStack, p.options)
2073}
2074
2075// Add a string to the last concatenate.
2076func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
2077 var node *regexNode
2078
2079 if cch == 0 {
2080 return
2081 }
2082
2083 if cch > 1 {
2084 str := make([]rune, cch)
2085 copy(str, p.pattern[pos:pos+cch])
2086
2087 if p.useOptionI() && !isReplacement {
2088 // We do the ToLower character by character for consistency. With surrogate chars, doing
2089 // a ToLower on the entire string could actually change the surrogate pair. This is more correct
2090 // linguistically, but since Regex doesn't support surrogates, it's more important to be
2091 // consistent.
2092 for i := 0; i < len(str); i++ {
2093 str[i] = unicode.ToLower(str[i])
2094 }
2095 }
2096
2097 node = newRegexNodeStr(ntMulti, p.options, str)
2098 } else {
2099 ch := p.charAt(pos)
2100
2101 if p.useOptionI() && !isReplacement {
2102 ch = unicode.ToLower(ch)
2103 }
2104
2105 node = newRegexNodeCh(ntOne, p.options, ch)
2106 }
2107
2108 p.concatenation.addChild(node)
2109}
2110
2111// Push the parser state (in response to an open paren)
2112func (p *parser) pushGroup() {
2113 p.group.next = p.stack
2114 p.alternation.next = p.group
2115 p.concatenation.next = p.alternation
2116 p.stack = p.concatenation
2117}
2118
2119// Remember the pushed state (in response to a ')')
2120func (p *parser) popGroup() error {
2121 p.concatenation = p.stack
2122 p.alternation = p.concatenation.next
2123 p.group = p.alternation.next
2124 p.stack = p.group.next
2125
2126 // The first () inside a Testgroup group goes directly to the group
2127 if p.group.t == ntTestgroup && len(p.group.children) == 0 {
2128 if p.unit == nil {
2129 return p.getErr(ErrConditionalExpression)
2130 }
2131
2132 p.group.addChild(p.unit)
2133 p.unit = nil
2134 }
2135 return nil
2136}
2137
2138// True if the group stack is empty.
2139func (p *parser) emptyStack() bool {
2140 return p.stack == nil
2141}
2142
2143// Start a new round for the parser state (in response to an open paren or string start)
2144func (p *parser) startGroup(openGroup *regexNode) {
2145 p.group = openGroup
2146 p.alternation = newRegexNode(ntAlternate, p.options)
2147 p.concatenation = newRegexNode(ntConcatenate, p.options)
2148}
2149
2150// Finish the current concatenation (in response to a |)
2151func (p *parser) addAlternate() {
2152 // The | parts inside a Testgroup group go directly to the group
2153
2154 if p.group.t == ntTestgroup || p.group.t == ntTestref {
2155 p.group.addChild(p.concatenation.reverseLeft())
2156 } else {
2157 p.alternation.addChild(p.concatenation.reverseLeft())
2158 }
2159
2160 p.concatenation = newRegexNode(ntConcatenate, p.options)
2161}
2162
2163// For categorizing ascii characters.
2164
// Category codes stored in _category. The values are ordered so that the
// helpers below can test with >=: isQuantifier checks >= Q, isSpecial checks
// >= S, isStopperX checks >= X, and isSpace checks == X.
// Only Q is explicitly typed (byte); the other four are untyped constants.
2165const (
2166 Q byte = 5 // quantifier
2167 S = 4 // ordinary stopper
2168 Z = 3 // ScanBlank stopper
2169 X = 2 // whitespace
2170 E = 1 // should be escaped
2171)
2172
// _category is a lookup table indexed by character code, laid out in four rows
// of 32 entries; each entry is 0 or one of the category constants above. The
// comment line above each row names the characters for that row's columns.
// The helpers that read it (isSpace, isSpecial, isStopperX, isQuantifier,
// isTrueQuantifier) compare the character against an upper bound before
// indexing.
2173var _category = []byte{
2174 //01 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F
2175 0, 0, 0, 0, 0, 0, 0, 0, 0, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2176 // ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
2177 X, 0, 0, Z, S, 0, 0, 0, S, S, Q, Q, 0, 0, S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q,
2178 //@A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _
2179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, S, 0,
2180 //'a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
2181 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q, S, 0, 0, 0,
2182}
2183
2184func isSpace(ch rune) bool {
2185 return (ch <= ' ' && _category[ch] == X)
2186}
2187
2188// Returns true for those characters that terminate a string of ordinary chars.
2189func isSpecial(ch rune) bool {
2190 return (ch <= '|' && _category[ch] >= S)
2191}
2192
2193// Returns true for those characters that terminate a string of ordinary chars.
2194func isStopperX(ch rune) bool {
2195 return (ch <= '|' && _category[ch] >= X)
2196}
2197
2198// Returns true for those characters that begin a quantifier.
2199func isQuantifier(ch rune) bool {
2200 return (ch <= '{' && _category[ch] >= Q)
2201}
2202
2203func (p *parser) isTrueQuantifier() bool {
2204 nChars := p.charsRight()
2205 if nChars == 0 {
2206 return false
2207 }
2208
2209 startpos := p.textpos()
2210 ch := p.charAt(startpos)
2211 if ch != '{' {
2212 return ch <= '{' && _category[ch] >= Q
2213 }
2214
2215 //UGLY: this is ugly -- the original code was ugly too
2216 pos := startpos
2217 for {
2218 nChars--
2219 if nChars <= 0 {
2220 break
2221 }
2222 pos++
2223 ch = p.charAt(pos)
2224 if ch < '0' || ch > '9' {
2225 break
2226 }
2227 }
2228
2229 if nChars == 0 || pos-startpos == 1 {
2230 return false
2231 }
2232 if ch == '}' {
2233 return true
2234 }
2235 if ch != ',' {
2236 return false
2237 }
2238 for {
2239 nChars--
2240 if nChars <= 0 {
2241 break
2242 }
2243 pos++
2244 ch = p.charAt(pos)
2245 if ch < '0' || ch > '9' {
2246 break
2247 }
2248 }
2249
2250 return nChars > 0 && ch == '}'
2251}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/prefix.go b/vendor/github.com/dlclark/regexp2/syntax/prefix.go
new file mode 100644
index 0000000..f671688
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/prefix.go
@@ -0,0 +1,896 @@
1package syntax
2
3import (
4 "bytes"
5 "fmt"
6 "strconv"
7 "unicode"
8 "unicode/utf8"
9)
10
// Prefix describes what a match must start with, as computed from a
// RegexTree. The two producers in this file fill different fields:
// getPrefix sets PrefixStr (a literal leading string), while
// getFirstCharsPrefix sets PrefixSet (the set of possible first characters).
// Both set CaseInsensitive from the IgnoreCase option of the contributing
// nodes.
11type Prefix struct {
12 PrefixStr []rune
13 PrefixSet CharSet
14 CaseInsensitive bool
15}
16
17// It takes a RegexTree and computes the set of chars that can start it.
18func getFirstCharsPrefix(tree *RegexTree) *Prefix {
19 s := regexFcd{
20 fcStack: make([]regexFc, 32),
21 intStack: make([]int, 32),
22 }
23 fc := s.regexFCFromRegexTree(tree)
24
25 if fc == nil || fc.nullable || fc.cc.IsEmpty() {
26 return nil
27 }
28 fcSet := fc.getFirstChars()
29 return &Prefix{PrefixSet: fcSet, CaseInsensitive: fc.caseInsensitive}
30}
31
// regexFcd holds the working state of the first-character analysis performed
// by regexFCFromRegexTree. Both stacks are slices used as fixed-size buffers
// with a separate depth counter; pushInt/pushFC grow them when full.
32type regexFcd struct {
// intStack/intDepth: stack of child indexes, used to walk the tree without recursion.
33 intStack []int
34 intDepth int
// fcStack/fcDepth: stack of partial first-char results being combined.
35 fcStack []regexFc
36 fcDepth int
37 skipAllChildren bool // don't process any more children at the current level
38 skipchild bool // don't process the current child.
// failed is set when two first-char sets could not be merged (addFC returned false).
39 failed bool
40}
41
42/*
43 * The main FC computation. It does a shortcutted depth-first walk
44 * through the tree and calls CalculateFC to emits code before
45 * and after each child of an interior node, and at each leaf.
46 */
47func (s *regexFcd) regexFCFromRegexTree(tree *RegexTree) *regexFc {
48 curNode := tree.root
49 curChild := 0
50
51 for {
52 if len(curNode.children) == 0 {
53 // This is a leaf node
54 s.calculateFC(curNode.t, curNode, 0)
55 } else if curChild < len(curNode.children) && !s.skipAllChildren {
56 // This is an interior node, and we have more children to analyze
57 s.calculateFC(curNode.t|beforeChild, curNode, curChild)
58
59 if !s.skipchild {
60 curNode = curNode.children[curChild]
61 // this stack is how we get a depth first walk of the tree.
62 s.pushInt(curChild)
63 curChild = 0
64 } else {
65 curChild++
66 s.skipchild = false
67 }
68 continue
69 }
70
71 // This is an interior node where we've finished analyzing all the children, or
72 // the end of a leaf node.
73 s.skipAllChildren = false
74
75 if s.intIsEmpty() {
76 break
77 }
78
79 curChild = s.popInt()
80 curNode = curNode.next
81
82 s.calculateFC(curNode.t|afterChild, curNode, curChild)
83 if s.failed {
84 return nil
85 }
86
87 curChild++
88 }
89
90 if s.fcIsEmpty() {
91 return nil
92 }
93
94 return s.popFC()
95}
96
97// To avoid recursion, we use a simple integer stack.
98// This is the push.
99func (s *regexFcd) pushInt(I int) {
100 if s.intDepth >= len(s.intStack) {
101 expanded := make([]int, s.intDepth*2)
102 copy(expanded, s.intStack)
103 s.intStack = expanded
104 }
105
106 s.intStack[s.intDepth] = I
107 s.intDepth++
108}
109
110// True if the stack is empty.
111func (s *regexFcd) intIsEmpty() bool {
112 return s.intDepth == 0
113}
114
115// This is the pop.
116func (s *regexFcd) popInt() int {
117 s.intDepth--
118 return s.intStack[s.intDepth]
119}
120
121// We also use a stack of RegexFC objects.
122// This is the push.
123func (s *regexFcd) pushFC(fc regexFc) {
124 if s.fcDepth >= len(s.fcStack) {
125 expanded := make([]regexFc, s.fcDepth*2)
126 copy(expanded, s.fcStack)
127 s.fcStack = expanded
128 }
129
130 s.fcStack[s.fcDepth] = fc
131 s.fcDepth++
132}
133
134// True if the stack is empty.
135func (s *regexFcd) fcIsEmpty() bool {
136 return s.fcDepth == 0
137}
138
139// This is the pop.
140func (s *regexFcd) popFC() *regexFc {
141 s.fcDepth--
142 return &s.fcStack[s.fcDepth]
143}
144
145// This is the top.
146func (s *regexFcd) topFC() *regexFc {
147 return &s.fcStack[s.fcDepth-1]
148}
149
150// Called in Beforechild to prevent further processing of the current child
151func (s *regexFcd) skipChild() {
152 s.skipchild = true
153}
154
155// FC computation and shortcut cases for each node type
156func (s *regexFcd) calculateFC(nt nodeType, node *regexNode, CurIndex int) {
157 //fmt.Printf("NodeType: %v, CurIndex: %v, Desc: %v\n", nt, CurIndex, node.description())
158 ci := false
159 rtl := false
160
161 if nt <= ntRef {
162 if (node.options & IgnoreCase) != 0 {
163 ci = true
164 }
165 if (node.options & RightToLeft) != 0 {
166 rtl = true
167 }
168 }
169
170 switch nt {
171 case ntConcatenate | beforeChild, ntAlternate | beforeChild, ntTestref | beforeChild, ntLoop | beforeChild, ntLazyloop | beforeChild:
172 break
173
174 case ntTestgroup | beforeChild:
175 if CurIndex == 0 {
176 s.skipChild()
177 }
178 break
179
180 case ntEmpty:
181 s.pushFC(regexFc{nullable: true})
182 break
183
184 case ntConcatenate | afterChild:
185 if CurIndex != 0 {
186 child := s.popFC()
187 cumul := s.topFC()
188
189 s.failed = !cumul.addFC(*child, true)
190 }
191
192 fc := s.topFC()
193 if !fc.nullable {
194 s.skipAllChildren = true
195 }
196 break
197
198 case ntTestgroup | afterChild:
199 if CurIndex > 1 {
200 child := s.popFC()
201 cumul := s.topFC()
202
203 s.failed = !cumul.addFC(*child, false)
204 }
205 break
206
207 case ntAlternate | afterChild, ntTestref | afterChild:
208 if CurIndex != 0 {
209 child := s.popFC()
210 cumul := s.topFC()
211
212 s.failed = !cumul.addFC(*child, false)
213 }
214 break
215
216 case ntLoop | afterChild, ntLazyloop | afterChild:
217 if node.m == 0 {
218 fc := s.topFC()
219 fc.nullable = true
220 }
221 break
222
223 case ntGroup | beforeChild, ntGroup | afterChild, ntCapture | beforeChild, ntCapture | afterChild, ntGreedy | beforeChild, ntGreedy | afterChild:
224 break
225
226 case ntRequire | beforeChild, ntPrevent | beforeChild:
227 s.skipChild()
228 s.pushFC(regexFc{nullable: true})
229 break
230
231 case ntRequire | afterChild, ntPrevent | afterChild:
232 break
233
234 case ntOne, ntNotone:
235 s.pushFC(newRegexFc(node.ch, nt == ntNotone, false, ci))
236 break
237
238 case ntOneloop, ntOnelazy:
239 s.pushFC(newRegexFc(node.ch, false, node.m == 0, ci))
240 break
241
242 case ntNotoneloop, ntNotonelazy:
243 s.pushFC(newRegexFc(node.ch, true, node.m == 0, ci))
244 break
245
246 case ntMulti:
247 if len(node.str) == 0 {
248 s.pushFC(regexFc{nullable: true})
249 } else if !rtl {
250 s.pushFC(newRegexFc(node.str[0], false, false, ci))
251 } else {
252 s.pushFC(newRegexFc(node.str[len(node.str)-1], false, false, ci))
253 }
254 break
255
256 case ntSet:
257 s.pushFC(regexFc{cc: node.set.Copy(), nullable: false, caseInsensitive: ci})
258 break
259
260 case ntSetloop, ntSetlazy:
261 s.pushFC(regexFc{cc: node.set.Copy(), nullable: node.m == 0, caseInsensitive: ci})
262 break
263
264 case ntRef:
265 s.pushFC(regexFc{cc: *AnyClass(), nullable: true, caseInsensitive: false})
266 break
267
268 case ntNothing, ntBol, ntEol, ntBoundary, ntNonboundary, ntECMABoundary, ntNonECMABoundary, ntBeginning, ntStart, ntEndZ, ntEnd:
269 s.pushFC(regexFc{nullable: true})
270 break
271
272 default:
273 panic(fmt.Sprintf("unexpected op code: %v", nt))
274 }
275}
276
// regexFc is one partial result of the first-character analysis.
277type regexFc struct {
// cc is the set of characters collected so far (merged by addFC).
278 cc CharSet
// nullable is set by calculateFC for nodes such as ntEmpty, the assertions,
// and loops whose minimum count is 0.
279 nullable bool
// caseInsensitive makes getFirstChars apply addLowercase to cc.
280 caseInsensitive bool
281}
282
283func newRegexFc(ch rune, not, nullable, caseInsensitive bool) regexFc {
284 r := regexFc{
285 caseInsensitive: caseInsensitive,
286 nullable: nullable,
287 }
288 if not {
289 if ch > 0 {
290 r.cc.addRange('\x00', ch-1)
291 }
292 if ch < 0xFFFF {
293 r.cc.addRange(ch+1, utf8.MaxRune)
294 }
295 } else {
296 r.cc.addRange(ch, ch)
297 }
298 return r
299}
300
301func (r *regexFc) getFirstChars() CharSet {
302 if r.caseInsensitive {
303 r.cc.addLowercase()
304 }
305
306 return r.cc
307}
308
309func (r *regexFc) addFC(fc regexFc, concatenate bool) bool {
310 if !r.cc.IsMergeable() || !fc.cc.IsMergeable() {
311 return false
312 }
313
314 if concatenate {
315 if !r.nullable {
316 return true
317 }
318
319 if !fc.nullable {
320 r.nullable = false
321 }
322 } else {
323 if fc.nullable {
324 r.nullable = true
325 }
326 }
327
328 r.caseInsensitive = r.caseInsensitive || fc.caseInsensitive
329 r.cc.addSet(fc.cc)
330
331 return true
332}
333
// getPrefix is a related computation: it takes a RegexTree and computes the
// leading substring if it sees one. It's quite trivial and gives up easily.
//
// The walk starts at tree.root. Capture and greedy nodes are entered through
// their first child, the children of a concatenation are visited in order,
// and anchors, empties and lookarounds are stepped over. The first One,
// Multi, Oneloop or Onelazy node reached supplies the prefix; any other node
// type ends the search. It returns nil when no prefix is found.
func getPrefix(tree *RegexTree) *Prefix {
	var concatNode *regexNode // concatenation whose children are currently being walked, if any
	nextChild := 0            // index of the next child of concatNode to visit

	curNode := tree.root

	for {
		switch curNode.t {
		case ntConcatenate:
			// Start walking this concatenation's children from the first one.
			if len(curNode.children) > 0 {
				concatNode = curNode
				nextChild = 0
			}

		case ntGreedy, ntCapture:
			// Descend into the group; the enclosing concatenation is forgotten.
			curNode = curNode.children[0]
			concatNode = nil
			continue

		case ntOneloop, ntOnelazy:
			// A loop contributes its mandatory repetitions (capped by repeat).
			if curNode.m > 0 {
				return &Prefix{
					PrefixStr:       repeat(curNode.ch, curNode.m),
					CaseInsensitive: (curNode.options & IgnoreCase) != 0,
				}
			}
			return nil

		case ntOne:
			return &Prefix{
				PrefixStr:       []rune{curNode.ch},
				CaseInsensitive: (curNode.options & IgnoreCase) != 0,
			}

		case ntMulti:
			// Note: the returned slice aliases the node's own str slice.
			return &Prefix{
				PrefixStr:       curNode.str,
				CaseInsensitive: (curNode.options & IgnoreCase) != 0,
			}

		case ntBol, ntEol, ntBoundary, ntECMABoundary, ntBeginning, ntStart,
			ntEndZ, ntEnd, ntEmpty, ntRequire, ntPrevent:
			// These nodes are stepped over; move on to the next sibling below.

		default:
			return nil
		}

		// Advance to the next child of the current concatenation, if any.
		if concatNode == nil || nextChild >= len(concatNode.children) {
			return nil
		}

		curNode = concatNode.children[nextChild]
		nextChild++
	}
}
391
392// repeat the rune r, c times... up to the max of MaxPrefixSize
393func repeat(r rune, c int) []rune {
394 if c > MaxPrefixSize {
395 c = MaxPrefixSize
396 }
397
398 ret := make([]rune, c)
399
400 // binary growth using copy for speed
401 ret[0] = r
402 bp := 1
403 for bp < len(ret) {
404 copy(ret[bp:], ret[:bp])
405 bp *= 2
406 }
407
408 return ret
409}
410
// BmPrefix precomputes the Boyer-Moore
// tables for fast string scanning. These tables allow
// you to scan for the first occurrence of a string within
// a large body of text without examining every character.
// The performance of the heuristic depends on the actual
// string and the text being searched, but usually, the longer
// the string that is being searched for, the fewer characters
// need to be examined.
type BmPrefix struct {
	positive        []int   // good-suffix shift per pattern position (one entry per pattern rune)
	negativeASCII   []int   // bad-character shift indexed by rune value; covers runes below 128
	negativeUnicode [][]int // bad-character shift pages indexed by [ch>>8][ch&0xFF]; allocated on demand
	pattern         []rune  // pattern to search for; lower-cased by newBmPrefix when caseInsensitive
	lowASCII        rune    // smallest ASCII rune occurring in pattern
	highASCII       rune    // largest ASCII rune occurring in pattern
	rightToLeft     bool    // scan direction
	caseInsensitive bool    // text runes are lower-cased before comparison when set
}
429
// newBmPrefix builds the Boyer-Moore shift tables for pattern.
//
// When caseInsensitive is set the pattern is lower-cased in place, so the
// caller's slice is modified. rightToLeft selects the scan direction the
// tables are built for. The function returns nil if the pattern contains a
// rune above 0xffff, which the table layout cannot represent. The pattern
// must be non-empty: the tail character is indexed unconditionally.
func newBmPrefix(pattern []rune, caseInsensitive, rightToLeft bool) *BmPrefix {

	b := &BmPrefix{
		rightToLeft:     rightToLeft,
		caseInsensitive: caseInsensitive,
		pattern:         pattern,
	}

	if caseInsensitive {
		for i := 0; i < len(b.pattern); i++ {
			// We do the ToLower character by character for consistency.  With surrogate chars, doing
			// a ToLower on the entire string could actually change the surrogate pair.  This is more correct
			// linguistically, but since Regex doesn't support surrogates, it's more important to be
			// consistent.

			b.pattern[i] = unicode.ToLower(b.pattern[i])
		}
	}

	var beforefirst, last, bump int
	var scan, match int

	// beforefirst is the index one step before the first pattern position in
	// scan order, last is the tail position, bump is the scan direction.
	if !rightToLeft {
		beforefirst = -1
		last = len(b.pattern) - 1
		bump = 1
	} else {
		beforefirst = len(b.pattern)
		last = 0
		bump = -1
	}

	// PART I - the good-suffix shift table
	//
	// compute the positive requirement:
	// if char "i" is the first one from the right that doesn't match,
	// then we know the matcher can advance by _positive[i].
	//
	// This algorithm is a simplified variant of the standard
	// Boyer-Moore good suffix calculation.

	b.positive = make([]int, len(b.pattern))

	examine := last
	ch := b.pattern[examine]
	b.positive[examine] = bump
	examine -= bump

Outerloop:
	for {
		// find an internal char (examine) that matches the tail

		for {
			if examine == beforefirst {
				break Outerloop
			}
			if b.pattern[examine] == ch {
				break
			}
			examine -= bump
		}

		match = last
		scan = examine

		// find the length of the match
		for {
			if scan == beforefirst || b.pattern[match] != b.pattern[scan] {
				// at the end of the match, note the difference in _positive
				// this is not the length of the match, but the distance from the internal match
				// to the tail suffix.
				if b.positive[match] == 0 {
					b.positive[match] = match - scan
				}

				// System.Diagnostics.Debug.WriteLine("Set positive[" + match + "] to " + (match - scan));

				break
			}

			scan -= bump
			match -= bump
		}

		examine -= bump
	}

	match = last - bump

	// scan for the chars for which there are no shifts that yield a different candidate

	// The inside of the if statement used to say
	// "_positive[match] = last - beforefirst;"
	// This is slightly less aggressive in how much we skip, but at worst it
	// should mean a little more work rather than skipping a potential match.
	for match != beforefirst {
		if b.positive[match] == 0 {
			b.positive[match] = bump
		}

		match -= bump
	}

	// PART II - the bad-character shift table
	//
	// compute the negative requirement:
	// if char "ch" is the reject character when testing position "i",
	// we can slide up by _negative[ch];
	// (_negative[ch] = str.Length - 1 - str.LastIndexOf(ch))
	//
	// the lookup table is divided into ASCII and Unicode portions;
	// only those parts of the Unicode 16-bit code set that actually
	// appear in the string are in the table. (Maximum size with
	// Unicode is 65K; ASCII only case is 512 bytes.)

	b.negativeASCII = make([]int, 128)

	// Default shift: the full pattern length (negative for right-to-left).
	for i := 0; i < len(b.negativeASCII); i++ {
		b.negativeASCII[i] = last - beforefirst
	}

	b.lowASCII = 127
	b.highASCII = 0

	for examine = last; examine != beforefirst; examine -= bump {
		ch = b.pattern[examine]

		switch {
		case ch < 128:
			if b.lowASCII > ch {
				b.lowASCII = ch
			}

			if b.highASCII < ch {
				b.highASCII = ch
			}

			// Only the occurrence closest to the tail sets the shift.
			if b.negativeASCII[ch] == last-beforefirst {
				b.negativeASCII[ch] = last - examine
			}
		case ch <= 0xffff:
			// i selects the 256-entry page, j the slot within that page.
			i, j := ch>>8, ch&0xFF

			if b.negativeUnicode == nil {
				b.negativeUnicode = make([][]int, 256)
			}

			if b.negativeUnicode[i] == nil {
				newarray := make([]int, 256)

				for k := 0; k < len(newarray); k++ {
					newarray[k] = last - beforefirst
				}

				// Page 0 overlaps the ASCII table: carry the ASCII entries
				// over and let both tables share the same backing array.
				if i == 0 {
					copy(newarray, b.negativeASCII)
					//TODO: this line needed?
					b.negativeASCII = newarray
				}

				b.negativeUnicode[i] = newarray
			}

			if b.negativeUnicode[i][j] == last-beforefirst {
				b.negativeUnicode[i][j] = last - examine
			}
		default:
			// we can't do the filter because this algo doesn't support
			// unicode chars >0xffff
			return nil
		}
	}

	return b
}
605
606func (b *BmPrefix) String() string {
607 return string(b.pattern)
608}
609
610// Dump returns the contents of the filter as a human readable string
611func (b *BmPrefix) Dump(indent string) string {
612 buf := &bytes.Buffer{}
613
614 fmt.Fprintf(buf, "%sBM Pattern: %s\n%sPositive: ", indent, string(b.pattern), indent)
615 for i := 0; i < len(b.positive); i++ {
616 buf.WriteString(strconv.Itoa(b.positive[i]))
617 buf.WriteRune(' ')
618 }
619 buf.WriteRune('\n')
620
621 if b.negativeASCII != nil {
622 buf.WriteString(indent)
623 buf.WriteString("Negative table\n")
624 for i := 0; i < len(b.negativeASCII); i++ {
625 if b.negativeASCII[i] != len(b.pattern) {
626 fmt.Fprintf(buf, "%s %s %s\n", indent, Escape(string(rune(i))), strconv.Itoa(b.negativeASCII[i]))
627 }
628 }
629 }
630
631 return buf.String()
632}
633
634// Scan uses the Boyer-Moore algorithm to find the first occurrence
635// of the specified string within text, beginning at index, and
636// constrained within beglimit and endlimit.
637//
638// The direction and case-sensitivity of the match is determined
639// by the arguments to the RegexBoyerMoore constructor.
640func (b *BmPrefix) Scan(text []rune, index, beglimit, endlimit int) int {
641 var (
642 defadv, test, test2 int
643 match, startmatch, endmatch int
644 bump, advance int
645 chTest rune
646 unicodeLookup []int
647 )
648
649 if !b.rightToLeft {
650 defadv = len(b.pattern)
651 startmatch = len(b.pattern) - 1
652 endmatch = 0
653 test = index + defadv - 1
654 bump = 1
655 } else {
656 defadv = -len(b.pattern)
657 startmatch = 0
658 endmatch = -defadv - 1
659 test = index + defadv
660 bump = -1
661 }
662
663 chMatch := b.pattern[startmatch]
664
665 for {
666 if test >= endlimit || test < beglimit {
667 return -1
668 }
669
670 chTest = text[test]
671
672 if b.caseInsensitive {
673 chTest = unicode.ToLower(chTest)
674 }
675
676 if chTest != chMatch {
677 if chTest < 128 {
678 advance = b.negativeASCII[chTest]
679 } else if chTest < 0xffff && len(b.negativeUnicode) > 0 {
680 unicodeLookup = b.negativeUnicode[chTest>>8]
681 if len(unicodeLookup) > 0 {
682 advance = unicodeLookup[chTest&0xFF]
683 } else {
684 advance = defadv
685 }
686 } else {
687 advance = defadv
688 }
689
690 test += advance
691 } else { // if (chTest == chMatch)
692 test2 = test
693 match = startmatch
694
695 for {
696 if match == endmatch {
697 if b.rightToLeft {
698 return test2 + 1
699 } else {
700 return test2
701 }
702 }
703
704 match -= bump
705 test2 -= bump
706
707 chTest = text[test2]
708
709 if b.caseInsensitive {
710 chTest = unicode.ToLower(chTest)
711 }
712
713 if chTest != b.pattern[match] {
714 advance = b.positive[match]
715 if chTest < 128 {
716 test2 = (match - startmatch) + b.negativeASCII[chTest]
717 } else if chTest < 0xffff && len(b.negativeUnicode) > 0 {
718 unicodeLookup = b.negativeUnicode[chTest>>8]
719 if len(unicodeLookup) > 0 {
720 test2 = (match - startmatch) + unicodeLookup[chTest&0xFF]
721 } else {
722 test += advance
723 break
724 }
725 } else {
726 test += advance
727 break
728 }
729
730 if b.rightToLeft {
731 if test2 < advance {
732 advance = test2
733 }
734 } else if test2 > advance {
735 advance = test2
736 }
737
738 test += advance
739 break
740 }
741 }
742 }
743 }
744}
745
746// When a regex is anchored, we can do a quick IsMatch test instead of a Scan
747func (b *BmPrefix) IsMatch(text []rune, index, beglimit, endlimit int) bool {
748 if !b.rightToLeft {
749 if index < beglimit || endlimit-index < len(b.pattern) {
750 return false
751 }
752
753 return b.matchPattern(text, index)
754 } else {
755 if index > endlimit || index-beglimit < len(b.pattern) {
756 return false
757 }
758
759 return b.matchPattern(text, index-len(b.pattern))
760 }
761}
762
763func (b *BmPrefix) matchPattern(text []rune, index int) bool {
764 if len(text)-index < len(b.pattern) {
765 return false
766 }
767
768 if b.caseInsensitive {
769 for i := 0; i < len(b.pattern); i++ {
770 //Debug.Assert(textinfo.ToLower(_pattern[i]) == _pattern[i], "pattern should be converted to lower case in constructor!");
771 if unicode.ToLower(text[index+i]) != b.pattern[i] {
772 return false
773 }
774 }
775 return true
776 } else {
777 for i := 0; i < len(b.pattern); i++ {
778 if text[index+i] != b.pattern[i] {
779 return false
780 }
781 }
782 return true
783 }
784}
785
// AnchorLoc is a bit set describing where the regex can be pegged.
type AnchorLoc int16

// where the regex can be pegged
//
// Only AnchorBeginning carries the explicit AnchorLoc type; the remaining
// names are untyped integer constants that take their type from the context
// in which they are used.
const (
	AnchorBeginning    AnchorLoc = 0x0001
	AnchorBol                    = 0x0002
	AnchorStart                  = 0x0004
	AnchorEol                    = 0x0008
	AnchorEndZ                   = 0x0010
	AnchorEnd                    = 0x0020
	AnchorBoundary               = 0x0040
	AnchorECMABoundary           = 0x0080
)
799
// getAnchors returns the anchor found at the start of the tree, if any.
//
// The walk mirrors getPrefix: capture and greedy nodes are entered through
// their first child, concatenation children are visited in order, and empty
// and lookaround nodes are stepped over. The first anchor node reached is
// converted with anchorFromType and returned; any other node type ends the
// search with no anchors.
func getAnchors(tree *RegexTree) AnchorLoc {

	var concatNode *regexNode // concatenation whose children are currently being walked, if any
	nextChild, result := 0, AnchorLoc(0)

	curNode := tree.root

	for {
		switch curNode.t {
		case ntConcatenate:
			// Start walking this concatenation's children from the first one.
			if len(curNode.children) > 0 {
				concatNode = curNode
				nextChild = 0
			}

		case ntGreedy, ntCapture:
			// Descend into the group; the enclosing concatenation is forgotten.
			curNode = curNode.children[0]
			concatNode = nil
			continue

		case ntBol, ntEol, ntBoundary, ntECMABoundary, ntBeginning,
			ntStart, ntEndZ, ntEnd:
			return result | anchorFromType(curNode.t)

		case ntEmpty, ntRequire, ntPrevent:
			// Stepped over; move on to the next sibling below.

		default:
			return result
		}

		// Advance to the next child of the current concatenation, if any.
		if concatNode == nil || nextChild >= len(concatNode.children) {
			return result
		}

		curNode = concatNode.children[nextChild]
		nextChild++
	}
}
838
839func anchorFromType(t nodeType) AnchorLoc {
840 switch t {
841 case ntBol:
842 return AnchorBol
843 case ntEol:
844 return AnchorEol
845 case ntBoundary:
846 return AnchorBoundary
847 case ntECMABoundary:
848 return AnchorECMABoundary
849 case ntBeginning:
850 return AnchorBeginning
851 case ntStart:
852 return AnchorStart
853 case ntEndZ:
854 return AnchorEndZ
855 case ntEnd:
856 return AnchorEnd
857 default:
858 return 0
859 }
860}
861
862// anchorDescription returns a human-readable description of the anchors
863func (anchors AnchorLoc) String() string {
864 buf := &bytes.Buffer{}
865
866 if 0 != (anchors & AnchorBeginning) {
867 buf.WriteString(", Beginning")
868 }
869 if 0 != (anchors & AnchorStart) {
870 buf.WriteString(", Start")
871 }
872 if 0 != (anchors & AnchorBol) {
873 buf.WriteString(", Bol")
874 }
875 if 0 != (anchors & AnchorBoundary) {
876 buf.WriteString(", Boundary")
877 }
878 if 0 != (anchors & AnchorECMABoundary) {
879 buf.WriteString(", ECMABoundary")
880 }
881 if 0 != (anchors & AnchorEol) {
882 buf.WriteString(", Eol")
883 }
884 if 0 != (anchors & AnchorEnd) {
885 buf.WriteString(", End")
886 }
887 if 0 != (anchors & AnchorEndZ) {
888 buf.WriteString(", EndZ")
889 }
890
891 // trim off comma
892 if buf.Len() >= 2 {
893 return buf.String()[2:]
894 }
895 return "None"
896}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/replacerdata.go b/vendor/github.com/dlclark/regexp2/syntax/replacerdata.go
new file mode 100644
index 0000000..bcf4d3f
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/replacerdata.go
@@ -0,0 +1,87 @@
1package syntax
2
3import (
4 "bytes"
5 "errors"
6)
7
// ReplacerData is the parsed form of a replacement string as produced by
// NewReplacerData.
type ReplacerData struct {
	Rep     string   // the original replacement text
	Strings []string // literal text segments of the replacement
	Rules   []int    // one entry per segment: a non-negative index into Strings, or a negative code for a reference
}
13
// Codes used when encoding replacement rules. NewReplacerData stores a
// reference with slot s as -replaceSpecials-1-s.
// NOTE(review): the four negative values are presumably the slot numbers the
// parser assigns to the special substitutions named here — confirm against
// the replacement parser.
const (
	replaceSpecials     = 4
	replaceLeftPortion  = -1
	replaceRightPortion = -2
	replaceLastGroup    = -3
	replaceWholeString  = -4
)
21
// ErrReplacementError is a general error during parsing the replacement text.
var ErrReplacementError = errors.New("Replacement pattern error.")
24
25// NewReplacerData will populate a reusable replacer data struct based on the given replacement string
26// and the capture group data from a regexp
27func NewReplacerData(rep string, caps map[int]int, capsize int, capnames map[string]int, op RegexOptions) (*ReplacerData, error) {
28 p := parser{
29 options: op,
30 caps: caps,
31 capsize: capsize,
32 capnames: capnames,
33 }
34 p.setPattern(rep)
35 concat, err := p.scanReplacement()
36 if err != nil {
37 return nil, err
38 }
39
40 if concat.t != ntConcatenate {
41 panic(ErrReplacementError)
42 }
43
44 sb := &bytes.Buffer{}
45 var (
46 strings []string
47 rules []int
48 )
49
50 for _, child := range concat.children {
51 switch child.t {
52 case ntMulti:
53 child.writeStrToBuf(sb)
54
55 case ntOne:
56 sb.WriteRune(child.ch)
57
58 case ntRef:
59 if sb.Len() > 0 {
60 rules = append(rules, len(strings))
61 strings = append(strings, sb.String())
62 sb.Reset()
63 }
64 slot := child.m
65
66 if len(caps) > 0 && slot >= 0 {
67 slot = caps[slot]
68 }
69
70 rules = append(rules, -replaceSpecials-1-slot)
71
72 default:
73 panic(ErrReplacementError)
74 }
75 }
76
77 if sb.Len() > 0 {
78 rules = append(rules, len(strings))
79 strings = append(strings, sb.String())
80 }
81
82 return &ReplacerData{
83 Rep: rep,
84 Strings: strings,
85 Rules: rules,
86 }, nil
87}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/tree.go b/vendor/github.com/dlclark/regexp2/syntax/tree.go
new file mode 100644
index 0000000..ea28829
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/tree.go
@@ -0,0 +1,654 @@
1package syntax
2
3import (
4 "bytes"
5 "fmt"
6 "math"
7 "strconv"
8)
9
// RegexTree is the parsed tree for a regular expression, together with its
// capture bookkeeping and options.
type RegexTree struct {
	root       *regexNode     // root node of the parsed expression
	caps       map[int]int    // capture number mapping (filled per capnumlist index by the code writer when sparse)
	capnumlist []int          // capture numbers in use; nil when no sparse mapping is needed
	captop     int            // capture count used when capnumlist is nil or complete
	Capnames   map[string]int // capture name to number
	Caplist    []string       // capture names
	options    RegexOptions   // options the expression was parsed with
}

21
// Implementation notes:
//
// Since the node tree is a temporary data structure only used
// during compilation of the regexp to integer codes, it's
// designed for clarity and convenience rather than
// space efficiency.
//
// RegexNodes are built into a tree, linked by the n.children list.
// Each node also has a n.parent and n.ichild member indicating
// its parent and which child # it is in its parent's list.
//
// RegexNodes come in as many types as there are constructs in
// a regular expression, for example, "concatenate", "alternate",
// "one", "rept", "group". There are also node types for basic
// peephole optimizations, e.g., "onerep", "notsetrep", etc.
//
// Because perl 5 allows "lookback" groups that scan backwards,
// each node also gets a "direction". Normally the value of
// boolean n.backward = false.
//
// During parsing, top-level nodes are also stacked onto a parse
// stack (a stack of trees). For this purpose we have a n.next
// pointer. [Note that to save a few bytes, we could overload the
// n.parent pointer instead.]
//
// On the parse stack, each tree has a "role" - basically, the
// nonterminal in the grammar that the parser has currently
// assigned to the tree. That code is stored in n.role.
//
// Finally, some of the different kinds of nodes have data.
// Two integers (for the looping constructs) are stored in
// n.operands, and an object (either a string or a set)
// is stored in n.data
//
// NOTE(review): several members named above (n.parent, n.ichild,
// n.backward, n.role, n.operands, n.data) do not exist on the struct below.
// Here the two integers are the m and n fields, the string and set are the
// str and set fields, and addChild stores the parent in next.
type regexNode struct {
	t        nodeType     // kind of node
	children []*regexNode // child nodes, in order
	str      []rune       // string payload (Multi nodes)
	set      *CharSet     // set payload (Set, Setloop, Setlazy nodes)
	ch       rune         // character payload (One/Notone and their loop forms)
	m        int          // first integer operand (loop minimum; capture/ref index)
	n        int          // second integer operand (loop maximum; capture unindex)
	options  RegexOptions // options in effect for this node
	next     *regexNode   // set to the parent by addChild; followed upwards by dump
}
66
// nodeType identifies the kind of a regexNode.
type nodeType int32

// Only ntOnerep carries the explicit nodeType type; the remaining names are
// untyped integer constants that take their type from the context in which
// they are used.
const (
	// The following are leaves, and correspond to primitive operations

	ntOnerep      nodeType = 0  // lef,back char,min,max    a {n}
	ntNotonerep            = 1  // lef,back char,min,max    .{n}
	ntSetrep               = 2  // lef,back set,min,max     [\d]{n}
	ntOneloop              = 3  // lef,back char,min,max    a {,n}
	ntNotoneloop           = 4  // lef,back char,min,max    .{,n}
	ntSetloop              = 5  // lef,back set,min,max     [\d]{,n}
	ntOnelazy              = 6  // lef,back char,min,max    a {,n}?
	ntNotonelazy           = 7  // lef,back char,min,max    .{,n}?
	ntSetlazy              = 8  // lef,back set,min,max     [\d]{,n}?
	ntOne                  = 9  // lef      char            a
	ntNotone               = 10 // lef      char            [^a]
	ntSet                  = 11 // lef      set             [a-z\s]  \w \s \d
	ntMulti                = 12 // lef      string          abcd
	ntRef                  = 13 // lef      group           \#
	ntBol                  = 14 //                          ^
	ntEol                  = 15 //                          $
	ntBoundary             = 16 //                          \b
	ntNonboundary          = 17 //                          \B
	ntBeginning            = 18 //                          \A
	ntStart                = 19 //                          \G
	ntEndZ                 = 20 //                          \Z
	ntEnd                  = 21 //                          \z

	// Interior nodes do not correspond to primitive operations, but
	// control structures compositing other operations

	// Concat and alternate take n children, and can run forward or backwards

	ntNothing     = 22 //          []
	ntEmpty       = 23 //          ()
	ntAlternate   = 24 //          a|b
	ntConcatenate = 25 //          ab
	ntLoop        = 26 // m,x      * + ? {,}
	ntLazyloop    = 27 // m,x      *? +? ?? {,}?
	ntCapture     = 28 // n        ()
	ntGroup       = 29 //          (?:)
	ntRequire     = 30 //          (?=) (?<=)
	ntPrevent     = 31 //          (?!) (?<!)
	ntGreedy      = 32 //          (?>) (?<)
	ntTestref     = 33 //          (?(n) | )
	ntTestgroup   = 34 //          (?(...) | )

	ntECMABoundary    = 41 //                          \b
	ntNonECMABoundary = 42 //                          \B
)
117
118func newRegexNode(t nodeType, opt RegexOptions) *regexNode {
119 return &regexNode{
120 t: t,
121 options: opt,
122 }
123}
124
125func newRegexNodeCh(t nodeType, opt RegexOptions, ch rune) *regexNode {
126 return &regexNode{
127 t: t,
128 options: opt,
129 ch: ch,
130 }
131}
132
133func newRegexNodeStr(t nodeType, opt RegexOptions, str []rune) *regexNode {
134 return &regexNode{
135 t: t,
136 options: opt,
137 str: str,
138 }
139}
140
141func newRegexNodeSet(t nodeType, opt RegexOptions, set *CharSet) *regexNode {
142 return &regexNode{
143 t: t,
144 options: opt,
145 set: set,
146 }
147}
148
149func newRegexNodeM(t nodeType, opt RegexOptions, m int) *regexNode {
150 return &regexNode{
151 t: t,
152 options: opt,
153 m: m,
154 }
155}
156func newRegexNodeMN(t nodeType, opt RegexOptions, m, n int) *regexNode {
157 return &regexNode{
158 t: t,
159 options: opt,
160 m: m,
161 n: n,
162 }
163}
164
165func (n *regexNode) writeStrToBuf(buf *bytes.Buffer) {
166 for i := 0; i < len(n.str); i++ {
167 buf.WriteRune(n.str[i])
168 }
169}
170
171func (n *regexNode) addChild(child *regexNode) {
172 reduced := child.reduce()
173 n.children = append(n.children, reduced)
174 reduced.next = n
175}
176
177func (n *regexNode) insertChildren(afterIndex int, nodes []*regexNode) {
178 newChildren := make([]*regexNode, 0, len(n.children)+len(nodes))
179 n.children = append(append(append(newChildren, n.children[:afterIndex]...), nodes...), n.children[afterIndex:]...)
180}
181
182// removes children including the start but not the end index
183func (n *regexNode) removeChildren(startIndex, endIndex int) {
184 n.children = append(n.children[:startIndex], n.children[endIndex:]...)
185}
186
187// Pass type as OneLazy or OneLoop
188func (n *regexNode) makeRep(t nodeType, min, max int) {
189 n.t += (t - ntOne)
190 n.m = min
191 n.n = max
192}
193
194func (n *regexNode) reduce() *regexNode {
195 switch n.t {
196 case ntAlternate:
197 return n.reduceAlternation()
198
199 case ntConcatenate:
200 return n.reduceConcatenation()
201
202 case ntLoop, ntLazyloop:
203 return n.reduceRep()
204
205 case ntGroup:
206 return n.reduceGroup()
207
208 case ntSet, ntSetloop:
209 return n.reduceSet()
210
211 default:
212 return n
213 }
214}
215
// Basic optimization. Single-letter alternations can be replaced
// by faster set specifications, and nested alternations with no
// intervening operators can be flattened:
//
// a|b|c|def|g|h -> [a-c]|def|[gh]
// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
//
// The children are compacted in place: i is the read index, j the write
// index. Whenever a child is dropped or merged into its predecessor, j is
// stepped back, and the surplus tail is removed after the loop.
func (n *regexNode) reduceAlternation() *regexNode {
	if len(n.children) == 0 {
		return newRegexNode(ntNothing, n.options)
	}

	wasLastSet := false          // previous kept child was a Set or One
	lastNodeCannotMerge := false // previous kept child was a non-mergeable set
	var optionsLast RegexOptions // RightToLeft/IgnoreCase bits of the previous Set or One
	var i, j int

	for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
		at := n.children[i]

		if j < i {
			n.children[j] = at
		}

		// Single-pass loop: it exists only so that "break" can skip the merge
		// step below.
		for {
			if at.t == ntAlternate {
				// Flatten: re-parent the nested alternatives and splice them
				// in right after the current position.
				for k := 0; k < len(at.children); k++ {
					at.children[k].next = n
				}
				n.insertChildren(i+1, at.children)

				j--
			} else if at.t == ntSet || at.t == ntOne {
				// Cannot merge sets if L or I options differ, or if either are negated.
				optionsAt := at.options & (RightToLeft | IgnoreCase)

				if at.t == ntSet {
					if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !at.set.IsMergeable() {
						wasLastSet = true
						lastNodeCannotMerge = !at.set.IsMergeable()
						optionsLast = optionsAt
						break
					}
				} else if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge {
					wasLastSet = true
					lastNodeCannotMerge = false
					optionsLast = optionsAt
					break
				}

				// The last node was a Set or a One, we're a Set or One and our options are the same.
				// Merge the two nodes.
				j--
				prev := n.children[j]

				var prevCharClass *CharSet
				if prev.t == ntOne {
					prevCharClass = &CharSet{}
					prevCharClass.addChar(prev.ch)
				} else {
					prevCharClass = prev.set
				}

				if at.t == ntOne {
					prevCharClass.addChar(at.ch)
				} else {
					prevCharClass.addSet(*at.set)
				}

				prev.t = ntSet
				prev.set = prevCharClass
			} else if at.t == ntNothing {
				// An alternative that can never match is dropped.
				j--
			} else {
				wasLastSet = false
				lastNodeCannotMerge = false
			}
			break
		}
	}

	if j < i {
		n.removeChildren(j, i)
	}

	return n.stripEnation(ntNothing)
}
302
// Basic optimization. Adjacent strings can be concatenated.
//
// (?:abc)(?:def) -> abcdef
//
// The children are compacted in place: i is the read index, j the write
// index. Whenever a child is dropped or merged into its predecessor, j is
// stepped back, and the surplus tail is removed after the loop.
func (n *regexNode) reduceConcatenation() *regexNode {
	// Eliminate empties and concat adjacent strings/chars

	var optionsLast RegexOptions
	var optionsAt RegexOptions
	var i, j int

	if len(n.children) == 0 {
		return newRegexNode(ntEmpty, n.options)
	}

	wasLastString := false // previous kept child was a Multi or One

	for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
		var at, prev *regexNode

		at = n.children[i]

		if j < i {
			n.children[j] = at
		}

		if at.t == ntConcatenate &&
			((at.options & RightToLeft) == (n.options & RightToLeft)) {
			// Flatten a nested concatenation running in the same direction.
			for k := 0; k < len(at.children); k++ {
				at.children[k].next = n
			}

			//insert at.children at i+1 index in n.children
			n.insertChildren(i+1, at.children)

			j--
		} else if at.t == ntMulti || at.t == ntOne {
			// Cannot merge strings if L or I options differ
			optionsAt = at.options & (RightToLeft | IgnoreCase)

			if !wasLastString || optionsLast != optionsAt {
				wasLastString = true
				optionsLast = optionsAt
				continue
			}

			// Merge this string/char into the previous kept child.
			j--
			prev = n.children[j]

			if prev.t == ntOne {
				prev.t = ntMulti
				prev.str = []rune{prev.ch}
			}

			if (optionsAt & RightToLeft) == 0 {
				if at.t == ntOne {
					prev.str = append(prev.str, at.ch)
				} else {
					prev.str = append(prev.str, at.str...)
				}
			} else {
				if at.t == ntOne {
					// insert at the front by expanding our slice, copying the data over, and then setting the value
					prev.str = append(prev.str, 0)
					copy(prev.str[1:], prev.str)
					prev.str[0] = at.ch
				} else {
					//insert at the front...this one we'll make a new slice and copy both into it
					merge := make([]rune, len(prev.str)+len(at.str))
					copy(merge, at.str)
					copy(merge[len(at.str):], prev.str)
					prev.str = merge
				}
			}
		} else if at.t == ntEmpty {
			// Empty children contribute nothing and are dropped.
			j--
		} else {
			wasLastString = false
		}
	}

	if j < i {
		// remove indices j through i from the children
		n.removeChildren(j, i)
	}

	return n.stripEnation(ntEmpty)
}
390
// Nested repeaters just get multiplied with each other if they're not
// too lumpy
//
// Starting at n, the walk follows first children for as long as the child is
// a repeater of a compatible kind, multiplying the child's bounds by n's
// original min and max (saturating at math.MaxInt32). The innermost node
// reached is returned; if n's min is math.MaxInt32 a Nothing node is
// returned instead.
func (n *regexNode) reduceRep() *regexNode {

	u := n
	t := n.t
	min := n.m
	max := n.n

	for {
		if len(u.children) == 0 {
			break
		}

		child := u.children[0]

		// multiply reps of the same type only
		if child.t != t {
			childType := child.t

			// A greedy loop may absorb a one/notone/set loop, and a lazy loop
			// a one/notone/set lazy; anything else stops the walk.
			if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop ||
				childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop) {
				break
			}
		}

		// child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
		// [but things like (a {2,})+ are not too lumpy...]
		if u.m == 0 && child.m > 1 || child.n < child.m*2 {
			break
		}

		u = child
		if u.m > 0 {
			// Saturate instead of overflowing.
			if (math.MaxInt32-1)/u.m < min {
				u.m = math.MaxInt32
			} else {
				u.m = u.m * min
			}
		}
		if u.n > 0 {
			if (math.MaxInt32-1)/u.n < max {
				u.n = math.MaxInt32
			} else {
				u.n = u.n * max
			}
		}
	}

	if math.MaxInt32 == min {
		return newRegexNode(ntNothing, n.options)
	}
	return u

}
446
447// Simple optimization. If a concatenation or alternation has only
448// one child strip out the intermediate node. If it has zero children,
449// turn it into an empty.
450func (n *regexNode) stripEnation(emptyType nodeType) *regexNode {
451 switch len(n.children) {
452 case 0:
453 return newRegexNode(emptyType, n.options)
454 case 1:
455 return n.children[0]
456 default:
457 return n
458 }
459}
460
461func (n *regexNode) reduceGroup() *regexNode {
462 u := n
463
464 for u.t == ntGroup {
465 u = u.children[0]
466 }
467
468 return u
469}
470
471// Simple optimization. If a set is a singleton, an inverse singleton,
472// or empty, it's transformed accordingly.
473func (n *regexNode) reduceSet() *regexNode {
474 // Extract empty-set, one and not-one case as special
475
476 if n.set == nil {
477 n.t = ntNothing
478 } else if n.set.IsSingleton() {
479 n.ch = n.set.SingletonChar()
480 n.set = nil
481 n.t += (ntOne - ntSet)
482 } else if n.set.IsSingletonInverse() {
483 n.ch = n.set.SingletonChar()
484 n.set = nil
485 n.t += (ntNotone - ntSet)
486 }
487
488 return n
489}
490
491func (n *regexNode) reverseLeft() *regexNode {
492 if n.options&RightToLeft != 0 && n.t == ntConcatenate && len(n.children) > 0 {
493 //reverse children order
494 for left, right := 0, len(n.children)-1; left < right; left, right = left+1, right-1 {
495 n.children[left], n.children[right] = n.children[right], n.children[left]
496 }
497 }
498
499 return n
500}
501
502func (n *regexNode) makeQuantifier(lazy bool, min, max int) *regexNode {
503 if min == 0 && max == 0 {
504 return newRegexNode(ntEmpty, n.options)
505 }
506
507 if min == 1 && max == 1 {
508 return n
509 }
510
511 switch n.t {
512 case ntOne, ntNotone, ntSet:
513 if lazy {
514 n.makeRep(Onelazy, min, max)
515 } else {
516 n.makeRep(Oneloop, min, max)
517 }
518 return n
519
520 default:
521 var t nodeType
522 if lazy {
523 t = ntLazyloop
524 } else {
525 t = ntLoop
526 }
527 result := newRegexNodeMN(t, n.options, min, max)
528 result.addChild(n)
529 return result
530 }
531}
532
533// debug functions
534
// typeStr maps a nodeType value to its display name and is indexed directly
// by the numeric node type in description. The "Unknown" entries pad the
// unused values 35-40 so that ECMABoundary and NonECMABoundary land on 41
// and 42.
var typeStr = []string{
	"Onerep", "Notonerep", "Setrep",
	"Oneloop", "Notoneloop", "Setloop",
	"Onelazy", "Notonelazy", "Setlazy",
	"One", "Notone", "Set",
	"Multi", "Ref",
	"Bol", "Eol", "Boundary", "Nonboundary",
	"Beginning", "Start", "EndZ", "End",
	"Nothing", "Empty",
	"Alternate", "Concatenate",
	"Loop", "Lazyloop",
	"Capture", "Group", "Require", "Prevent", "Greedy",
	"Testref", "Testgroup",
	"Unknown", "Unknown", "Unknown",
	"Unknown", "Unknown", "Unknown",
	"ECMABoundary", "NonECMABoundary",
}
552
553func (n *regexNode) description() string {
554 buf := &bytes.Buffer{}
555
556 buf.WriteString(typeStr[n.t])
557
558 if (n.options & ExplicitCapture) != 0 {
559 buf.WriteString("-C")
560 }
561 if (n.options & IgnoreCase) != 0 {
562 buf.WriteString("-I")
563 }
564 if (n.options & RightToLeft) != 0 {
565 buf.WriteString("-L")
566 }
567 if (n.options & Multiline) != 0 {
568 buf.WriteString("-M")
569 }
570 if (n.options & Singleline) != 0 {
571 buf.WriteString("-S")
572 }
573 if (n.options & IgnorePatternWhitespace) != 0 {
574 buf.WriteString("-X")
575 }
576 if (n.options & ECMAScript) != 0 {
577 buf.WriteString("-E")
578 }
579
580 switch n.t {
581 case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone:
582 buf.WriteString("(Ch = " + CharDescription(n.ch) + ")")
583 break
584 case ntCapture:
585 buf.WriteString("(index = " + strconv.Itoa(n.m) + ", unindex = " + strconv.Itoa(n.n) + ")")
586 break
587 case ntRef, ntTestref:
588 buf.WriteString("(index = " + strconv.Itoa(n.m) + ")")
589 break
590 case ntMulti:
591 fmt.Fprintf(buf, "(String = %s)", string(n.str))
592 break
593 case ntSet, ntSetloop, ntSetlazy:
594 buf.WriteString("(Set = " + n.set.String() + ")")
595 break
596 }
597
598 switch n.t {
599 case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop:
600 buf.WriteString("(Min = ")
601 buf.WriteString(strconv.Itoa(n.m))
602 buf.WriteString(", Max = ")
603 if n.n == math.MaxInt32 {
604 buf.WriteString("inf")
605 } else {
606 buf.WriteString(strconv.Itoa(n.n))
607 }
608 buf.WriteString(")")
609
610 break
611 }
612
613 return buf.String()
614}
615
// padSpace supplies the indentation for regexNode.dump, which writes
// padSpace[:depth] with depth capped at 32. It therefore has to hold at
// least 32 spaces; building it with bytes.Repeat keeps the length explicit.
var padSpace = bytes.Repeat([]byte{' '}, 32)
617
618func (t *RegexTree) Dump() string {
619 return t.root.dump()
620}
621
622func (n *regexNode) dump() string {
623 var stack []int
624 CurNode := n
625 CurChild := 0
626
627 buf := bytes.NewBufferString(CurNode.description())
628 buf.WriteRune('\n')
629
630 for {
631 if CurNode.children != nil && CurChild < len(CurNode.children) {
632 stack = append(stack, CurChild+1)
633 CurNode = CurNode.children[CurChild]
634 CurChild = 0
635
636 Depth := len(stack)
637 if Depth > 32 {
638 Depth = 32
639 }
640 buf.Write(padSpace[:Depth])
641 buf.WriteString(CurNode.description())
642 buf.WriteRune('\n')
643 } else {
644 if len(stack) == 0 {
645 break
646 }
647
648 CurChild = stack[len(stack)-1]
649 stack = stack[:len(stack)-1]
650 CurNode = CurNode.next
651 }
652 }
653 return buf.String()
654}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/writer.go b/vendor/github.com/dlclark/regexp2/syntax/writer.go
new file mode 100644
index 0000000..a5aa11c
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/writer.go
@@ -0,0 +1,500 @@
1package syntax
2
3import (
4 "bytes"
5 "fmt"
6 "math"
7 "os"
8)
9
10func Write(tree *RegexTree) (*Code, error) {
11 w := writer{
12 intStack: make([]int, 0, 32),
13 emitted: make([]int, 2),
14 stringhash: make(map[string]int),
15 sethash: make(map[string]int),
16 }
17
18 code, err := w.codeFromTree(tree)
19
20 if tree.options&Debug > 0 && code != nil {
21 os.Stdout.WriteString(code.Dump())
22 os.Stdout.WriteString("\n")
23 }
24
25 return code, err
26}
27
28type writer struct {
29 emitted []int
30
31 intStack []int
32 curpos int
33 stringhash map[string]int
34 stringtable [][]rune
35 sethash map[string]int
36 settable []*CharSet
37 counting bool
38 count int
39 trackcount int
40 caps map[int]int
41}
42
43const (
44 beforeChild nodeType = 64
45 afterChild = 128
46 //MaxPrefixSize is the largest number of runes we'll use for a BoyerMoyer prefix
47 MaxPrefixSize = 50
48)
49
50// The top level RegexCode generator. It does a depth-first walk
51// through the tree and calls EmitFragment to emits code before
52// and after each child of an interior node, and at each leaf.
53//
54// It runs two passes, first to count the size of the generated
55// code, and second to generate the code.
56//
57// We should time it against the alternative, which is
58// to just generate the code and grow the array as we go.
59func (w *writer) codeFromTree(tree *RegexTree) (*Code, error) {
60 var (
61 curNode *regexNode
62 curChild int
63 capsize int
64 )
65 // construct sparse capnum mapping if some numbers are unused
66
67 if tree.capnumlist == nil || tree.captop == len(tree.capnumlist) {
68 capsize = tree.captop
69 w.caps = nil
70 } else {
71 capsize = len(tree.capnumlist)
72 w.caps = tree.caps
73 for i := 0; i < len(tree.capnumlist); i++ {
74 w.caps[tree.capnumlist[i]] = i
75 }
76 }
77
78 w.counting = true
79
80 for {
81 if !w.counting {
82 w.emitted = make([]int, w.count)
83 }
84
85 curNode = tree.root
86 curChild = 0
87
88 w.emit1(Lazybranch, 0)
89
90 for {
91 if len(curNode.children) == 0 {
92 w.emitFragment(curNode.t, curNode, 0)
93 } else if curChild < len(curNode.children) {
94 w.emitFragment(curNode.t|beforeChild, curNode, curChild)
95
96 curNode = curNode.children[curChild]
97
98 w.pushInt(curChild)
99 curChild = 0
100 continue
101 }
102
103 if w.emptyStack() {
104 break
105 }
106
107 curChild = w.popInt()
108 curNode = curNode.next
109
110 w.emitFragment(curNode.t|afterChild, curNode, curChild)
111 curChild++
112 }
113
114 w.patchJump(0, w.curPos())
115 w.emit(Stop)
116
117 if !w.counting {
118 break
119 }
120
121 w.counting = false
122 }
123
124 fcPrefix := getFirstCharsPrefix(tree)
125 prefix := getPrefix(tree)
126 rtl := (tree.options & RightToLeft) != 0
127
128 var bmPrefix *BmPrefix
129 //TODO: benchmark string prefixes
130 if prefix != nil && len(prefix.PrefixStr) > 0 && MaxPrefixSize > 0 {
131 if len(prefix.PrefixStr) > MaxPrefixSize {
132 // limit prefix changes to 10k
133 prefix.PrefixStr = prefix.PrefixStr[:MaxPrefixSize]
134 }
135 bmPrefix = newBmPrefix(prefix.PrefixStr, prefix.CaseInsensitive, rtl)
136 } else {
137 bmPrefix = nil
138 }
139
140 return &Code{
141 Codes: w.emitted,
142 Strings: w.stringtable,
143 Sets: w.settable,
144 TrackCount: w.trackcount,
145 Caps: w.caps,
146 Capsize: capsize,
147 FcPrefix: fcPrefix,
148 BmPrefix: bmPrefix,
149 Anchors: getAnchors(tree),
150 RightToLeft: rtl,
151 }, nil
152}
153
154// The main RegexCode generator. It does a depth-first walk
155// through the tree and calls EmitFragment to emits code before
156// and after each child of an interior node, and at each leaf.
157func (w *writer) emitFragment(nodetype nodeType, node *regexNode, curIndex int) error {
158 bits := InstOp(0)
159
160 if nodetype <= ntRef {
161 if (node.options & RightToLeft) != 0 {
162 bits |= Rtl
163 }
164 if (node.options & IgnoreCase) != 0 {
165 bits |= Ci
166 }
167 }
168 ntBits := nodeType(bits)
169
170 switch nodetype {
171 case ntConcatenate | beforeChild, ntConcatenate | afterChild, ntEmpty:
172 break
173
174 case ntAlternate | beforeChild:
175 if curIndex < len(node.children)-1 {
176 w.pushInt(w.curPos())
177 w.emit1(Lazybranch, 0)
178 }
179
180 case ntAlternate | afterChild:
181 if curIndex < len(node.children)-1 {
182 lbPos := w.popInt()
183 w.pushInt(w.curPos())
184 w.emit1(Goto, 0)
185 w.patchJump(lbPos, w.curPos())
186 } else {
187 for i := 0; i < curIndex; i++ {
188 w.patchJump(w.popInt(), w.curPos())
189 }
190 }
191 break
192
193 case ntTestref | beforeChild:
194 if curIndex == 0 {
195 w.emit(Setjump)
196 w.pushInt(w.curPos())
197 w.emit1(Lazybranch, 0)
198 w.emit1(Testref, w.mapCapnum(node.m))
199 w.emit(Forejump)
200 }
201
202 case ntTestref | afterChild:
203 if curIndex == 0 {
204 branchpos := w.popInt()
205 w.pushInt(w.curPos())
206 w.emit1(Goto, 0)
207 w.patchJump(branchpos, w.curPos())
208 w.emit(Forejump)
209 if len(node.children) <= 1 {
210 w.patchJump(w.popInt(), w.curPos())
211 }
212 } else if curIndex == 1 {
213 w.patchJump(w.popInt(), w.curPos())
214 }
215
216 case ntTestgroup | beforeChild:
217 if curIndex == 0 {
218 w.emit(Setjump)
219 w.emit(Setmark)
220 w.pushInt(w.curPos())
221 w.emit1(Lazybranch, 0)
222 }
223
224 case ntTestgroup | afterChild:
225 if curIndex == 0 {
226 w.emit(Getmark)
227 w.emit(Forejump)
228 } else if curIndex == 1 {
229 Branchpos := w.popInt()
230 w.pushInt(w.curPos())
231 w.emit1(Goto, 0)
232 w.patchJump(Branchpos, w.curPos())
233 w.emit(Getmark)
234 w.emit(Forejump)
235 if len(node.children) <= 2 {
236 w.patchJump(w.popInt(), w.curPos())
237 }
238 } else if curIndex == 2 {
239 w.patchJump(w.popInt(), w.curPos())
240 }
241
242 case ntLoop | beforeChild, ntLazyloop | beforeChild:
243
244 if node.n < math.MaxInt32 || node.m > 1 {
245 if node.m == 0 {
246 w.emit1(Nullcount, 0)
247 } else {
248 w.emit1(Setcount, 1-node.m)
249 }
250 } else if node.m == 0 {
251 w.emit(Nullmark)
252 } else {
253 w.emit(Setmark)
254 }
255
256 if node.m == 0 {
257 w.pushInt(w.curPos())
258 w.emit1(Goto, 0)
259 }
260 w.pushInt(w.curPos())
261
262 case ntLoop | afterChild, ntLazyloop | afterChild:
263
264 startJumpPos := w.curPos()
265 lazy := (nodetype - (ntLoop | afterChild))
266
267 if node.n < math.MaxInt32 || node.m > 1 {
268 if node.n == math.MaxInt32 {
269 w.emit2(InstOp(Branchcount+lazy), w.popInt(), math.MaxInt32)
270 } else {
271 w.emit2(InstOp(Branchcount+lazy), w.popInt(), node.n-node.m)
272 }
273 } else {
274 w.emit1(InstOp(Branchmark+lazy), w.popInt())
275 }
276
277 if node.m == 0 {
278 w.patchJump(w.popInt(), startJumpPos)
279 }
280
281 case ntGroup | beforeChild, ntGroup | afterChild:
282
283 case ntCapture | beforeChild:
284 w.emit(Setmark)
285
286 case ntCapture | afterChild:
287 w.emit2(Capturemark, w.mapCapnum(node.m), w.mapCapnum(node.n))
288
289 case ntRequire | beforeChild:
290 // NOTE: the following line causes lookahead/lookbehind to be
291 // NON-BACKTRACKING. It can be commented out with (*)
292 w.emit(Setjump)
293
294 w.emit(Setmark)
295
296 case ntRequire | afterChild:
297 w.emit(Getmark)
298
299 // NOTE: the following line causes lookahead/lookbehind to be
300 // NON-BACKTRACKING. It can be commented out with (*)
301 w.emit(Forejump)
302
303 case ntPrevent | beforeChild:
304 w.emit(Setjump)
305 w.pushInt(w.curPos())
306 w.emit1(Lazybranch, 0)
307
308 case ntPrevent | afterChild:
309 w.emit(Backjump)
310 w.patchJump(w.popInt(), w.curPos())
311 w.emit(Forejump)
312
313 case ntGreedy | beforeChild:
314 w.emit(Setjump)
315
316 case ntGreedy | afterChild:
317 w.emit(Forejump)
318
319 case ntOne, ntNotone:
320 w.emit1(InstOp(node.t|ntBits), int(node.ch))
321
322 case ntNotoneloop, ntNotonelazy, ntOneloop, ntOnelazy:
323 if node.m > 0 {
324 if node.t == ntOneloop || node.t == ntOnelazy {
325 w.emit2(Onerep|bits, int(node.ch), node.m)
326 } else {
327 w.emit2(Notonerep|bits, int(node.ch), node.m)
328 }
329 }
330 if node.n > node.m {
331 if node.n == math.MaxInt32 {
332 w.emit2(InstOp(node.t|ntBits), int(node.ch), math.MaxInt32)
333 } else {
334 w.emit2(InstOp(node.t|ntBits), int(node.ch), node.n-node.m)
335 }
336 }
337
338 case ntSetloop, ntSetlazy:
339 if node.m > 0 {
340 w.emit2(Setrep|bits, w.setCode(node.set), node.m)
341 }
342 if node.n > node.m {
343 if node.n == math.MaxInt32 {
344 w.emit2(InstOp(node.t|ntBits), w.setCode(node.set), math.MaxInt32)
345 } else {
346 w.emit2(InstOp(node.t|ntBits), w.setCode(node.set), node.n-node.m)
347 }
348 }
349
350 case ntMulti:
351 w.emit1(InstOp(node.t|ntBits), w.stringCode(node.str))
352
353 case ntSet:
354 w.emit1(InstOp(node.t|ntBits), w.setCode(node.set))
355
356 case ntRef:
357 w.emit1(InstOp(node.t|ntBits), w.mapCapnum(node.m))
358
359 case ntNothing, ntBol, ntEol, ntBoundary, ntNonboundary, ntECMABoundary, ntNonECMABoundary, ntBeginning, ntStart, ntEndZ, ntEnd:
360 w.emit(InstOp(node.t))
361
362 default:
363 return fmt.Errorf("unexpected opcode in regular expression generation: %v", nodetype)
364 }
365
366 return nil
367}
368
369// To avoid recursion, we use a simple integer stack.
370// This is the push.
371func (w *writer) pushInt(i int) {
372 w.intStack = append(w.intStack, i)
373}
374
375// Returns true if the stack is empty.
376func (w *writer) emptyStack() bool {
377 return len(w.intStack) == 0
378}
379
380// This is the pop.
381func (w *writer) popInt() int {
382 //get our item
383 idx := len(w.intStack) - 1
384 i := w.intStack[idx]
385 //trim our slice
386 w.intStack = w.intStack[:idx]
387 return i
388}
389
390// Returns the current position in the emitted code.
391func (w *writer) curPos() int {
392 return w.curpos
393}
394
395// Fixes up a jump instruction at the specified offset
396// so that it jumps to the specified jumpDest.
397func (w *writer) patchJump(offset, jumpDest int) {
398 w.emitted[offset+1] = jumpDest
399}
400
401// Returns an index in the set table for a charset
402// uses a map to eliminate duplicates.
403func (w *writer) setCode(set *CharSet) int {
404 if w.counting {
405 return 0
406 }
407
408 buf := &bytes.Buffer{}
409
410 set.mapHashFill(buf)
411 hash := buf.String()
412 i, ok := w.sethash[hash]
413 if !ok {
414 i = len(w.sethash)
415 w.sethash[hash] = i
416 w.settable = append(w.settable, set)
417 }
418 return i
419}
420
421// Returns an index in the string table for a string.
422// uses a map to eliminate duplicates.
423func (w *writer) stringCode(str []rune) int {
424 if w.counting {
425 return 0
426 }
427
428 hash := string(str)
429 i, ok := w.stringhash[hash]
430 if !ok {
431 i = len(w.stringhash)
432 w.stringhash[hash] = i
433 w.stringtable = append(w.stringtable, str)
434 }
435
436 return i
437}
438
439// When generating code on a regex that uses a sparse set
440// of capture slots, we hash them to a dense set of indices
441// for an array of capture slots. Instead of doing the hash
442// at match time, it's done at compile time, here.
443func (w *writer) mapCapnum(capnum int) int {
444 if capnum == -1 {
445 return -1
446 }
447
448 if w.caps != nil {
449 return w.caps[capnum]
450 }
451
452 return capnum
453}
454
455// Emits a zero-argument operation. Note that the emit
456// functions all run in two modes: they can emit code, or
457// they can just count the size of the code.
458func (w *writer) emit(op InstOp) {
459 if w.counting {
460 w.count++
461 if opcodeBacktracks(op) {
462 w.trackcount++
463 }
464 return
465 }
466 w.emitted[w.curpos] = int(op)
467 w.curpos++
468}
469
470// Emits a one-argument operation.
471func (w *writer) emit1(op InstOp, opd1 int) {
472 if w.counting {
473 w.count += 2
474 if opcodeBacktracks(op) {
475 w.trackcount++
476 }
477 return
478 }
479 w.emitted[w.curpos] = int(op)
480 w.curpos++
481 w.emitted[w.curpos] = opd1
482 w.curpos++
483}
484
485// Emits a two-argument operation.
486func (w *writer) emit2(op InstOp, opd1, opd2 int) {
487 if w.counting {
488 w.count += 3
489 if opcodeBacktracks(op) {
490 w.trackcount++
491 }
492 return
493 }
494 w.emitted[w.curpos] = int(op)
495 w.curpos++
496 w.emitted[w.curpos] = opd1
497 w.curpos++
498 w.emitted[w.curpos] = opd2
499 w.curpos++
500}
diff --git a/vendor/github.com/dlclark/regexp2/testoutput1 b/vendor/github.com/dlclark/regexp2/testoutput1
new file mode 100644
index 0000000..fbf63fd
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/testoutput1
@@ -0,0 +1,7061 @@
1# This set of tests is for features that are compatible with all versions of
2# Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and
3# 32-bit PCRE libraries, and also using the perltest.pl script.
4
5#forbid_utf
6#newline_default lf any anycrlf
7#perltest
8
9/the quick brown fox/
10 the quick brown fox
11 0: the quick brown fox
12 What do you know about the quick brown fox?
13 0: the quick brown fox
14\= Expect no match
15 The quick brown FOX
16No match
17 What do you know about THE QUICK BROWN FOX?
18No match
19
20/The quick brown fox/i
21 the quick brown fox
22 0: the quick brown fox
23 The quick brown FOX
24 0: The quick brown FOX
25 What do you know about the quick brown fox?
26 0: the quick brown fox
27 What do you know about THE QUICK BROWN FOX?
28 0: THE QUICK BROWN FOX
29
30/abcd\t\n\r\f\a\e\071\x3b\$\\\?caxyz/
31 abcd\t\n\r\f\a\e9;\$\\?caxyz
32 0: abcd\x09\x0a\x0d\x0c\x07\x1b9;$\?caxyz
33
34/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/
35 abxyzpqrrrabbxyyyypqAzz
36 0: abxyzpqrrrabbxyyyypqAzz
37 abxyzpqrrrabbxyyyypqAzz
38 0: abxyzpqrrrabbxyyyypqAzz
39 aabxyzpqrrrabbxyyyypqAzz
40 0: aabxyzpqrrrabbxyyyypqAzz
41 aaabxyzpqrrrabbxyyyypqAzz
42 0: aaabxyzpqrrrabbxyyyypqAzz
43 aaaabxyzpqrrrabbxyyyypqAzz
44 0: aaaabxyzpqrrrabbxyyyypqAzz
45 abcxyzpqrrrabbxyyyypqAzz
46 0: abcxyzpqrrrabbxyyyypqAzz
47 aabcxyzpqrrrabbxyyyypqAzz
48 0: aabcxyzpqrrrabbxyyyypqAzz
49 aaabcxyzpqrrrabbxyyyypAzz
50 0: aaabcxyzpqrrrabbxyyyypAzz
51 aaabcxyzpqrrrabbxyyyypqAzz
52 0: aaabcxyzpqrrrabbxyyyypqAzz
53 aaabcxyzpqrrrabbxyyyypqqAzz
54 0: aaabcxyzpqrrrabbxyyyypqqAzz
55 aaabcxyzpqrrrabbxyyyypqqqAzz
56 0: aaabcxyzpqrrrabbxyyyypqqqAzz
57 aaabcxyzpqrrrabbxyyyypqqqqAzz
58 0: aaabcxyzpqrrrabbxyyyypqqqqAzz
59 aaabcxyzpqrrrabbxyyyypqqqqqAzz
60 0: aaabcxyzpqrrrabbxyyyypqqqqqAzz
61 aaabcxyzpqrrrabbxyyyypqqqqqqAzz
62 0: aaabcxyzpqrrrabbxyyyypqqqqqqAzz
63 aaaabcxyzpqrrrabbxyyyypqAzz
64 0: aaaabcxyzpqrrrabbxyyyypqAzz
65 abxyzzpqrrrabbxyyyypqAzz
66 0: abxyzzpqrrrabbxyyyypqAzz
67 aabxyzzzpqrrrabbxyyyypqAzz
68 0: aabxyzzzpqrrrabbxyyyypqAzz
69 aaabxyzzzzpqrrrabbxyyyypqAzz
70 0: aaabxyzzzzpqrrrabbxyyyypqAzz
71 aaaabxyzzzzpqrrrabbxyyyypqAzz
72 0: aaaabxyzzzzpqrrrabbxyyyypqAzz
73 abcxyzzpqrrrabbxyyyypqAzz
74 0: abcxyzzpqrrrabbxyyyypqAzz
75 aabcxyzzzpqrrrabbxyyyypqAzz
76 0: aabcxyzzzpqrrrabbxyyyypqAzz
77 aaabcxyzzzzpqrrrabbxyyyypqAzz
78 0: aaabcxyzzzzpqrrrabbxyyyypqAzz
79 aaaabcxyzzzzpqrrrabbxyyyypqAzz
80 0: aaaabcxyzzzzpqrrrabbxyyyypqAzz
81 aaaabcxyzzzzpqrrrabbbxyyyypqAzz
82 0: aaaabcxyzzzzpqrrrabbbxyyyypqAzz
83 aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
84 0: aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
85 aaabcxyzpqrrrabbxyyyypABzz
86 0: aaabcxyzpqrrrabbxyyyypABzz
87 aaabcxyzpqrrrabbxyyyypABBzz
88 0: aaabcxyzpqrrrabbxyyyypABBzz
89 >>>aaabxyzpqrrrabbxyyyypqAzz
90 0: aaabxyzpqrrrabbxyyyypqAzz
91 >aaaabxyzpqrrrabbxyyyypqAzz
92 0: aaaabxyzpqrrrabbxyyyypqAzz
93 >>>>abcxyzpqrrrabbxyyyypqAzz
94 0: abcxyzpqrrrabbxyyyypqAzz
95\= Expect no match
96 abxyzpqrrabbxyyyypqAzz
97No match
98 abxyzpqrrrrabbxyyyypqAzz
99No match
100 abxyzpqrrrabxyyyypqAzz
101No match
102 aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz
103No match
104 aaaabcxyzzzzpqrrrabbbxyyypqAzz
105No match
106 aaabcxyzpqrrrabbxyyyypqqqqqqqAzz
107No match
108
109/^(abc){1,2}zz/
110 abczz
111 0: abczz
112 1: abc
113 abcabczz
114 0: abcabczz
115 1: abc
116\= Expect no match
117 zz
118No match
119 abcabcabczz
120No match
121 >>abczz
122No match
123
124/^(b+?|a){1,2}?c/
125 bc
126 0: bc
127 1: b
128 bbc
129 0: bbc
130 1: b
131 bbbc
132 0: bbbc
133 1: bb
134 bac
135 0: bac
136 1: a
137 bbac
138 0: bbac
139 1: a
140 aac
141 0: aac
142 1: a
143 abbbbbbbbbbbc
144 0: abbbbbbbbbbbc
145 1: bbbbbbbbbbb
146 bbbbbbbbbbbac
147 0: bbbbbbbbbbbac
148 1: a
149\= Expect no match
150 aaac
151No match
152 abbbbbbbbbbbac
153No match
154
155/^(b+|a){1,2}c/
156 bc
157 0: bc
158 1: b
159 bbc
160 0: bbc
161 1: bb
162 bbbc
163 0: bbbc
164 1: bbb
165 bac
166 0: bac
167 1: a
168 bbac
169 0: bbac
170 1: a
171 aac
172 0: aac
173 1: a
174 abbbbbbbbbbbc
175 0: abbbbbbbbbbbc
176 1: bbbbbbbbbbb
177 bbbbbbbbbbbac
178 0: bbbbbbbbbbbac
179 1: a
180\= Expect no match
181 aaac
182No match
183 abbbbbbbbbbbac
184No match
185
186/^(b+|a){1,2}?bc/
187 bbc
188 0: bbc
189 1: b
190
191/^(b*|ba){1,2}?bc/
192 babc
193 0: babc
194 1: ba
195 bbabc
196 0: bbabc
197 1: ba
198 bababc
199 0: bababc
200 1: ba
201\= Expect no match
202 bababbc
203No match
204 babababc
205No match
206
207/^(ba|b*){1,2}?bc/
208 babc
209 0: babc
210 1: ba
211 bbabc
212 0: bbabc
213 1: ba
214 bababc
215 0: bababc
216 1: ba
217\= Expect no match
218 bababbc
219No match
220 babababc
221No match
222
223#/^\ca\cA\c[;\c:/
224# \x01\x01\e;z
225# 0: \x01\x01\x1b;z
226
227/^[ab\]cde]/
228 athing
229 0: a
230 bthing
231 0: b
232 ]thing
233 0: ]
234 cthing
235 0: c
236 dthing
237 0: d
238 ething
239 0: e
240\= Expect no match
241 fthing
242No match
243 [thing
244No match
245 \\thing
246No match
247
248/^[]cde]/
249 ]thing
250 0: ]
251 cthing
252 0: c
253 dthing
254 0: d
255 ething
256 0: e
257\= Expect no match
258 athing
259No match
260 fthing
261No match
262
263/^[^ab\]cde]/
264 fthing
265 0: f
266 [thing
267 0: [
268 \\thing
269 0: \
270\= Expect no match
271 athing
272No match
273 bthing
274No match
275 ]thing
276No match
277 cthing
278No match
279 dthing
280No match
281 ething
282No match
283
284/^[^]cde]/
285 athing
286 0: a
287 fthing
288 0: f
289\= Expect no match
290 ]thing
291No match
292 cthing
293No match
294 dthing
295No match
296 ething
297No match
298
299# DLC - I don't get this one
300#/^\/
301# 
302# 0: \x81
303
304#updated to handle 16-bits utf8
305/^ÿ/
306 ÿ
307 0: \xc3\xbf
308
309/^[0-9]+$/
310 0
311 0: 0
312 1
313 0: 1
314 2
315 0: 2
316 3
317 0: 3
318 4
319 0: 4
320 5
321 0: 5
322 6
323 0: 6
324 7
325 0: 7
326 8
327 0: 8
328 9
329 0: 9
330 10
331 0: 10
332 100
333 0: 100
334\= Expect no match
335 abc
336No match
337
338/^.*nter/
339 enter
340 0: enter
341 inter
342 0: inter
343 uponter
344 0: uponter
345
346/^xxx[0-9]+$/
347 xxx0
348 0: xxx0
349 xxx1234
350 0: xxx1234
351\= Expect no match
352 xxx
353No match
354
355/^.+[0-9][0-9][0-9]$/
356 x123
357 0: x123
358 x1234
359 0: x1234
360 xx123
361 0: xx123
362 123456
363 0: 123456
364\= Expect no match
365 123
366No match
367
368/^.+?[0-9][0-9][0-9]$/
369 x123
370 0: x123
371 x1234
372 0: x1234
373 xx123
374 0: xx123
375 123456
376 0: 123456
377\= Expect no match
378 123
379No match
380
381/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/
382 abc!pqr=apquxz.ixr.zzz.ac.uk
383 0: abc!pqr=apquxz.ixr.zzz.ac.uk
384 1: abc
385 2: pqr
386\= Expect no match
387 !pqr=apquxz.ixr.zzz.ac.uk
388No match
389 abc!=apquxz.ixr.zzz.ac.uk
390No match
391 abc!pqr=apquxz:ixr.zzz.ac.uk
392No match
393 abc!pqr=apquxz.ixr.zzz.ac.ukk
394No match
395
396/:/
397 Well, we need a colon: somewhere
398 0: :
399\= Expect no match
400 Fail without a colon
401No match
402
403/([\da-f:]+)$/i
404 0abc
405 0: 0abc
406 1: 0abc
407 abc
408 0: abc
409 1: abc
410 fed
411 0: fed
412 1: fed
413 E
414 0: E
415 1: E
416 ::
417 0: ::
418 1: ::
419 5f03:12C0::932e
420 0: 5f03:12C0::932e
421 1: 5f03:12C0::932e
422 fed def
423 0: def
424 1: def
425 Any old stuff
426 0: ff
427 1: ff
428\= Expect no match
429 0zzz
430No match
431 gzzz
432No match
433 fed\x20
434No match
435 Any old rubbish
436No match
437
438/^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/
439 .1.2.3
440 0: .1.2.3
441 1: 1
442 2: 2
443 3: 3
444 A.12.123.0
445 0: A.12.123.0
446 1: 12
447 2: 123
448 3: 0
449\= Expect no match
450 .1.2.3333
451No match
452 1.2.3
453No match
454 1234.2.3
455No match
456
457/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/
458 1 IN SOA non-sp1 non-sp2(
459 0: 1 IN SOA non-sp1 non-sp2(
460 1: 1
461 2: non-sp1
462 3: non-sp2
463 1 IN SOA non-sp1 non-sp2 (
464 0: 1 IN SOA non-sp1 non-sp2 (
465 1: 1
466 2: non-sp1
467 3: non-sp2
468\= Expect no match
469 1IN SOA non-sp1 non-sp2(
470No match
471
472/^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/
473 a.
474 0: a.
475 Z.
476 0: Z.
477 2.
478 0: 2.
479 ab-c.pq-r.
480 0: ab-c.pq-r.
481 1: .pq-r
482 sxk.zzz.ac.uk.
483 0: sxk.zzz.ac.uk.
484 1: .uk
485 x-.y-.
486 0: x-.y-.
487 1: .y-
488\= Expect no match
489 -abc.peq.
490No match
491
492/^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/
493 *.a
494 0: *.a
495 *.b0-a
496 0: *.b0-a
497 1: 0-a
498 *.c3-b.c
499 0: *.c3-b.c
500 1: 3-b
501 2: .c
502 *.c-a.b-c
503 0: *.c-a.b-c
504 1: -a
505 2: .b-c
506 3: -c
507\= Expect no match
508 *.0
509No match
510 *.a-
511No match
512 *.a-b.c-
513No match
514 *.c-a.0-c
515No match
516
517/^(?=ab(de))(abd)(e)/
518 abde
519 0: abde
520 1: de
521 2: abd
522 3: e
523
524/^(?!(ab)de|x)(abd)(f)/
525 abdf
526 0: abdf
527 1: <unset>
528 2: abd
529 3: f
530
531/^(?=(ab(cd)))(ab)/
532 abcd
533 0: ab
534 1: abcd
535 2: cd
536 3: ab
537
538/^[\da-f](\.[\da-f])*$/i
539 a.b.c.d
540 0: a.b.c.d
541 1: .d
542 A.B.C.D
543 0: A.B.C.D
544 1: .D
545 a.b.c.1.2.3.C
546 0: a.b.c.1.2.3.C
547 1: .C
548
549/^\".*\"\s*(;.*)?$/
550 \"1234\"
551 0: "1234"
552 \"abcd\" ;
553 0: "abcd" ;
554 1: ;
555 \"\" ; rhubarb
556 0: "" ; rhubarb
557 1: ; rhubarb
558\= Expect no match
559 \"1234\" : things
560No match
561
562/^$/
563 \
564 0:
565\= Expect no match
566 A non-empty line
567No match
568
569/ ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x
570 ab c
571 0: ab c
572\= Expect no match
573 abc
574No match
575 ab cde
576No match
577
578/(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/
579 ab c
580 0: ab c
581\= Expect no match
582 abc
583No match
584 ab cde
585No match
586
587/^ a\ b[c ]d $/x
588 a bcd
589 0: a bcd
590 a b d
591 0: a b d
592\= Expect no match
593 abcd
594No match
595 ab d
596No match
597
598/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/
599 abcdefhijklm
600 0: abcdefhijklm
601 1: abc
602 2: bc
603 3: c
604 4: def
605 5: ef
606 6: f
607 7: hij
608 8: ij
609 9: j
61010: klm
61111: lm
61212: m
613
614/^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$/
615 abcdefhijklm
616 0: abcdefhijklm
617 1: bc
618 2: c
619 3: ef
620 4: f
621 5: ij
622 6: j
623 7: lm
624 8: m
625
626#/^[\w][\W][\s][\S][\d][\D][\b][\n][\c]][\022]/
627# a+ Z0+\x08\n\x1d\x12
628# 0: a+ Z0+\x08\x0a\x1d\x12
629
630/^[.^$|()*+?{,}]+/
631 .^\$(*+)|{?,?}
632 0: .^$(*+)|{?,?}
633
634/^a*\w/
635 z
636 0: z
637 az
638 0: az
639 aaaz
640 0: aaaz
641 a
642 0: a
643 aa
644 0: aa
645 aaaa
646 0: aaaa
647 a+
648 0: a
649 aa+
650 0: aa
651
652/^a*?\w/
653 z
654 0: z
655 az
656 0: a
657 aaaz
658 0: a
659 a
660 0: a
661 aa
662 0: a
663 aaaa
664 0: a
665 a+
666 0: a
667 aa+
668 0: a
669
670/^a+\w/
671 az
672 0: az
673 aaaz
674 0: aaaz
675 aa
676 0: aa
677 aaaa
678 0: aaaa
679 aa+
680 0: aa
681
682/^a+?\w/
683 az
684 0: az
685 aaaz
686 0: aa
687 aa
688 0: aa
689 aaaa
690 0: aa
691 aa+
692 0: aa
693
694/^\d{8}\w{2,}/
695 1234567890
696 0: 1234567890
697 12345678ab
698 0: 12345678ab
699 12345678__
700 0: 12345678__
701\= Expect no match
702 1234567
703No match
704
705/^[aeiou\d]{4,5}$/
706 uoie
707 0: uoie
708 1234
709 0: 1234
710 12345
711 0: 12345
712 aaaaa
713 0: aaaaa
714\= Expect no match
715 123456
716No match
717
718/^[aeiou\d]{4,5}?/
719 uoie
720 0: uoie
721 1234
722 0: 1234
723 12345
724 0: 1234
725 aaaaa
726 0: aaaa
727 123456
728 0: 1234
729
730/\A(abc|def)=(\1){2,3}\Z/
731 abc=abcabc
732 0: abc=abcabc
733 1: abc
734 2: abc
735 def=defdefdef
736 0: def=defdefdef
737 1: def
738 2: def
739\= Expect no match
740 abc=defdef
741No match
742
743/^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/
744 abcdefghijkcda2
745 0: abcdefghijkcda2
746 1: a
747 2: b
748 3: c
749 4: d
750 5: e
751 6: f
752 7: g
753 8: h
754 9: i
75510: j
75611: k
75712: cd
758 abcdefghijkkkkcda2
759 0: abcdefghijkkkkcda2
760 1: a
761 2: b
762 3: c
763 4: d
764 5: e
765 6: f
766 7: g
767 8: h
768 9: i
76910: j
77011: k
77112: cd
772
773/(cat(a(ract|tonic)|erpillar)) \1()2(3)/
774 cataract cataract23
775 0: cataract cataract23
776 1: cataract
777 2: aract
778 3: ract
779 4:
780 5: 3
781 catatonic catatonic23
782 0: catatonic catatonic23
783 1: catatonic
784 2: atonic
785 3: tonic
786 4:
787 5: 3
788 caterpillar caterpillar23
789 0: caterpillar caterpillar23
790 1: caterpillar
791 2: erpillar
792 3: <unset>
793 4:
794 5: 3
795
796
797/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/
798 From abcd Mon Sep 01 12:33:02 1997
799 0: From abcd Mon Sep 01 12:33
800 1: abcd
801
802/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/
803 From abcd Mon Sep 01 12:33:02 1997
804 0: From abcd Mon Sep 01 12:33
805 1: Sep
806 From abcd Mon Sep 1 12:33:02 1997
807 0: From abcd Mon Sep 1 12:33
808 1: Sep
809\= Expect no match
810 From abcd Sep 01 12:33:02 1997
811No match
812
813/^12.34/s
814 12\n34
815 0: 12\x0a34
816 12\r34
817 0: 12\x0d34
818
819/\w+(?=\t)/
820 the quick brown\t fox
821 0: brown
822
823/foo(?!bar)(.*)/
824 foobar is foolish see?
825 0: foolish see?
826 1: lish see?
827
828/(?:(?!foo)...|^.{0,2})bar(.*)/
829 foobar crowbar etc
830 0: rowbar etc
831 1: etc
832 barrel
833 0: barrel
834 1: rel
835 2barrel
836 0: 2barrel
837 1: rel
838 A barrel
839 0: A barrel
840 1: rel
841
842/^(\D*)(?=\d)(?!123)/
843 abc456
844 0: abc
845 1: abc
846\= Expect no match
847 abc123
848No match
849
850/^1234(?# test newlines
851 inside)/
852 1234
853 0: 1234
854
855/^1234 #comment in extended re
856 /x
857 1234
858 0: 1234
859
860/#rhubarb
861 abcd/x
862 abcd
863 0: abcd
864
865/^abcd#rhubarb/x
866 abcd
867 0: abcd
868
869/^(a)\1{2,3}(.)/
870 aaab
871 0: aaab
872 1: a
873 2: b
874 aaaab
875 0: aaaab
876 1: a
877 2: b
878 aaaaab
879 0: aaaaa
880 1: a
881 2: a
882 aaaaaab
883 0: aaaaa
884 1: a
885 2: a
886
887/(?!^)abc/
888 the abc
889 0: abc
890\= Expect no match
891 abc
892No match
893
894/(?=^)abc/
895 abc
896 0: abc
897\= Expect no match
898 the abc
899No match
900
901/^[ab]{1,3}(ab*|b)/
902 aabbbbb
903 0: aabb
904 1: b
905
906/^[ab]{1,3}?(ab*|b)/
907 aabbbbb
908 0: aabbbbb
909 1: abbbbb
910
911/^[ab]{1,3}?(ab*?|b)/
912 aabbbbb
913 0: aa
914 1: a
915
916/^[ab]{1,3}(ab*?|b)/
917 aabbbbb
918 0: aabb
919 1: b
920
921/ (?: [\040\t] | \(
922(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
923\) )* # optional leading comment
924(?: (?:
925[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
926(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
927|
928" (?: # opening quote...
929[^\\\x80-\xff\n\015"] # Anything except backslash and quote
930| # or
931\\ [^\x80-\xff] # Escaped something (something != CR)
932)* " # closing quote
933) # initial word
934(?: (?: [\040\t] | \(
935(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
936\) )* \. (?: [\040\t] | \(
937(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
938\) )* (?:
939[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
940(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
941|
942" (?: # opening quote...
943[^\\\x80-\xff\n\015"] # Anything except backslash and quote
944| # or
945\\ [^\x80-\xff] # Escaped something (something != CR)
946)* " # closing quote
947) )* # further okay, if led by a period
948(?: [\040\t] | \(
949(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
950\) )* @ (?: [\040\t] | \(
951(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
952\) )* (?:
953[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
954(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
955| \[ # [
956(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
957\] # ]
958) # initial subdomain
959(?: #
960(?: [\040\t] | \(
961(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
962\) )* \. # if led by a period...
963(?: [\040\t] | \(
964(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
965\) )* (?:
966[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
967(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
968| \[ # [
969(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
970\] # ]
971) # ...further okay
972)*
973# address
974| # or
975(?:
976[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
977(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
978|
979" (?: # opening quote...
980[^\\\x80-\xff\n\015"] # Anything except backslash and quote
981| # or
982\\ [^\x80-\xff] # Escaped something (something != CR)
983)* " # closing quote
984) # one word, optionally followed by....
985(?:
986[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
987\(
988(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
989\) | # comments, or...
990
991" (?: # opening quote...
992[^\\\x80-\xff\n\015"] # Anything except backslash and quote
993| # or
994\\ [^\x80-\xff] # Escaped something (something != CR)
995)* " # closing quote
996# quoted strings
997)*
998< (?: [\040\t] | \(
999(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1000\) )* # leading <
1001(?: @ (?: [\040\t] | \(
1002(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1003\) )* (?:
1004[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1005(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1006| \[ # [
1007(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1008\] # ]
1009) # initial subdomain
1010(?: #
1011(?: [\040\t] | \(
1012(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1013\) )* \. # if led by a period...
1014(?: [\040\t] | \(
1015(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1016\) )* (?:
1017[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1018(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1019| \[ # [
1020(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1021\] # ]
1022) # ...further okay
1023)*
1024
1025(?: (?: [\040\t] | \(
1026(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1027\) )* , (?: [\040\t] | \(
1028(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1029\) )* @ (?: [\040\t] | \(
1030(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1031\) )* (?:
1032[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1033(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1034| \[ # [
1035(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1036\] # ]
1037) # initial subdomain
1038(?: #
1039(?: [\040\t] | \(
1040(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1041\) )* \. # if led by a period...
1042(?: [\040\t] | \(
1043(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1044\) )* (?:
1045[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1046(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1047| \[ # [
1048(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1049\] # ]
1050) # ...further okay
1051)*
1052)* # further okay, if led by comma
1053: # closing colon
1054(?: [\040\t] | \(
1055(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1056\) )* )? # optional route
1057(?:
1058[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1059(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1060|
1061" (?: # opening quote...
1062[^\\\x80-\xff\n\015"] # Anything except backslash and quote
1063| # or
1064\\ [^\x80-\xff] # Escaped something (something != CR)
1065)* " # closing quote
1066) # initial word
1067(?: (?: [\040\t] | \(
1068(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1069\) )* \. (?: [\040\t] | \(
1070(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1071\) )* (?:
1072[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1073(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1074|
1075" (?: # opening quote...
1076[^\\\x80-\xff\n\015"] # Anything except backslash and quote
1077| # or
1078\\ [^\x80-\xff] # Escaped something (something != CR)
1079)* " # closing quote
1080) )* # further okay, if led by a period
1081(?: [\040\t] | \(
1082(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1083\) )* @ (?: [\040\t] | \(
1084(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1085\) )* (?:
1086[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1087(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1088| \[ # [
1089(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1090\] # ]
1091) # initial subdomain
1092(?: #
1093(?: [\040\t] | \(
1094(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1095\) )* \. # if led by a period...
1096(?: [\040\t] | \(
1097(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1098\) )* (?:
1099[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1100(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1101| \[ # [
1102(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1103\] # ]
1104) # ...further okay
1105)*
1106# address spec
1107(?: [\040\t] | \(
1108(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1109\) )* > # trailing >
1110# name and address
1111) (?: [\040\t] | \(
1112(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
1113\) )* # optional trailing comment
1114/x
1115 Alan Other <user\@dom.ain>
1116 0: Alan Other <user@dom.ain>
1117 <user\@dom.ain>
1118 0: user@dom.ain
1119 user\@dom.ain
1120 0: user@dom.ain
1121 \"A. Other\" <user.1234\@dom.ain> (a comment)
1122 0: "A. Other" <user.1234@dom.ain> (a comment)
1123 A. Other <user.1234\@dom.ain> (a comment)
1124 0: Other <user.1234@dom.ain> (a comment)
1125 \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
1126 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
1127 A missing angle <user\@some.where
1128 0: user@some.where
1129\= Expect no match
1130 The quick brown fox
1131No match
1132
1133/[\040\t]* # Nab whitespace.
1134(?:
1135\( # (
1136[^\\\x80-\xff\n\015()] * # normal*
1137(?: # (
1138(?: \\ [^\x80-\xff] |
1139\( # (
1140[^\\\x80-\xff\n\015()] * # normal*
1141(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1142\) # )
1143) # special
1144[^\\\x80-\xff\n\015()] * # normal*
1145)* # )*
1146\) # )
1147[\040\t]* )* # If comment found, allow more spaces.
1148# optional leading comment
1149(?:
1150(?:
1151[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1152(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1153# Atom
1154| # or
1155" # "
1156[^\\\x80-\xff\n\015"] * # normal
1157(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
1158" # "
1159# Quoted string
1160)
1161[\040\t]* # Nab whitespace.
1162(?:
1163\( # (
1164[^\\\x80-\xff\n\015()] * # normal*
1165(?: # (
1166(?: \\ [^\x80-\xff] |
1167\( # (
1168[^\\\x80-\xff\n\015()] * # normal*
1169(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1170\) # )
1171) # special
1172[^\\\x80-\xff\n\015()] * # normal*
1173)* # )*
1174\) # )
1175[\040\t]* )* # If comment found, allow more spaces.
1176(?:
1177\.
1178[\040\t]* # Nab whitespace.
1179(?:
1180\( # (
1181[^\\\x80-\xff\n\015()] * # normal*
1182(?: # (
1183(?: \\ [^\x80-\xff] |
1184\( # (
1185[^\\\x80-\xff\n\015()] * # normal*
1186(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1187\) # )
1188) # special
1189[^\\\x80-\xff\n\015()] * # normal*
1190)* # )*
1191\) # )
1192[\040\t]* )* # If comment found, allow more spaces.
1193(?:
1194[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1195(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1196# Atom
1197| # or
1198" # "
1199[^\\\x80-\xff\n\015"] * # normal
1200(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
1201" # "
1202# Quoted string
1203)
1204[\040\t]* # Nab whitespace.
1205(?:
1206\( # (
1207[^\\\x80-\xff\n\015()] * # normal*
1208(?: # (
1209(?: \\ [^\x80-\xff] |
1210\( # (
1211[^\\\x80-\xff\n\015()] * # normal*
1212(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1213\) # )
1214) # special
1215[^\\\x80-\xff\n\015()] * # normal*
1216)* # )*
1217\) # )
1218[\040\t]* )* # If comment found, allow more spaces.
1219# additional words
1220)*
1221@
1222[\040\t]* # Nab whitespace.
1223(?:
1224\( # (
1225[^\\\x80-\xff\n\015()] * # normal*
1226(?: # (
1227(?: \\ [^\x80-\xff] |
1228\( # (
1229[^\\\x80-\xff\n\015()] * # normal*
1230(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1231\) # )
1232) # special
1233[^\\\x80-\xff\n\015()] * # normal*
1234)* # )*
1235\) # )
1236[\040\t]* )* # If comment found, allow more spaces.
1237(?:
1238[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1239(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1240|
1241\[ # [
1242(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1243\] # ]
1244)
1245[\040\t]* # Nab whitespace.
1246(?:
1247\( # (
1248[^\\\x80-\xff\n\015()] * # normal*
1249(?: # (
1250(?: \\ [^\x80-\xff] |
1251\( # (
1252[^\\\x80-\xff\n\015()] * # normal*
1253(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1254\) # )
1255) # special
1256[^\\\x80-\xff\n\015()] * # normal*
1257)* # )*
1258\) # )
1259[\040\t]* )* # If comment found, allow more spaces.
1260# optional trailing comments
1261(?:
1262\.
1263[\040\t]* # Nab whitespace.
1264(?:
1265\( # (
1266[^\\\x80-\xff\n\015()] * # normal*
1267(?: # (
1268(?: \\ [^\x80-\xff] |
1269\( # (
1270[^\\\x80-\xff\n\015()] * # normal*
1271(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1272\) # )
1273) # special
1274[^\\\x80-\xff\n\015()] * # normal*
1275)* # )*
1276\) # )
1277[\040\t]* )* # If comment found, allow more spaces.
1278(?:
1279[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1280(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1281|
1282\[ # [
1283(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1284\] # ]
1285)
1286[\040\t]* # Nab whitespace.
1287(?:
1288\( # (
1289[^\\\x80-\xff\n\015()] * # normal*
1290(?: # (
1291(?: \\ [^\x80-\xff] |
1292\( # (
1293[^\\\x80-\xff\n\015()] * # normal*
1294(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1295\) # )
1296) # special
1297[^\\\x80-\xff\n\015()] * # normal*
1298)* # )*
1299\) # )
1300[\040\t]* )* # If comment found, allow more spaces.
1301# optional trailing comments
1302)*
1303# address
1304| # or
1305(?:
1306[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1307(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1308# Atom
1309| # or
1310" # "
1311[^\\\x80-\xff\n\015"] * # normal
1312(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
1313" # "
1314# Quoted string
1315)
1316# leading word
1317[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # "normal" atoms and or spaces
1318(?:
1319(?:
1320\( # (
1321[^\\\x80-\xff\n\015()] * # normal*
1322(?: # (
1323(?: \\ [^\x80-\xff] |
1324\( # (
1325[^\\\x80-\xff\n\015()] * # normal*
1326(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1327\) # )
1328) # special
1329[^\\\x80-\xff\n\015()] * # normal*
1330)* # )*
1331\) # )
1332|
1333" # "
1334[^\\\x80-\xff\n\015"] * # normal
1335(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
1336" # "
1337) # "special" comment or quoted string
1338[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # more "normal"
1339)*
1340<
1341[\040\t]* # Nab whitespace.
1342(?:
1343\( # (
1344[^\\\x80-\xff\n\015()] * # normal*
1345(?: # (
1346(?: \\ [^\x80-\xff] |
1347\( # (
1348[^\\\x80-\xff\n\015()] * # normal*
1349(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1350\) # )
1351) # special
1352[^\\\x80-\xff\n\015()] * # normal*
1353)* # )*
1354\) # )
1355[\040\t]* )* # If comment found, allow more spaces.
1356# <
1357(?:
1358@
1359[\040\t]* # Nab whitespace.
1360(?:
1361\( # (
1362[^\\\x80-\xff\n\015()] * # normal*
1363(?: # (
1364(?: \\ [^\x80-\xff] |
1365\( # (
1366[^\\\x80-\xff\n\015()] * # normal*
1367(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1368\) # )
1369) # special
1370[^\\\x80-\xff\n\015()] * # normal*
1371)* # )*
1372\) # )
1373[\040\t]* )* # If comment found, allow more spaces.
1374(?:
1375[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1376(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1377|
1378\[ # [
1379(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1380\] # ]
1381)
1382[\040\t]* # Nab whitespace.
1383(?:
1384\( # (
1385[^\\\x80-\xff\n\015()] * # normal*
1386(?: # (
1387(?: \\ [^\x80-\xff] |
1388\( # (
1389[^\\\x80-\xff\n\015()] * # normal*
1390(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1391\) # )
1392) # special
1393[^\\\x80-\xff\n\015()] * # normal*
1394)* # )*
1395\) # )
1396[\040\t]* )* # If comment found, allow more spaces.
1397# optional trailing comments
1398(?:
1399\.
1400[\040\t]* # Nab whitespace.
1401(?:
1402\( # (
1403[^\\\x80-\xff\n\015()] * # normal*
1404(?: # (
1405(?: \\ [^\x80-\xff] |
1406\( # (
1407[^\\\x80-\xff\n\015()] * # normal*
1408(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1409\) # )
1410) # special
1411[^\\\x80-\xff\n\015()] * # normal*
1412)* # )*
1413\) # )
1414[\040\t]* )* # If comment found, allow more spaces.
1415(?:
1416[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1417(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1418|
1419\[ # [
1420(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1421\] # ]
1422)
1423[\040\t]* # Nab whitespace.
1424(?:
1425\( # (
1426[^\\\x80-\xff\n\015()] * # normal*
1427(?: # (
1428(?: \\ [^\x80-\xff] |
1429\( # (
1430[^\\\x80-\xff\n\015()] * # normal*
1431(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1432\) # )
1433) # special
1434[^\\\x80-\xff\n\015()] * # normal*
1435)* # )*
1436\) # )
1437[\040\t]* )* # If comment found, allow more spaces.
1438# optional trailing comments
1439)*
1440(?: ,
1441[\040\t]* # Nab whitespace.
1442(?:
1443\( # (
1444[^\\\x80-\xff\n\015()] * # normal*
1445(?: # (
1446(?: \\ [^\x80-\xff] |
1447\( # (
1448[^\\\x80-\xff\n\015()] * # normal*
1449(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1450\) # )
1451) # special
1452[^\\\x80-\xff\n\015()] * # normal*
1453)* # )*
1454\) # )
1455[\040\t]* )* # If comment found, allow more spaces.
1456@
1457[\040\t]* # Nab whitespace.
1458(?:
1459\( # (
1460[^\\\x80-\xff\n\015()] * # normal*
1461(?: # (
1462(?: \\ [^\x80-\xff] |
1463\( # (
1464[^\\\x80-\xff\n\015()] * # normal*
1465(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1466\) # )
1467) # special
1468[^\\\x80-\xff\n\015()] * # normal*
1469)* # )*
1470\) # )
1471[\040\t]* )* # If comment found, allow more spaces.
1472(?:
1473[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1474(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1475|
1476\[ # [
1477(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1478\] # ]
1479)
1480[\040\t]* # Nab whitespace.
1481(?:
1482\( # (
1483[^\\\x80-\xff\n\015()] * # normal*
1484(?: # (
1485(?: \\ [^\x80-\xff] |
1486\( # (
1487[^\\\x80-\xff\n\015()] * # normal*
1488(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1489\) # )
1490) # special
1491[^\\\x80-\xff\n\015()] * # normal*
1492)* # )*
1493\) # )
1494[\040\t]* )* # If comment found, allow more spaces.
1495# optional trailing comments
1496(?:
1497\.
1498[\040\t]* # Nab whitespace.
1499(?:
1500\( # (
1501[^\\\x80-\xff\n\015()] * # normal*
1502(?: # (
1503(?: \\ [^\x80-\xff] |
1504\( # (
1505[^\\\x80-\xff\n\015()] * # normal*
1506(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1507\) # )
1508) # special
1509[^\\\x80-\xff\n\015()] * # normal*
1510)* # )*
1511\) # )
1512[\040\t]* )* # If comment found, allow more spaces.
1513(?:
1514[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1515(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1516|
1517\[ # [
1518(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1519\] # ]
1520)
1521[\040\t]* # Nab whitespace.
1522(?:
1523\( # (
1524[^\\\x80-\xff\n\015()] * # normal*
1525(?: # (
1526(?: \\ [^\x80-\xff] |
1527\( # (
1528[^\\\x80-\xff\n\015()] * # normal*
1529(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1530\) # )
1531) # special
1532[^\\\x80-\xff\n\015()] * # normal*
1533)* # )*
1534\) # )
1535[\040\t]* )* # If comment found, allow more spaces.
1536# optional trailing comments
1537)*
1538)* # additional domains
1539:
1540[\040\t]* # Nab whitespace.
1541(?:
1542\( # (
1543[^\\\x80-\xff\n\015()] * # normal*
1544(?: # (
1545(?: \\ [^\x80-\xff] |
1546\( # (
1547[^\\\x80-\xff\n\015()] * # normal*
1548(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1549\) # )
1550) # special
1551[^\\\x80-\xff\n\015()] * # normal*
1552)* # )*
1553\) # )
1554[\040\t]* )* # If comment found, allow more spaces.
1555# optional trailing comments
1556)? # optional route
1557(?:
1558[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1559(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1560# Atom
1561| # or
1562" # "
1563[^\\\x80-\xff\n\015"] * # normal
1564(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
1565" # "
1566# Quoted string
1567)
1568[\040\t]* # Nab whitespace.
1569(?:
1570\( # (
1571[^\\\x80-\xff\n\015()] * # normal*
1572(?: # (
1573(?: \\ [^\x80-\xff] |
1574\( # (
1575[^\\\x80-\xff\n\015()] * # normal*
1576(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1577\) # )
1578) # special
1579[^\\\x80-\xff\n\015()] * # normal*
1580)* # )*
1581\) # )
1582[\040\t]* )* # If comment found, allow more spaces.
1583(?:
1584\.
1585[\040\t]* # Nab whitespace.
1586(?:
1587\( # (
1588[^\\\x80-\xff\n\015()] * # normal*
1589(?: # (
1590(?: \\ [^\x80-\xff] |
1591\( # (
1592[^\\\x80-\xff\n\015()] * # normal*
1593(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1594\) # )
1595) # special
1596[^\\\x80-\xff\n\015()] * # normal*
1597)* # )*
1598\) # )
1599[\040\t]* )* # If comment found, allow more spaces.
1600(?:
1601[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1602(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1603# Atom
1604| # or
1605" # "
1606[^\\\x80-\xff\n\015"] * # normal
1607(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
1608" # "
1609# Quoted string
1610)
1611[\040\t]* # Nab whitespace.
1612(?:
1613\( # (
1614[^\\\x80-\xff\n\015()] * # normal*
1615(?: # (
1616(?: \\ [^\x80-\xff] |
1617\( # (
1618[^\\\x80-\xff\n\015()] * # normal*
1619(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1620\) # )
1621) # special
1622[^\\\x80-\xff\n\015()] * # normal*
1623)* # )*
1624\) # )
1625[\040\t]* )* # If comment found, allow more spaces.
1626# additional words
1627)*
1628@
1629[\040\t]* # Nab whitespace.
1630(?:
1631\( # (
1632[^\\\x80-\xff\n\015()] * # normal*
1633(?: # (
1634(?: \\ [^\x80-\xff] |
1635\( # (
1636[^\\\x80-\xff\n\015()] * # normal*
1637(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1638\) # )
1639) # special
1640[^\\\x80-\xff\n\015()] * # normal*
1641)* # )*
1642\) # )
1643[\040\t]* )* # If comment found, allow more spaces.
1644(?:
1645[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1646(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1647|
1648\[ # [
1649(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1650\] # ]
1651)
1652[\040\t]* # Nab whitespace.
1653(?:
1654\( # (
1655[^\\\x80-\xff\n\015()] * # normal*
1656(?: # (
1657(?: \\ [^\x80-\xff] |
1658\( # (
1659[^\\\x80-\xff\n\015()] * # normal*
1660(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1661\) # )
1662) # special
1663[^\\\x80-\xff\n\015()] * # normal*
1664)* # )*
1665\) # )
1666[\040\t]* )* # If comment found, allow more spaces.
1667# optional trailing comments
1668(?:
1669\.
1670[\040\t]* # Nab whitespace.
1671(?:
1672\( # (
1673[^\\\x80-\xff\n\015()] * # normal*
1674(?: # (
1675(?: \\ [^\x80-\xff] |
1676\( # (
1677[^\\\x80-\xff\n\015()] * # normal*
1678(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1679\) # )
1680) # special
1681[^\\\x80-\xff\n\015()] * # normal*
1682)* # )*
1683\) # )
1684[\040\t]* )* # If comment found, allow more spaces.
1685(?:
1686[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
1687(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
1688|
1689\[ # [
1690(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
1691\] # ]
1692)
1693[\040\t]* # Nab whitespace.
1694(?:
1695\( # (
1696[^\\\x80-\xff\n\015()] * # normal*
1697(?: # (
1698(?: \\ [^\x80-\xff] |
1699\( # (
1700[^\\\x80-\xff\n\015()] * # normal*
1701(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
1702\) # )
1703) # special
1704[^\\\x80-\xff\n\015()] * # normal*
1705)* # )*
1706\) # )
1707[\040\t]* )* # If comment found, allow more spaces.
1708# optional trailing comments
1709)*
1710# address spec
1711> # >
1712# name and address
1713)
1714/x
1715 Alan Other <user\@dom.ain>
1716 0: Alan Other <user@dom.ain>
1717 <user\@dom.ain>
1718 0: user@dom.ain
1719 user\@dom.ain
1720 0: user@dom.ain
1721 \"A. Other\" <user.1234\@dom.ain> (a comment)
1722 0: "A. Other" <user.1234@dom.ain>
1723 A. Other <user.1234\@dom.ain> (a comment)
1724 0: Other <user.1234@dom.ain>
1725 \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
1726 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
1727 A missing angle <user\@some.where
1728 0: user@some.where
1729\= Expect no match
1730 The quick brown fox
1731No match
1732
1733/abc\0def\00pqr\000xyz\0000AB/
1734 abc\0def\00pqr\000xyz\0000AB
1735 0: abc\x00def\x00pqr\x00xyz\x000AB
1736 abc456 abc\0def\00pqr\000xyz\0000ABCDE
1737 0: abc\x00def\x00pqr\x00xyz\x000AB
1738
1739/abc\x0def\x00pqr\x000xyz\x0000AB/
1740 abc\x0def\x00pqr\x000xyz\x0000AB
1741 0: abc\x0def\x00pqr\x000xyz\x0000AB
1742 abc456 abc\x0def\x00pqr\x000xyz\x0000ABCDE
1743 0: abc\x0def\x00pqr\x000xyz\x0000AB
1744
1745/^[\000-\037]/
1746 \0A
1747 0: \x00
1748 \01B
1749 0: \x01
1750 \037C
1751 0: \x1f
1752
1753#.NET doesn't do octal with 1 number
1754
1755/^(cow|)\1(bell)/
1756 cowcowbell
1757 0: cowcowbell
1758 1: cow
1759 2: bell
1760 bell
1761 0: bell
1762 1:
1763 2: bell
1764\= Expect no match
1765 cowbell
1766No match
1767
1768/^\s/
1769 \040abc
1770 0:
1771 \x0cabc
1772 0: \x0c
1773 \nabc
1774 0: \x0a
1775 \rabc
1776 0: \x0d
1777 \tabc
1778 0: \x09
1779\= Expect no match
1780 abc
1781No match
1782
1783/^a b
1784 c/x
1785 abc
1786 0: abc
1787
1788/^(a|)\1*b/
1789 ab
1790 0: ab
1791 1: a
1792 aaaab
1793 0: aaaab
1794 1: a
1795 b
1796 0: b
1797 1:
1798\= Expect no match
1799 acb
1800No match
1801
1802/^(a|)\1+b/
1803 aab
1804 0: aab
1805 1: a
1806 aaaab
1807 0: aaaab
1808 1: a
1809 b
1810 0: b
1811 1:
1812\= Expect no match
1813 ab
1814No match
1815
1816/^(a|)\1?b/
1817 ab
1818 0: ab
1819 1: a
1820 aab
1821 0: aab
1822 1: a
1823 b
1824 0: b
1825 1:
1826\= Expect no match
1827 acb
1828No match
1829
1830/^(a|)\1{2}b/
1831 aaab
1832 0: aaab
1833 1: a
1834 b
1835 0: b
1836 1:
1837\= Expect no match
1838 ab
1839No match
1840 aab
1841No match
1842 aaaab
1843No match
1844
1845/^(a|)\1{2,3}b/
1846 aaab
1847 0: aaab
1848 1: a
1849 aaaab
1850 0: aaaab
1851 1: a
1852 b
1853 0: b
1854 1:
1855\= Expect no match
1856 ab
1857No match
1858 aab
1859No match
1860 aaaaab
1861No match
1862
1863/ab{1,3}bc/
1864 abbbbc
1865 0: abbbbc
1866 abbbc
1867 0: abbbc
1868 abbc
1869 0: abbc
1870\= Expect no match
1871 abc
1872No match
1873 abbbbbc
1874No match
1875
1876/([^.]*)\.([^:]*):[T ]+(.*)/
1877 track1.title:TBlah blah blah
1878 0: track1.title:TBlah blah blah
1879 1: track1
1880 2: title
1881 3: Blah blah blah
1882
1883/([^.]*)\.([^:]*):[T ]+(.*)/i
1884 track1.title:TBlah blah blah
1885 0: track1.title:TBlah blah blah
1886 1: track1
1887 2: title
1888 3: Blah blah blah
1889
1890/([^.]*)\.([^:]*):[t ]+(.*)/i
1891 track1.title:TBlah blah blah
1892 0: track1.title:TBlah blah blah
1893 1: track1
1894 2: title
1895 3: Blah blah blah
1896
1897/^[W-c]+$/
1898 WXY_^abc
1899 0: WXY_^abc
1900\= Expect no match
1901 wxy
1902No match
1903
1904/^[W-c]+$/i
1905 WXY_^abc
1906 0: WXY_^abc
1907 wxy_^ABC
1908 0: wxy_^ABC
1909
1910/^[\x3f-\x5F]+$/i
1911 WXY_^abc
1912 0: WXY_^abc
1913 wxy_^ABC
1914 0: wxy_^ABC
1915
1916/^abc$/m
1917 abc
1918 0: abc
1919 qqq\nabc
1920 0: abc
1921 abc\nzzz
1922 0: abc
1923 qqq\nabc\nzzz
1924 0: abc
1925
1926/^abc$/
1927 abc
1928 0: abc
1929\= Expect no match
1930 qqq\nabc
1931No match
1932 abc\nzzz
1933No match
1934 qqq\nabc\nzzz
1935No match
1936
1937/\Aabc\Z/m
1938 abc
1939 0: abc
1940 abc\n
1941 0: abc
1942\= Expect no match
1943 qqq\nabc
1944No match
1945 abc\nzzz
1946No match
1947 qqq\nabc\nzzz
1948No match
1949
1950/\A(.)*\Z/s
1951 abc\ndef
1952 0: abc\x0adef
1953 1: f
1954
1955/\A(.)*\Z/m
1956\= Expect no match
1957 abc\ndef
1958No match
1959
1960/(?:b)|(?::+)/
1961 b::c
1962 0: b
1963 c::b
1964 0: ::
1965
1966/[-az]+/
1967 az-
1968 0: az-
1969\= Expect no match
1970 b
1971No match
1972
1973/[az-]+/
1974 za-
1975 0: za-
1976\= Expect no match
1977 b
1978No match
1979
1980/[a\-z]+/
1981 a-z
1982 0: a-z
1983\= Expect no match
1984 b
1985No match
1986
1987/[a-z]+/
1988 abcdxyz
1989 0: abcdxyz
1990
1991/[\d-]+/
1992 12-34
1993 0: 12-34
1994\= Expect no match
1995 aaa
1996No match
1997
1998/[\d-z]+/
1999 12-34z
2000 0: 12-34z
2001\= Expect no match
2002 aaa
2003No match
2004
2005/\x5c/
2006 \\
2007 0: \
2008
2009/\x20Z/
2010 the Zoo
2011 0: Z
2012\= Expect no match
2013 Zulu
2014No match
2015
2016/(abc)\1/i
2017 abcabc
2018 0: abcabc
2019 1: abc
2020 ABCabc
2021 0: ABCabc
2022 1: ABC
2023 abcABC
2024 0: abcABC
2025 1: abc
2026
2027/abc$/
2028 abc
2029 0: abc
2030 abc\n
2031 0: abc
2032\= Expect no match
2033 abc\ndef
2034No match
2035
2036/(abc)\123/
2037 abc\x53
2038 0: abcS
2039 1: abc
2040
2041/(abc)\100/
2042 abc\x40
2043 0: abc@
2044 1: abc
2045 abc\100
2046 0: abc@
2047 1: abc
2048
2049/(abc)\1000/
2050 abc\x400
2051 0: abc@0
2052 1: abc
2053 abc\x40\x30
2054 0: abc@0
2055 1: abc
2056 abc\1000
2057 0: abc@0
2058 1: abc
2059 abc\100\x30
2060 0: abc@0
2061 1: abc
2062 abc\100\060
2063 0: abc@0
2064 1: abc
2065 abc\100\60
2066 0: abc@0
2067 1: abc
2068
2069/^(A)(B)(C)(D)(E)(F)(G)(H)(I)\8\9$/
2070 ABCDEFGHIHI
2071 0: ABCDEFGHIHI
2072 1: A
2073 2: B
2074 3: C
2075 4: D
2076 5: E
2077 6: F
2078 7: G
2079 8: H
2080 9: I
2081
2082/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\12\123/
2083 abcdefghijkllS
2084 0: abcdefghijkllS
2085 1: a
2086 2: b
2087 3: c
2088 4: d
2089 5: e
2090 6: f
2091 7: g
2092 8: h
2093 9: i
209410: j
209511: k
209612: l
2097
2098/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12\123/
2099 abcdefghijk\12S
2100 0: abcdefghijk\x0aS
2101 1: a
2102 2: b
2103 3: c
2104 4: d
2105 5: e
2106 6: f
2107 7: g
2108 8: h
2109 9: i
211010: j
211111: k
2112
2113/a{0}bc/
2114 bc
2115 0: bc
2116
2117/(a|(bc)){0,0}?xyz/
2118 xyz
2119 0: xyz
2120
2121/abc[\10]de/
2122 abc\010de
2123 0: abc\x08de
2124
2125/abc[\1]de/
2126 abc\1de
2127 0: abc\x01de
2128
2129/(abc)[\1]de/
2130 abc\1de
2131 0: abc\x01de
2132 1: abc
2133
2134/(?s)a.b/
2135 a\nb
2136 0: a\x0ab
2137
2138/^([^a])([^\b])([^c]*)([^d]{3,4})/
2139 baNOTccccd
2140 0: baNOTcccc
2141 1: b
2142 2: a
2143 3: NOT
2144 4: cccc
2145 baNOTcccd
2146 0: baNOTccc
2147 1: b
2148 2: a
2149 3: NOT
2150 4: ccc
2151 baNOTccd
2152 0: baNOTcc
2153 1: b
2154 2: a
2155 3: NO
2156 4: Tcc
2157 bacccd
2158 0: baccc
2159 1: b
2160 2: a
2161 3:
2162 4: ccc
2163\= Expect no match
2164 anything
2165No match
2166 b\bc
2167No match
2168 baccd
2169No match
2170
2171/[^a]/
2172 Abc
2173 0: A
2174
2175/[^a]/i
2176 Abc
2177 0: b
2178
2179/[^a]+/
2180 AAAaAbc
2181 0: AAA
2182
2183/[^a]+/i
2184 AAAaAbc
2185 0: bc
2186
2187/[^a]+/
2188 bbb\nccc
2189 0: bbb\x0accc
2190
2191/[^k]$/
2192 abc
2193 0: c
2194\= Expect no match
2195 abk
2196No match
2197
2198/[^k]{2,3}$/
2199 abc
2200 0: abc
2201 kbc
2202 0: bc
2203 kabc
2204 0: abc
2205\= Expect no match
2206 abk
2207No match
2208 akb
2209No match
2210 akk
2211No match
2212
2213/^\d{8,}\@.+[^k]$/
2214 12345678\@a.b.c.d
2215 0: 12345678@a.b.c.d
2216 123456789\@x.y.z
2217 0: 123456789@x.y.z
2218\= Expect no match
2219 12345678\@x.y.uk
2220No match
2221 1234567\@a.b.c.d
2222No match
2223
2224/(a)\1{8,}/
2225 aaaaaaaaa
2226 0: aaaaaaaaa
2227 1: a
2228 aaaaaaaaaa
2229 0: aaaaaaaaaa
2230 1: a
2231\= Expect no match
2232 aaaaaaa
2233No match
2234
2235/[^a]/
2236 aaaabcd
2237 0: b
2238 aaAabcd
2239 0: A
2240
2241/[^a]/i
2242 aaaabcd
2243 0: b
2244 aaAabcd
2245 0: b
2246
2247/[^az]/
2248 aaaabcd
2249 0: b
2250 aaAabcd
2251 0: A
2252
2253/[^az]/i
2254 aaaabcd
2255 0: b
2256 aaAabcd
2257 0: b
2258
2259# trimmed upper ascii since Go is UTF-8
2260/\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\047\050\051\052\053\054\055\056\057\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077\100\101\102\103\104\105\106\107\110\111\112\113\114\115\116\117\120\121\122\123\124\125\126\127\130\131\132\133\134\135\136\137\140\141\142\143\144\145\146\147\150\151\152\153\154\155\156\157\160\161\162\163\164\165\166\167\170\171\172\173\174\175\176\177/
2261 \000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\047\050\051\052\053\054\055\056\057\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077\100\101\102\103\104\105\106\107\110\111\112\113\114\115\116\117\120\121\122\123\124\125\126\127\130\131\132\133\134\135\136\137\140\141\142\143\144\145\146\147\150\151\152\153\154\155\156\157\160\161\162\163\164\165\166\167\170\171\172\173\174\175\176\177
2262 0: \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f
2263
2264/P[^*]TAIRE[^*]{1,6}?LL/
2265 xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
2266 0: PSTAIREISLL
2267
2268/P[^*]TAIRE[^*]{1,}?LL/
2269 xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
2270 0: PSTAIREISLL
2271
2272/(\.\d\d[1-9]?)\d+/
2273 1.230003938
2274 0: .230003938
2275 1: .23
2276 1.875000282
2277 0: .875000282
2278 1: .875
2279 1.235
2280 0: .235
2281 1: .23
2282
2283/(\.\d\d((?=0)|\d(?=\d)))/
2284 1.230003938
2285 0: .23
2286 1: .23
2287 2:
2288 1.875000282
2289 0: .875
2290 1: .875
2291 2: 5
2292\= Expect no match
2293 1.235
2294No match
2295
2296/\b(foo)\s+(\w+)/i
2297 Food is on the foo table
2298 0: foo table
2299 1: foo
2300 2: table
2301
2302/foo(.*)bar/
2303 The food is under the bar in the barn.
2304 0: food is under the bar in the bar
2305 1: d is under the bar in the
2306
2307/foo(.*?)bar/
2308 The food is under the bar in the barn.
2309 0: food is under the bar
2310 1: d is under the
2311
2312/(.*)(\d*)/
2313 I have 2 numbers: 53147
2314 0: I have 2 numbers: 53147
2315 1: I have 2 numbers: 53147
2316 2:
2317
2318/(.*)(\d+)/
2319 I have 2 numbers: 53147
2320 0: I have 2 numbers: 53147
2321 1: I have 2 numbers: 5314
2322 2: 7
2323
2324/(.*?)(\d*)/
2325 I have 2 numbers: 53147
2326 0:
2327 1:
2328 2:
2329
2330/(.*?)(\d+)/
2331 I have 2 numbers: 53147
2332 0: I have 2
2333 1: I have
2334 2: 2
2335
2336/(.*)(\d+)$/
2337 I have 2 numbers: 53147
2338 0: I have 2 numbers: 53147
2339 1: I have 2 numbers: 5314
2340 2: 7
2341
2342/(.*?)(\d+)$/
2343 I have 2 numbers: 53147
2344 0: I have 2 numbers: 53147
2345 1: I have 2 numbers:
2346 2: 53147
2347
2348/(.*)\b(\d+)$/
2349 I have 2 numbers: 53147
2350 0: I have 2 numbers: 53147
2351 1: I have 2 numbers:
2352 2: 53147
2353
2354/(.*\D)(\d+)$/
2355 I have 2 numbers: 53147
2356 0: I have 2 numbers: 53147
2357 1: I have 2 numbers:
2358 2: 53147
2359
2360/^\D*(?!123)/
2361 ABC123
2362 0: AB
2363
2364/^(\D*)(?=\d)(?!123)/
2365 ABC445
2366 0: ABC
2367 1: ABC
2368\= Expect no match
2369 ABC123
2370No match
2371
2372/^[W-]46]/
2373 W46]789
2374 0: W46]
2375 -46]789
2376 0: -46]
2377\= Expect no match
2378 Wall
2379No match
2380 Zebra
2381No match
2382 42
2383No match
2384 [abcd]
2385No match
2386 ]abcd[
2387No match
2388
2389/^[W-\]46]/
2390 W46]789
2391 0: W
2392 Wall
2393 0: W
2394 Zebra
2395 0: Z
2396 Xylophone
2397 0: X
2398 42
2399 0: 4
2400 [abcd]
2401 0: [
2402 ]abcd[
2403 0: ]
2404 \\backslash
2405 0: \
2406\= Expect no match
2407 -46]789
2408No match
2409 well
2410No match
2411
2412/\d\d\/\d\d\/\d\d\d\d/
2413 01/01/2000
2414 0: 01/01/2000
2415
2416/word (?:[a-zA-Z0-9]+ ){0,10}otherword/
2417 word cat dog elephant mussel cow horse canary baboon snake shark otherword
2418 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword
2419\= Expect no match
2420 word cat dog elephant mussel cow horse canary baboon snake shark
2421No match
2422
2423/word (?:[a-zA-Z0-9]+ ){0,300}otherword/
2424\= Expect no match
2425 word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope
2426No match
2427
2428/^(a){0,0}/
2429 bcd
2430 0:
2431 abc
2432 0:
2433 aab
2434 0:
2435
2436/^(a){0,1}/
2437 bcd
2438 0:
2439 abc
2440 0: a
2441 1: a
2442 aab
2443 0: a
2444 1: a
2445
2446/^(a){0,2}/
2447 bcd
2448 0:
2449 abc
2450 0: a
2451 1: a
2452 aab
2453 0: aa
2454 1: a
2455
2456/^(a){0,3}/
2457 bcd
2458 0:
2459 abc
2460 0: a
2461 1: a
2462 aab
2463 0: aa
2464 1: a
2465 aaa
2466 0: aaa
2467 1: a
2468
2469/^(a){0,}/
2470 bcd
2471 0:
2472 abc
2473 0: a
2474 1: a
2475 aab
2476 0: aa
2477 1: a
2478 aaa
2479 0: aaa
2480 1: a
2481 aaaaaaaa
2482 0: aaaaaaaa
2483 1: a
2484
2485/^(a){1,1}/
2486 abc
2487 0: a
2488 1: a
2489 aab
2490 0: a
2491 1: a
2492\= Expect no match
2493 bcd
2494No match
2495
2496/^(a){1,2}/
2497 abc
2498 0: a
2499 1: a
2500 aab
2501 0: aa
2502 1: a
2503\= Expect no match
2504 bcd
2505No match
2506
2507/^(a){1,3}/
2508 abc
2509 0: a
2510 1: a
2511 aab
2512 0: aa
2513 1: a
2514 aaa
2515 0: aaa
2516 1: a
2517\= Expect no match
2518 bcd
2519No match
2520
2521/^(a){1,}/
2522 abc
2523 0: a
2524 1: a
2525 aab
2526 0: aa
2527 1: a
2528 aaa
2529 0: aaa
2530 1: a
2531 aaaaaaaa
2532 0: aaaaaaaa
2533 1: a
2534\= Expect no match
2535 bcd
2536No match
2537
2538/.*\.gif/
2539 borfle\nbib.gif\nno
2540 0: bib.gif
2541
2542/.{0,}\.gif/
2543 borfle\nbib.gif\nno
2544 0: bib.gif
2545
2546/.*\.gif/m
2547 borfle\nbib.gif\nno
2548 0: bib.gif
2549
2550/.*\.gif/s
2551 borfle\nbib.gif\nno
2552 0: borfle\x0abib.gif
2553
2554/.*\.gif/ms
2555 borfle\nbib.gif\nno
2556 0: borfle\x0abib.gif
2557
2558/.*$/
2559 borfle\nbib.gif\nno
2560 0: no
2561
2562/.*$/m
2563 borfle\nbib.gif\nno
2564 0: borfle
2565
2566/.*$/s
2567 borfle\nbib.gif\nno
2568 0: borfle\x0abib.gif\x0ano
2569
2570/.*$/ms
2571 borfle\nbib.gif\nno
2572 0: borfle\x0abib.gif\x0ano
2573
2574/.*$/
2575 borfle\nbib.gif\nno\n
2576 0: no
2577
2578/.*$/m
2579 borfle\nbib.gif\nno\n
2580 0: borfle
2581
2582/.*$/s
2583 borfle\nbib.gif\nno\n
2584 0: borfle\x0abib.gif\x0ano\x0a
2585
2586/.*$/ms
2587 borfle\nbib.gif\nno\n
2588 0: borfle\x0abib.gif\x0ano\x0a
2589
2590/(.*X|^B)/
2591 abcde\n1234Xyz
2592 0: 1234X
2593 1: 1234X
2594 BarFoo
2595 0: B
2596 1: B
2597\= Expect no match
2598 abcde\nBar
2599No match
2600
2601/(.*X|^B)/m
2602 abcde\n1234Xyz
2603 0: 1234X
2604 1: 1234X
2605 BarFoo
2606 0: B
2607 1: B
2608 abcde\nBar
2609 0: B
2610 1: B
2611
2612/(.*X|^B)/s
2613 abcde\n1234Xyz
2614 0: abcde\x0a1234X
2615 1: abcde\x0a1234X
2616 BarFoo
2617 0: B
2618 1: B
2619\= Expect no match
2620 abcde\nBar
2621No match
2622
2623/(.*X|^B)/ms
2624 abcde\n1234Xyz
2625 0: abcde\x0a1234X
2626 1: abcde\x0a1234X
2627 BarFoo
2628 0: B
2629 1: B
2630 abcde\nBar
2631 0: B
2632 1: B
2633
2634/(?s)(.*X|^B)/
2635 abcde\n1234Xyz
2636 0: abcde\x0a1234X
2637 1: abcde\x0a1234X
2638 BarFoo
2639 0: B
2640 1: B
2641\= Expect no match
2642 abcde\nBar
2643No match
2644
2645/(?s:.*X|^B)/
2646 abcde\n1234Xyz
2647 0: abcde\x0a1234X
2648 BarFoo
2649 0: B
2650\= Expect no match
2651 abcde\nBar
2652No match
2653
2654/^.*B/
2655\= Expect no match
2656 abc\nB
2657No match
2658
2659/(?s)^.*B/
2660 abc\nB
2661 0: abc\x0aB
2662
2663/(?m)^.*B/
2664 abc\nB
2665 0: B
2666
2667/(?ms)^.*B/
2668 abc\nB
2669 0: abc\x0aB
2670
2671/(?ms)^B/
2672 abc\nB
2673 0: B
2674
2675/(?s)B$/
2676 B\n
2677 0: B
2678
2679/^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]/
2680 123456654321
2681 0: 123456654321
2682
2683/^\d\d\d\d\d\d\d\d\d\d\d\d/
2684 123456654321
2685 0: 123456654321
2686
2687/^[\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d]/
2688 123456654321
2689 0: 123456654321
2690
2691/^[abc]{12}/
2692 abcabcabcabc
2693 0: abcabcabcabc
2694
2695/^[a-c]{12}/
2696 abcabcabcabc
2697 0: abcabcabcabc
2698
2699/^(a|b|c){12}/
2700 abcabcabcabc
2701 0: abcabcabcabc
2702 1: c
2703
2704/^[abcdefghijklmnopqrstuvwxy0123456789]/
2705 n
2706 0: n
2707\= Expect no match
2708 z
2709No match
2710
2711/abcde{0,0}/
2712 abcd
2713 0: abcd
2714\= Expect no match
2715 abce
2716No match
2717
2718/ab[cd]{0,0}e/
2719 abe
2720 0: abe
2721\= Expect no match
2722 abcde
2723No match
2724
2725/ab(c){0,0}d/
2726 abd
2727 0: abd
2728\= Expect no match
2729 abcd
2730No match
2731
2732/a(b*)/
2733 a
2734 0: a
2735 1:
2736 ab
2737 0: ab
2738 1: b
2739 abbbb
2740 0: abbbb
2741 1: bbbb
2742\= Expect no match
2743 bbbbb
2744No match
2745
2746/ab\d{0}e/
2747 abe
2748 0: abe
2749\= Expect no match
2750 ab1e
2751No match
2752
2753/"([^\\"]+|\\.)*"/
2754 the \"quick\" brown fox
2755 0: "quick"
2756 1: quick
2757 \"the \\\"quick\\\" brown fox\"
2758 0: "the \"quick\" brown fox"
2759 1: brown fox
2760
2761/<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is
2762 <TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>
2763 0: <TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>
2764 1: BGCOLOR='#DBE9E9'
2765 2: align=left valign=top
2766 3: 43.
2767 4: <a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)
2768 5:
2769 6:
2770 7: <unset>
2771 8: align=left valign=top
2772 9: Lega lstaff.com
277310: align=left valign=top
277411: CA - Statewide
2775
2776/a[^a]b/
2777 acb
2778 0: acb
2779 a\nb
2780 0: a\x0ab
2781
2782/a.b/
2783 acb
2784 0: acb
2785\= Expect no match
2786 a\nb
2787No match
2788
2789/a[^a]b/s
2790 acb
2791 0: acb
2792 a\nb
2793 0: a\x0ab
2794
2795/a.b/s
2796 acb
2797 0: acb
2798 a\nb
2799 0: a\x0ab
2800
2801/^(b+?|a){1,2}?c/
2802 bac
2803 0: bac
2804 1: a
2805 bbac
2806 0: bbac
2807 1: a
2808 bbbac
2809 0: bbbac
2810 1: a
2811 bbbbac
2812 0: bbbbac
2813 1: a
2814 bbbbbac
2815 0: bbbbbac
2816 1: a
2817
2818/^(b+|a){1,2}?c/
2819 bac
2820 0: bac
2821 1: a
2822 bbac
2823 0: bbac
2824 1: a
2825 bbbac
2826 0: bbbac
2827 1: a
2828 bbbbac
2829 0: bbbbac
2830 1: a
2831 bbbbbac
2832 0: bbbbbac
2833 1: a
2834
2835/(?!\A)x/m
2836 a\bx\n
2837 0: x
2838 a\nx\n
2839 0: x
2840\= Expect no match
2841 x\nb\n
2842No match
2843
2844/(A|B)*?CD/
2845 CD
2846 0: CD
2847
2848/(A|B)*CD/
2849 CD
2850 0: CD
2851
2852/(AB)*?\1/
2853 ABABAB
2854 0: ABAB
2855 1: AB
2856
2857/(AB)*\1/
2858 ABABAB
2859 0: ABABAB
2860 1: AB
2861
2862/(?<!bar)foo/
2863 foo
2864 0: foo
2865 catfood
2866 0: foo
2867 arfootle
2868 0: foo
2869 rfoosh
2870 0: foo
2871\= Expect no match
2872 barfoo
2873No match
2874 towbarfoo
2875No match
2876
2877/\w{3}(?<!bar)foo/
2878 catfood
2879 0: catfoo
2880\= Expect no match
2881 foo
2882No match
2883 barfoo
2884No match
2885 towbarfoo
2886No match
2887
2888/(?<=(foo)a)bar/
2889 fooabar
2890 0: bar
2891 1: foo
2892\= Expect no match
2893 bar
2894No match
2895 foobbar
2896No match
2897
2898/\Aabc\z/m
2899 abc
2900 0: abc
2901\= Expect no match
2902 abc\n
2903No match
2904 qqq\nabc
2905No match
2906 abc\nzzz
2907No match
2908 qqq\nabc\nzzz
2909No match
2910
2911"(?>.*/)foo"
2912 /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
2913 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
2914\= Expect no match
2915 /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/
2916No match
2917
2918/(?>(\.\d\d[1-9]?))\d+/
2919 1.230003938
2920 0: .230003938
2921 1: .23
2922 1.875000282
2923 0: .875000282
2924 1: .875
2925\= Expect no match
2926 1.235
2927No match
2928
2929/^((?>\w+)|(?>\s+))*$/
2930 now is the time for all good men to come to the aid of the party
2931 0: now is the time for all good men to come to the aid of the party
2932 1: party
2933\= Expect no match
2934 this is not a line with only words and spaces!
2935No match
2936
2937/(\d+)(\w)/
2938 12345a
2939 0: 12345a
2940 1: 12345
2941 2: a
2942 12345+
2943 0: 12345
2944 1: 1234
2945 2: 5
2946
2947/((?>\d+))(\w)/
2948 12345a
2949 0: 12345a
2950 1: 12345
2951 2: a
2952\= Expect no match
2953 12345+
2954No match
2955
2956/(?>a+)b/
2957 aaab
2958 0: aaab
2959
2960/((?>a+)b)/
2961 aaab
2962 0: aaab
2963 1: aaab
2964
2965/(?>(a+))b/
2966 aaab
2967 0: aaab
2968 1: aaa
2969
2970/(?>b)+/
2971 aaabbbccc
2972 0: bbb
2973
2974/(?>a+|b+|c+)*c/
2975 aaabbbbccccd
2976 0: aaabbbbc
2977
2978/((?>[^()]+)|\([^()]*\))+/
2979 ((abc(ade)ufh()()x
2980 0: abc(ade)ufh()()x
2981 1: x
2982
2983/\(((?>[^()]+)|\([^()]+\))+\)/
2984 (abc)
2985 0: (abc)
2986 1: abc
2987 (abc(def)xyz)
2988 0: (abc(def)xyz)
2989 1: xyz
2990\= Expect no match
2991 ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
2992No match
2993
2994/a(?-i)b/i
2995 ab
2996 0: ab
2997 Ab
2998 0: Ab
2999\= Expect no match
3000 aB
3001No match
3002 AB
3003No match
3004
3005/(a (?x)b c)d e/
3006 a bcd e
3007 0: a bcd e
3008 1: a bc
3009\= Expect no match
3010 a b cd e
3011No match
3012 abcd e
3013No match
3014 a bcde
3015No match
3016
3017/(a b(?x)c d (?-x)e f)/
3018 a bcde f
3019 0: a bcde f
3020 1: a bcde f
3021\= Expect no match
3022 abcdef
3023No match
3024
3025/(a(?i)b)c/
3026 abc
3027 0: abc
3028 1: ab
3029 aBc
3030 0: aBc
3031 1: aB
3032\= Expect no match
3033 abC
3034No match
3035 aBC
3036No match
3037 Abc
3038No match
3039 ABc
3040No match
3041 ABC
3042No match
3043 AbC
3044No match
3045
3046/a(?i:b)c/
3047 abc
3048 0: abc
3049 aBc
3050 0: aBc
3051\= Expect no match
3052 ABC
3053No match
3054 abC
3055No match
3056 aBC
3057No match
3058
3059/a(?i:b)*c/
3060 aBc
3061 0: aBc
3062 aBBc
3063 0: aBBc
3064\= Expect no match
3065 aBC
3066No match
3067 aBBC
3068No match
3069
3070/a(?=b(?i)c)\w\wd/
3071 abcd
3072 0: abcd
3073 abCd
3074 0: abCd
3075\= Expect no match
3076 aBCd
3077No match
3078 abcD
3079No match
3080
3081/(?s-i:more.*than).*million/i
3082 more than million
3083 0: more than million
3084 more than MILLION
3085 0: more than MILLION
3086 more \n than Million
3087 0: more \x0a than Million
3088\= Expect no match
3089 MORE THAN MILLION
3090No match
3091 more \n than \n million
3092No match
3093
3094/(?:(?s-i)more.*than).*million/i
3095 more than million
3096 0: more than million
3097 more than MILLION
3098 0: more than MILLION
3099 more \n than Million
3100 0: more \x0a than Million
3101\= Expect no match
3102 MORE THAN MILLION
3103No match
3104 more \n than \n million
3105No match
3106
3107/(?>a(?i)b+)+c/
3108 abc
3109 0: abc
3110 aBbc
3111 0: aBbc
3112 aBBc
3113 0: aBBc
3114\= Expect no match
3115 Abc
3116No match
3117 abAb
3118No match
3119 abbC
3120No match
3121
3122/(?=a(?i)b)\w\wc/
3123 abc
3124 0: abc
3125 aBc
3126 0: aBc
3127\= Expect no match
3128 Ab
3129No match
3130 abC
3131No match
3132 aBC
3133No match
3134
3135/(?<=a(?i)b)(\w\w)c/
3136 abxxc
3137 0: xxc
3138 1: xx
3139 aBxxc
3140 0: xxc
3141 1: xx
3142\= Expect no match
3143 Abxxc
3144No match
3145 ABxxc
3146No match
3147 abxxC
3148No match
3149
3150/(?:(a)|b)(?(1)A|B)/
3151 aA
3152 0: aA
3153 1: a
3154 bB
3155 0: bB
3156\= Expect no match
3157 aB
3158No match
3159 bA
3160No match
3161
3162/^(a)?(?(1)a|b)+$/
3163 aa
3164 0: aa
3165 1: a
3166 b
3167 0: b
3168 bb
3169 0: bb
3170\= Expect no match
3171 ab
3172No match
3173
3174# Perl gets this next one wrong if the pattern ends with $; in that case it
3175# fails to match "12".
3176
3177/^(?(?=abc)\w{3}:|\d\d)/
3178 abc:
3179 0: abc:
3180 12
3181 0: 12
3182 123
3183 0: 12
3184\= Expect no match
3185 xyz
3186No match
3187
3188/^(?(?!abc)\d\d|\w{3}:)$/
3189 abc:
3190 0: abc:
3191 12
3192 0: 12
3193\= Expect no match
3194 123
3195No match
3196 xyz
3197No match
3198
3199/(?(?<=foo)bar|cat)/
3200 foobar
3201 0: bar
3202 cat
3203 0: cat
3204 fcat
3205 0: cat
3206 focat
3207 0: cat
3208\= Expect no match
3209 foocat
3210No match
3211
3212/(?(?<!foo)cat|bar)/
3213 foobar
3214 0: bar
3215 cat
3216 0: cat
3217 fcat
3218 0: cat
3219 focat
3220 0: cat
3221\= Expect no match
3222 foocat
3223No match
3224
3225/( \( )? [^()]+ (?(1) \) |) /x
3226 abcd
3227 0: abcd
3228 (abcd)
3229 0: (abcd)
3230 1: (
3231 the quick (abcd) fox
3232 0: the quick
3233 (abcd
3234 0: abcd
3235
3236/( \( )? [^()]+ (?(1) \) ) /x
3237 abcd
3238 0: abcd
3239 (abcd)
3240 0: (abcd)
3241 1: (
3242 the quick (abcd) fox
3243 0: the quick
3244 (abcd
3245 0: abcd
3246
3247/^(?(2)a|(1)(2))+$/
3248 12
3249 0: 12
3250 1: 1
3251 2: 2
3252 12a
3253 0: 12a
3254 1: 1
3255 2: 2
3256 12aa
3257 0: 12aa
3258 1: 1
3259 2: 2
3260\= Expect no match
3261 1234
3262No match
3263
3264/((?i)blah)\s+\1/
3265 blah blah
3266 0: blah blah
3267 1: blah
3268 BLAH BLAH
3269 0: BLAH BLAH
3270 1: BLAH
3271 Blah Blah
3272 0: Blah Blah
3273 1: Blah
3274 blaH blaH
3275 0: blaH blaH
3276 1: blaH
3277\= Expect no match
3278 blah BLAH
3279No match
3280 Blah blah
3281No match
3282 blaH blah
3283No match
3284
3285/((?i)blah)\s+(?i:\1)/
3286 blah blah
3287 0: blah blah
3288 1: blah
3289 BLAH BLAH
3290 0: BLAH BLAH
3291 1: BLAH
3292 Blah Blah
3293 0: Blah Blah
3294 1: Blah
3295 blaH blaH
3296 0: blaH blaH
3297 1: blaH
3298 blah BLAH
3299 0: blah BLAH
3300 1: blah
3301 Blah blah
3302 0: Blah blah
3303 1: Blah
3304 blaH blah
3305 0: blaH blah
3306 1: blaH
3307
3308/(?>a*)*/
3309 a
3310 0: a
3311 aa
3312 0: aa
3313 aaaa
3314 0: aaaa
3315
3316/(abc|)+/
3317 abc
3318 0: abc
3319 1:
3320 abcabc
3321 0: abcabc
3322 1:
3323 abcabcabc
3324 0: abcabcabc
3325 1:
3326 xyz
3327 0:
3328 1:
3329
3330/([a]*)*/
3331 a
3332 0: a
3333 1:
3334 aaaaa
3335 0: aaaaa
3336 1:
3337
3338/([ab]*)*/
3339 a
3340 0: a
3341 1:
3342 b
3343 0: b
3344 1:
3345 ababab
3346 0: ababab
3347 1:
3348 aaaabcde
3349 0: aaaab
3350 1:
3351 bbbb
3352 0: bbbb
3353 1:
3354
3355/([^a]*)*/
3356 b
3357 0: b
3358 1:
3359 bbbb
3360 0: bbbb
3361 1:
3362 aaa
3363 0:
3364 1:
3365
3366/([^ab]*)*/
3367 cccc
3368 0: cccc
3369 1:
3370 abab
3371 0:
3372 1:
3373
3374/([a]*?)*/
3375 a
3376 0:
3377 1:
3378 aaaa
3379 0:
3380 1:
3381
3382/([ab]*?)*/
3383 a
3384 0:
3385 1:
3386 b
3387 0:
3388 1:
3389 abab
3390 0:
3391 1:
3392 baba
3393 0:
3394 1:
3395
3396/([^a]*?)*/
3397 b
3398 0:
3399 1:
3400 bbbb
3401 0:
3402 1:
3403 aaa
3404 0:
3405 1:
3406
3407/([^ab]*?)*/
3408 c
3409 0:
3410 1:
3411 cccc
3412 0:
3413 1:
3414 baba
3415 0:
3416 1:
3417
3418/(?>a*)*/
3419 a
3420 0: a
3421 aaabcde
3422 0: aaa
3423
3424/((?>a*))*/
3425 aaaaa
3426 0: aaaaa
3427 1:
3428 aabbaa
3429 0: aa
3430 1:
3431
3432/((?>a*?))*/
3433 aaaaa
3434 0:
3435 1:
3436 aabbaa
3437 0:
3438 1:
3439
3440/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /x
3441 12-sep-98
3442 0: 12-sep-98
3443 12-09-98
3444 0: 12-09-98
3445\= Expect no match
3446 sep-12-98
3447No match
3448
3449/(?<=(foo))bar\1/
3450 foobarfoo
3451 0: barfoo
3452 1: foo
3453 foobarfootling
3454 0: barfoo
3455 1: foo
3456\= Expect no match
3457 foobar
3458No match
3459 barfoo
3460No match
3461
3462/(?i:saturday|sunday)/
3463 saturday
3464 0: saturday
3465 sunday
3466 0: sunday
3467 Saturday
3468 0: Saturday
3469 Sunday
3470 0: Sunday
3471 SATURDAY
3472 0: SATURDAY
3473 SUNDAY
3474 0: SUNDAY
3475 SunDay
3476 0: SunDay
3477
3478/(a(?i)bc|BB)x/
3479 abcx
3480 0: abcx
3481 1: abc
3482 aBCx
3483 0: aBCx
3484 1: aBC
3485 bbx
3486 0: bbx
3487 1: bb
3488 BBx
3489 0: BBx
3490 1: BB
3491\= Expect no match
3492 abcX
3493No match
3494 aBCX
3495No match
3496 bbX
3497No match
3498 BBX
3499No match
3500
3501/^([ab](?i)[cd]|[ef])/
3502 ac
3503 0: ac
3504 1: ac
3505 aC
3506 0: aC
3507 1: aC
3508 bD
3509 0: bD
3510 1: bD
3511 elephant
3512 0: e
3513 1: e
3514 Europe
3515 0: E
3516 1: E
3517 frog
3518 0: f
3519 1: f
3520 France
3521 0: F
3522 1: F
3523\= Expect no match
3524 Africa
3525No match
3526
3527/^(ab|a(?i)[b-c](?m-i)d|x(?i)y|z)/
3528 ab
3529 0: ab
3530 1: ab
3531 aBd
3532 0: aBd
3533 1: aBd
3534 xy
3535 0: xy
3536 1: xy
3537 xY
3538 0: xY
3539 1: xY
3540 zebra
3541 0: z
3542 1: z
3543 Zambesi
3544 0: Z
3545 1: Z
3546\= Expect no match
3547 aCD
3548No match
3549 XY
3550No match
3551
3552/(?<=foo\n)^bar/m
3553 foo\nbar
3554 0: bar
3555\= Expect no match
3556 bar
3557No match
3558 baz\nbar
3559No match
3560
3561/(?<=(?<!foo)bar)baz/
3562 barbaz
3563 0: baz
3564 barbarbaz
3565 0: baz
3566 koobarbaz
3567 0: baz
3568\= Expect no match
3569 baz
3570No match
3571 foobarbaz
3572No match
3573
3574# The cases of aaaa and aaaaaa are missed out below because Perl does things
3575# differently. We know that odd, and maybe incorrect, things happen with
3576# recursive references in Perl, as far as 5.11.3 - see some stuff in test #2.
3577
3578/^(a\1?){4}$/
3579 aaaaa
3580 0: aaaaa
3581 1: a
3582 aaaaaaa
3583 0: aaaaaaa
3584 1: a
3585 aaaaaaaaaa
3586 0: aaaaaaaaaa
3587 1: aaaa
3588\= Expect no match
3589 a
3590No match
3591 aa
3592No match
3593 aaa
3594No match
3595 aaaaaaaa
3596No match
3597 aaaaaaaaa
3598No match
3599 aaaaaaaaaaa
3600No match
3601 aaaaaaaaaaaa
3602No match
3603 aaaaaaaaaaaaa
3604No match
3605 aaaaaaaaaaaaaa
3606No match
3607 aaaaaaaaaaaaaaa
3608No match
3609 aaaaaaaaaaaaaaaa
3610No match
3611
3612/^(a\1?)(a\1?)(a\2?)(a\3?)$/
3613 aaaa
3614 0: aaaa
3615 1: a
3616 2: a
3617 3: a
3618 4: a
3619 aaaaa
3620 0: aaaaa
3621 1: a
3622 2: aa
3623 3: a
3624 4: a
3625 aaaaaa
3626 0: aaaaaa
3627 1: a
3628 2: aa
3629 3: a
3630 4: aa
3631 aaaaaaa
3632 0: aaaaaaa
3633 1: a
3634 2: aa
3635 3: aaa
3636 4: a
3637 aaaaaaaaaa
3638 0: aaaaaaaaaa
3639 1: a
3640 2: aa
3641 3: aaa
3642 4: aaaa
3643\= Expect no match
3644 a
3645No match
3646 aa
3647No match
3648 aaa
3649No match
3650 aaaaaaaa
3651No match
3652 aaaaaaaaa
3653No match
3654 aaaaaaaaaaa
3655No match
3656 aaaaaaaaaaaa
3657No match
3658 aaaaaaaaaaaaa
3659No match
3660 aaaaaaaaaaaaaa
3661No match
3662 aaaaaaaaaaaaaaa
3663No match
3664 aaaaaaaaaaaaaaaa
3665No match
3666
3667# The following tests are taken from the Perl 5.005 test suite; some of them
3668# are compatible with 5.004, but I'd rather not have to sort them out.
3669
3670/abc/
3671 abc
3672 0: abc
3673 xabcy
3674 0: abc
3675 ababc
3676 0: abc
3677\= Expect no match
3678 xbc
3679No match
3680 axc
3681No match
3682 abx
3683No match
3684
3685/ab*c/
3686 abc
3687 0: abc
3688
3689/ab*bc/
3690 abc
3691 0: abc
3692 abbc
3693 0: abbc
3694 abbbbc
3695 0: abbbbc
3696
3697/.{1}/
3698 abbbbc
3699 0: a
3700
3701/.{3,4}/
3702 abbbbc
3703 0: abbb
3704
3705/ab{0,}bc/
3706 abbbbc
3707 0: abbbbc
3708
3709/ab+bc/
3710 abbc
3711 0: abbc
3712\= Expect no match
3713 abc
3714No match
3715 abq
3716No match
3717
3718/ab{1,}bc/
3719
3720/ab+bc/
3721 abbbbc
3722 0: abbbbc
3723
3724/ab{1,}bc/
3725 abbbbc
3726 0: abbbbc
3727
3728/ab{1,3}bc/
3729 abbbbc
3730 0: abbbbc
3731
3732/ab{3,4}bc/
3733 abbbbc
3734 0: abbbbc
3735
3736/ab{4,5}bc/
3737\= Expect no match
3738 abq
3739No match
3740 abbbbc
3741No match
3742
3743/ab?bc/
3744 abbc
3745 0: abbc
3746 abc
3747 0: abc
3748
3749/ab{0,1}bc/
3750 abc
3751 0: abc
3752
3753/ab?bc/
3754
3755/ab?c/
3756 abc
3757 0: abc
3758
3759/ab{0,1}c/
3760 abc
3761 0: abc
3762
3763/^abc$/
3764 abc
3765 0: abc
3766\= Expect no match
3767 abbbbc
3768No match
3769 abcc
3770No match
3771
3772/^abc/
3773 abcc
3774 0: abc
3775
3776/^abc$/
3777
3778/abc$/
3779 aabc
3780 0: abc
3781\= Expect no match
3782 aabcd
3783No match
3784
3785/^/
3786 abc
3787 0:
3788
3789/$/
3790 abc
3791 0:
3792
3793/a.c/
3794 abc
3795 0: abc
3796 axc
3797 0: axc
3798
3799/a.*c/
3800 axyzc
3801 0: axyzc
3802
3803/a[bc]d/
3804 abd
3805 0: abd
3806\= Expect no match
3807 axyzd
3808No match
3809 abc
3810No match
3811
3812/a[b-d]e/
3813 ace
3814 0: ace
3815
3816/a[b-d]/
3817 aac
3818 0: ac
3819
3820/a[-b]/
3821 a-
3822 0: a-
3823
3824/a[b-]/
3825 a-
3826 0: a-
3827
3828/a]/
3829 a]
3830 0: a]
3831
3832/a[]]b/
3833 a]b
3834 0: a]b
3835
3836/a[^bc]d/
3837 aed
3838 0: aed
3839\= Expect no match
3840 abd
3841No match
3842 abd
3843No match
3844
3845/a[^-b]c/
3846 adc
3847 0: adc
3848
3849/a[^]b]c/
3850 adc
3851 0: adc
3852 a-c
3853 0: a-c
3854\= Expect no match
3855 a]c
3856No match
3857
3858/\ba\b/
3859 a-
3860 0: a
3861 -a
3862 0: a
3863 -a-
3864 0: a
3865
3866/\by\b/
3867\= Expect no match
3868 xy
3869No match
3870 yz
3871No match
3872 xyz
3873No match
3874
3875/\Ba\B/
3876\= Expect no match
3877 a-
3878No match
3879 -a
3880No match
3881 -a-
3882No match
3883
3884/\By\b/
3885 xy
3886 0: y
3887
3888/\by\B/
3889 yz
3890 0: y
3891
3892/\By\B/
3893 xyz
3894 0: y
3895
3896/\w/
3897 a
3898 0: a
3899
3900/\W/
3901 -
3902 0: -
3903\= Expect no match
3904 a
3905No match
3906
3907/a\sb/
3908 a b
3909 0: a b
3910
3911/a\Sb/
3912 a-b
3913 0: a-b
3914\= Expect no match
3915 a b
3916No match
3917
3918/\d/
3919 1
3920 0: 1
3921
3922/\D/
3923 -
3924 0: -
3925\= Expect no match
3926 1
3927No match
3928
3929/[\w]/
3930 a
3931 0: a
3932
3933/[\W]/
3934 -
3935 0: -
3936\= Expect no match
3937 a
3938No match
3939
3940/a[\s]b/
3941 a b
3942 0: a b
3943
3944/a[\S]b/
3945 a-b
3946 0: a-b
3947\= Expect no match
3948 a b
3949No match
3950
3951/[\d]/
3952 1
3953 0: 1
3954
3955/[\D]/
3956 -
3957 0: -
3958\= Expect no match
3959 1
3960No match
3961
3962/ab|cd/
3963 abc
3964 0: ab
3965 abcd
3966 0: ab
3967
3968/()ef/
3969 def
3970 0: ef
3971 1:
3972
3973/$b/
3974
3975/a\(b/
3976 a(b
3977 0: a(b
3978
3979/a\(*b/
3980 ab
3981 0: ab
3982 a((b
3983 0: a((b
3984
3985/a\\b/
3986 a\\b
3987 0: a\b
3988
3989/((a))/
3990 abc
3991 0: a
3992 1: a
3993 2: a
3994
3995/(a)b(c)/
3996 abc
3997 0: abc
3998 1: a
3999 2: c
4000
4001/a+b+c/
4002 aabbabc
4003 0: abc
4004
4005/a{1,}b{1,}c/
4006 aabbabc
4007 0: abc
4008
4009/a.+?c/
4010 abcabc
4011 0: abc
4012
4013/(a+|b)*/
4014 ab
4015 0: ab
4016 1: b
4017
4018/(a+|b){0,}/
4019 ab
4020 0: ab
4021 1: b
4022
4023/(a+|b)+/
4024 ab
4025 0: ab
4026 1: b
4027
4028/(a+|b){1,}/
4029 ab
4030 0: ab
4031 1: b
4032
4033/(a+|b)?/
4034 ab
4035 0: a
4036 1: a
4037
4038/(a+|b){0,1}/
4039 ab
4040 0: a
4041 1: a
4042
4043/[^ab]*/
4044 cde
4045 0: cde
4046
4047/abc/
4048\= Expect no match
4049 b
4050No match
4051
4052/a*/
4053 \
4054 0:
4055
4056/([abc])*d/
4057 abbbcd
4058 0: abbbcd
4059 1: c
4060
4061/([abc])*bcd/
4062 abcd
4063 0: abcd
4064 1: a
4065
4066/a|b|c|d|e/
4067 e
4068 0: e
4069
4070/(a|b|c|d|e)f/
4071 ef
4072 0: ef
4073 1: e
4074
4075/abcd*efg/
4076 abcdefg
4077 0: abcdefg
4078
4079/ab*/
4080 xabyabbbz
4081 0: ab
4082 xayabbbz
4083 0: a
4084
4085/(ab|cd)e/
4086 abcde
4087 0: cde
4088 1: cd
4089
4090/[abhgefdc]ij/
4091 hij
4092 0: hij
4093
4094/^(ab|cd)e/
4095
4096/(abc|)ef/
4097 abcdef
4098 0: ef
4099 1:
4100
4101/(a|b)c*d/
4102 abcd
4103 0: bcd
4104 1: b
4105
4106/(ab|ab*)bc/
4107 abc
4108 0: abc
4109 1: a
4110
4111/a([bc]*)c*/
4112 abc
4113 0: abc
4114 1: bc
4115
4116/a([bc]*)(c*d)/
4117 abcd
4118 0: abcd
4119 1: bc
4120 2: d
4121
4122/a([bc]+)(c*d)/
4123 abcd
4124 0: abcd
4125 1: bc
4126 2: d
4127
4128/a([bc]*)(c+d)/
4129 abcd
4130 0: abcd
4131 1: b
4132 2: cd
4133
4134/a[bcd]*dcdcde/
4135 adcdcde
4136 0: adcdcde
4137
4138/a[bcd]+dcdcde/
4139\= Expect no match
4140 abcde
4141No match
4142 adcdcde
4143No match
4144
4145/(ab|a)b*c/
4146 abc
4147 0: abc
4148 1: ab
4149
4150/((a)(b)c)(d)/
4151 abcd
4152 0: abcd
4153 1: abc
4154 2: a
4155 3: b
4156 4: d
4157
4158/[a-zA-Z_][a-zA-Z0-9_]*/
4159 alpha
4160 0: alpha
4161
4162/^a(bc+|b[eh])g|.h$/
4163 abh
4164 0: bh
4165
4166/(bc+d$|ef*g.|h?i(j|k))/
4167 effgz
4168 0: effgz
4169 1: effgz
4170 ij
4171 0: ij
4172 1: ij
4173 2: j
4174 reffgz
4175 0: effgz
4176 1: effgz
4177\= Expect no match
4178 effg
4179No match
4180 bcdd
4181No match
4182
4183/((((((((((a))))))))))/
4184 a
4185 0: a
4186 1: a
4187 2: a
4188 3: a
4189 4: a
4190 5: a
4191 6: a
4192 7: a
4193 8: a
4194 9: a
419510: a
4196
4197/((((((((((a))))))))))\10/
4198 aa
4199 0: aa
4200 1: a
4201 2: a
4202 3: a
4203 4: a
4204 5: a
4205 6: a
4206 7: a
4207 8: a
4208 9: a
420910: a
4210
4211/(((((((((a)))))))))/
4212 a
4213 0: a
4214 1: a
4215 2: a
4216 3: a
4217 4: a
4218 5: a
4219 6: a
4220 7: a
4221 8: a
4222 9: a
4223
4224/multiple words of text/
4225\= Expect no match
4226 aa
4227No match
4228 uh-uh
4229No match
4230
4231/multiple words/
4232 multiple words, yeah
4233 0: multiple words
4234
4235/(.*)c(.*)/
4236 abcde
4237 0: abcde
4238 1: ab
4239 2: de
4240
4241/\((.*), (.*)\)/
4242 (a, b)
4243 0: (a, b)
4244 1: a
4245 2: b
4246
4247/[k]/
4248
4249/abcd/
4250 abcd
4251 0: abcd
4252
4253/a(bc)d/
4254 abcd
4255 0: abcd
4256 1: bc
4257
4258/a[-]?c/
4259 ac
4260 0: ac
4261
4262/(abc)\1/
4263 abcabc
4264 0: abcabc
4265 1: abc
4266
4267/([a-c]*)\1/
4268 abcabc
4269 0: abcabc
4270 1: abc
4271
4272/(a)|\1/
4273 a
4274 0: a
4275 1: a
4276 ab
4277 0: a
4278 1: a
4279\= Expect no match
4280 x
4281No match
4282
4283/(([a-c])b*?\2)*/
4284 ababbbcbc
4285 0: ababb
4286 1: bb
4287 2: b
4288
4289/(([a-c])b*?\2){3}/
4290 ababbbcbc
4291 0: ababbbcbc
4292 1: cbc
4293 2: c
4294
4295/((\3|b)\2(a)x)+/
4296 aaaxabaxbaaxbbax
4297 0: bbax
4298 1: bbax
4299 2: b
4300 3: a
4301
4302/((\3|b)\2(a)){2,}/
4303 bbaababbabaaaaabbaaaabba
4304 0: bbaaaabba
4305 1: bba
4306 2: b
4307 3: a
4308
4309/abc/i
4310 ABC
4311 0: ABC
4312 XABCY
4313 0: ABC
4314 ABABC
4315 0: ABC
4316\= Expect no match
4317 aaxabxbaxbbx
4318No match
4319 XBC
4320No match
4321 AXC
4322No match
4323 ABX
4324No match
4325
4326/ab*c/i
4327 ABC
4328 0: ABC
4329
4330/ab*bc/i
4331 ABC
4332 0: ABC
4333 ABBC
4334 0: ABBC
4335
4336/ab*?bc/i
4337 ABBBBC
4338 0: ABBBBC
4339
4340/ab{0,}?bc/i
4341 ABBBBC
4342 0: ABBBBC
4343
4344/ab+?bc/i
4345 ABBC
4346 0: ABBC
4347
4348/ab+bc/i
4349\= Expect no match
4350 ABC
4351No match
4352 ABQ
4353No match
4354
4355/ab{1,}bc/i
4356
4357/ab+bc/i
4358 ABBBBC
4359 0: ABBBBC
4360
4361/ab{1,}?bc/i
4362 ABBBBC
4363 0: ABBBBC
4364
4365/ab{1,3}?bc/i
4366 ABBBBC
4367 0: ABBBBC
4368
4369/ab{3,4}?bc/i
4370 ABBBBC
4371 0: ABBBBC
4372
4373/ab{4,5}?bc/i
4374\= Expect no match
4375 ABQ
4376No match
4377 ABBBBC
4378No match
4379
4380/ab??bc/i
4381 ABBC
4382 0: ABBC
4383 ABC
4384 0: ABC
4385
4386/ab{0,1}?bc/i
4387 ABC
4388 0: ABC
4389
4390/ab??bc/i
4391
4392/ab??c/i
4393 ABC
4394 0: ABC
4395
4396/ab{0,1}?c/i
4397 ABC
4398 0: ABC
4399
4400/^abc$/i
4401 ABC
4402 0: ABC
4403\= Expect no match
4404 ABBBBC
4405No match
4406 ABCC
4407No match
4408
4409/^abc/i
4410 ABCC
4411 0: ABC
4412
4413/^abc$/i
4414
4415/abc$/i
4416 AABC
4417 0: ABC
4418
4419/^/i
4420 ABC
4421 0:
4422
4423/$/i
4424 ABC
4425 0:
4426
4427/a.c/i
4428 ABC
4429 0: ABC
4430 AXC
4431 0: AXC
4432
4433/a.*?c/i
4434 AXYZC
4435 0: AXYZC
4436
4437/a.*c/i
4438 AABC
4439 0: AABC
4440\= Expect no match
4441 AXYZD
4442No match
4443
4444/a[bc]d/i
4445 ABD
4446 0: ABD
4447
4448/a[b-d]e/i
4449 ACE
4450 0: ACE
4451\= Expect no match
4452 ABC
4453No match
4454 ABD
4455No match
4456
4457/a[b-d]/i
4458 AAC
4459 0: AC
4460
4461/a[-b]/i
4462 A-
4463 0: A-
4464
4465/a[b-]/i
4466 A-
4467 0: A-
4468
4469/a]/i
4470 A]
4471 0: A]
4472
4473/a[]]b/i
4474 A]B
4475 0: A]B
4476
4477/a[^bc]d/i
4478 AED
4479 0: AED
4480
4481/a[^-b]c/i
4482 ADC
4483 0: ADC
4484\= Expect no match
4485 ABD
4486No match
4487 A-C
4488No match
4489
4490/a[^]b]c/i
4491 ADC
4492 0: ADC
4493
4494/ab|cd/i
4495 ABC
4496 0: AB
4497 ABCD
4498 0: AB
4499
4500/()ef/i
4501 DEF
4502 0: EF
4503 1:
4504
4505/$b/i
4506\= Expect no match
4507 A]C
4508No match
4509 B
4510No match
4511
4512/a\(b/i
4513 A(B
4514 0: A(B
4515
4516/a\(*b/i
4517 AB
4518 0: AB
4519 A((B
4520 0: A((B
4521
4522/a\\b/i
4523 A\\b
4524 0: A\b
4525 a\\B
4526 0: a\B
4527
4528/((a))/i
4529 ABC
4530 0: A
4531 1: A
4532 2: A
4533
4534/(a)b(c)/i
4535 ABC
4536 0: ABC
4537 1: A
4538 2: C
4539
4540/a+b+c/i
4541 AABBABC
4542 0: ABC
4543
4544/a{1,}b{1,}c/i
4545 AABBABC
4546 0: ABC
4547
4548/a.+?c/i
4549 ABCABC
4550 0: ABC
4551
4552/a.*?c/i
4553 ABCABC
4554 0: ABC
4555
4556/a.{0,5}?c/i
4557 ABCABC
4558 0: ABC
4559
4560/(a+|b)*/i
4561 AB
4562 0: AB
4563 1: B
4564
4565/(a+|b){0,}/i
4566 AB
4567 0: AB
4568 1: B
4569
4570/(a+|b)+/i
4571 AB
4572 0: AB
4573 1: B
4574
4575/(a+|b){1,}/i
4576 AB
4577 0: AB
4578 1: B
4579
4580/(a+|b)?/i
4581 AB
4582 0: A
4583 1: A
4584
4585/(a+|b){0,1}/i
4586 AB
4587 0: A
4588 1: A
4589
4590/(a+|b){0,1}?/i
4591 AB
4592 0:
4593
4594/[^ab]*/i
4595 CDE
4596 0: CDE
4597
4598/([abc])*d/i
4599 ABBBCD
4600 0: ABBBCD
4601 1: C
4602
4603/([abc])*bcd/i
4604 ABCD
4605 0: ABCD
4606 1: A
4607
4608/a|b|c|d|e/i
4609 E
4610 0: E
4611
4612/(a|b|c|d|e)f/i
4613 EF
4614 0: EF
4615 1: E
4616
4617/abcd*efg/i
4618 ABCDEFG
4619 0: ABCDEFG
4620
4621/ab*/i
4622 XABYABBBZ
4623 0: AB
4624 XAYABBBZ
4625 0: A
4626
4627/(ab|cd)e/i
4628 ABCDE
4629 0: CDE
4630 1: CD
4631
4632/[abhgefdc]ij/i
4633 HIJ
4634 0: HIJ
4635
4636/^(ab|cd)e/i
4637\= Expect no match
4638 ABCDE
4639No match
4640
4641/(abc|)ef/i
4642 ABCDEF
4643 0: EF
4644 1:
4645
4646/(a|b)c*d/i
4647 ABCD
4648 0: BCD
4649 1: B
4650
4651/(ab|ab*)bc/i
4652 ABC
4653 0: ABC
4654 1: A
4655
4656/a([bc]*)c*/i
4657 ABC
4658 0: ABC
4659 1: BC
4660
4661/a([bc]*)(c*d)/i
4662 ABCD
4663 0: ABCD
4664 1: BC
4665 2: D
4666
4667/a([bc]+)(c*d)/i
4668 ABCD
4669 0: ABCD
4670 1: BC
4671 2: D
4672
4673/a([bc]*)(c+d)/i
4674 ABCD
4675 0: ABCD
4676 1: B
4677 2: CD
4678
4679/a[bcd]*dcdcde/i
4680 ADCDCDE
4681 0: ADCDCDE
4682
4683/a[bcd]+dcdcde/i
4684
4685/(ab|a)b*c/i
4686 ABC
4687 0: ABC
4688 1: AB
4689
4690/((a)(b)c)(d)/i
4691 ABCD
4692 0: ABCD
4693 1: ABC
4694 2: A
4695 3: B
4696 4: D
4697
4698/[a-zA-Z_][a-zA-Z0-9_]*/i
4699 ALPHA
4700 0: ALPHA
4701
4702/^a(bc+|b[eh])g|.h$/i
4703 ABH
4704 0: BH
4705
4706/(bc+d$|ef*g.|h?i(j|k))/i
4707 EFFGZ
4708 0: EFFGZ
4709 1: EFFGZ
4710 IJ
4711 0: IJ
4712 1: IJ
4713 2: J
4714 REFFGZ
4715 0: EFFGZ
4716 1: EFFGZ
4717\= Expect no match
4718 ADCDCDE
4719No match
4720 EFFG
4721No match
4722 BCDD
4723No match
4724
4725/((((((((((a))))))))))/i
4726 A
4727 0: A
4728 1: A
4729 2: A
4730 3: A
4731 4: A
4732 5: A
4733 6: A
4734 7: A
4735 8: A
4736 9: A
473710: A
4738
4739/((((((((((a))))))))))\10/i
4740 AA
4741 0: AA
4742 1: A
4743 2: A
4744 3: A
4745 4: A
4746 5: A
4747 6: A
4748 7: A
4749 8: A
4750 9: A
475110: A
4752
4753/(((((((((a)))))))))/i
4754 A
4755 0: A
4756 1: A
4757 2: A
4758 3: A
4759 4: A
4760 5: A
4761 6: A
4762 7: A
4763 8: A
4764 9: A
4765
4766/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))/i
4767 A
4768 0: A
4769 1: A
4770
4771/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/i
4772 C
4773 0: C
4774 1: C
4775
4776/multiple words of text/i
4777\= Expect no match
4778 AA
4779No match
4780 UH-UH
4781No match
4782
4783/multiple words/i
4784 MULTIPLE WORDS, YEAH
4785 0: MULTIPLE WORDS
4786
4787/(.*)c(.*)/i
4788 ABCDE
4789 0: ABCDE
4790 1: AB
4791 2: DE
4792
4793/\((.*), (.*)\)/i
4794 (A, B)
4795 0: (A, B)
4796 1: A
4797 2: B
4798
4799/[k]/i
4800
4801/abcd/i
4802 ABCD
4803 0: ABCD
4804
4805/a(bc)d/i
4806 ABCD
4807 0: ABCD
4808 1: BC
4809
4810/a[-]?c/i
4811 AC
4812 0: AC
4813
4814/(abc)\1/i
4815 ABCABC
4816 0: ABCABC
4817 1: ABC
4818
4819/([a-c]*)\1/i
4820 ABCABC
4821 0: ABCABC
4822 1: ABC
4823
4824/a(?!b)./
4825 abad
4826 0: ad
4827
4828/a(?=d)./
4829 abad
4830 0: ad
4831
4832/a(?=c|d)./
4833 abad
4834 0: ad
4835
4836/a(?:b|c|d)(.)/
4837 ace
4838 0: ace
4839 1: e
4840
4841/a(?:b|c|d)*(.)/
4842 ace
4843 0: ace
4844 1: e
4845
4846/a(?:b|c|d)+?(.)/
4847 ace
4848 0: ace
4849 1: e
4850 acdbcdbe
4851 0: acd
4852 1: d
4853
4854/a(?:b|c|d)+(.)/
4855 acdbcdbe
4856 0: acdbcdbe
4857 1: e
4858
4859/a(?:b|c|d){2}(.)/
4860 acdbcdbe
4861 0: acdb
4862 1: b
4863
4864/a(?:b|c|d){4,5}(.)/
4865 acdbcdbe
4866 0: acdbcdb
4867 1: b
4868
4869/a(?:b|c|d){4,5}?(.)/
4870 acdbcdbe
4871 0: acdbcd
4872 1: d
4873
4874/((foo)|(bar))*/
4875 foobar
4876 0: foobar
4877 1: bar
4878 2: foo
4879 3: bar
4880
4881/a(?:b|c|d){6,7}(.)/
4882 acdbcdbe
4883 0: acdbcdbe
4884 1: e
4885
4886/a(?:b|c|d){6,7}?(.)/
4887 acdbcdbe
4888 0: acdbcdbe
4889 1: e
4890
4891/a(?:b|c|d){5,6}(.)/
4892 acdbcdbe
4893 0: acdbcdbe
4894 1: e
4895
4896/a(?:b|c|d){5,6}?(.)/
4897 acdbcdbe
4898 0: acdbcdb
4899 1: b
4900
4901/a(?:b|c|d){5,7}(.)/
4902 acdbcdbe
4903 0: acdbcdbe
4904 1: e
4905
4906/a(?:b|c|d){5,7}?(.)/
4907 acdbcdbe
4908 0: acdbcdb
4909 1: b
4910
4911/a(?:b|(c|e){1,2}?|d)+?(.)/
4912 ace
4913 0: ace
4914 1: c
4915 2: e
4916
4917/^(.+)?B/
4918 AB
4919 0: AB
4920 1: A
4921
4922/^([^a-z])|(\^)$/
4923 .
4924 0: .
4925 1: .
4926
4927/^[<>]&/
4928 <&OUT
4929 0: <&
4930
4931/^(a\1?){4}$/
4932 aaaaaaaaaa
4933 0: aaaaaaaaaa
4934 1: aaaa
4935\= Expect no match
4936 AB
4937No match
4938 aaaaaaaaa
4939No match
4940 aaaaaaaaaaa
4941No match
4942
4943/^(a(?(1)\1)){4}$/
4944 aaaaaaaaaa
4945 0: aaaaaaaaaa
4946 1: aaaa
4947\= Expect no match
4948 aaaaaaaaa
4949No match
4950 aaaaaaaaaaa
4951No match
4952
4953/(?:(f)(o)(o)|(b)(a)(r))*/
4954 foobar
4955 0: foobar
4956 1: f
4957 2: o
4958 3: o
4959 4: b
4960 5: a
4961 6: r
4962
4963/(?<=a)b/
4964 ab
4965 0: b
4966\= Expect no match
4967 cb
4968No match
4969 b
4970No match
4971
4972/(?<!c)b/
4973 ab
4974 0: b
4975 b
4976 0: b
4977 b
4978 0: b
4979
4980/(?:..)*a/
4981 aba
4982 0: aba
4983
4984/(?:..)*?a/
4985 aba
4986 0: a
4987
4988/^(?:b|a(?=(.)))*\1/
4989 abc
4990 0: ab
4991 1: b
4992
4993/^(){3,5}/
4994 abc
4995 0:
4996 1:
4997
4998/^(a+)*ax/
4999 aax
5000 0: aax
5001 1: a
5002
5003/^((a|b)+)*ax/
5004 aax
5005 0: aax
5006 1: a
5007 2: a
5008
5009/^((a|bc)+)*ax/
5010 aax
5011 0: aax
5012 1: a
5013 2: a
5014
5015/(a|x)*ab/
5016 cab
5017 0: ab
5018
5019/(a)*ab/
5020 cab
5021 0: ab
5022
5023/(?:(?i)a)b/
5024 ab
5025 0: ab
5026
5027/((?i)a)b/
5028 ab
5029 0: ab
5030 1: a
5031
5032/(?:(?i)a)b/
5033 Ab
5034 0: Ab
5035
5036/((?i)a)b/
5037 Ab
5038 0: Ab
5039 1: A
5040
5041/(?:(?i)a)b/
5042\= Expect no match
5043 cb
5044No match
5045 aB
5046No match
5047
5048/((?i)a)b/
5049
5050/(?i:a)b/
5051 ab
5052 0: ab
5053
5054/((?i:a))b/
5055 ab
5056 0: ab
5057 1: a
5058
5059/(?i:a)b/
5060 Ab
5061 0: Ab
5062
5063/((?i:a))b/
5064 Ab
5065 0: Ab
5066 1: A
5067
5068/(?i:a)b/
5069\= Expect no match
5070 aB
5071No match
5072 aB
5073No match
5074
5075/((?i:a))b/
5076
5077/(?:(?-i)a)b/i
5078 ab
5079 0: ab
5080
5081/((?-i)a)b/i
5082 ab
5083 0: ab
5084 1: a
5085
5086/(?:(?-i)a)b/i
5087 aB
5088 0: aB
5089
5090/((?-i)a)b/i
5091 aB
5092 0: aB
5093 1: a
5094
5095/(?:(?-i)a)b/i
5096 aB
5097 0: aB
5098\= Expect no match
5099 Ab
5100No match
5101 AB
5102No match
5103
5104/(?-i:a)b/i
5105 ab
5106 0: ab
5107
5108/((?-i:a))b/i
5109 ab
5110 0: ab
5111 1: a
5112
5113/(?-i:a)b/i
5114 aB
5115 0: aB
5116
5117/((?-i:a))b/i
5118 aB
5119 0: aB
5120 1: a
5121
5122/(?-i:a)b/i
5123\= Expect no match
5124 AB
5125No match
5126 Ab
5127No match
5128
5129/((?-i:a))b/i
5130
5131/(?-i:a)b/i
5132 aB
5133 0: aB
5134
5135/((?-i:a))b/i
5136 aB
5137 0: aB
5138 1: a
5139
5140/(?-i:a)b/i
5141\= Expect no match
5142 Ab
5143No match
5144 AB
5145No match
5146
5147/((?-i:a))b/i
5148
5149/((?-i:a.))b/i
5150\= Expect no match
5151 AB
5152No match
5153 a\nB
5154No match
5155
5156/((?s-i:a.))b/i
5157 a\nB
5158 0: a\x0aB
5159 1: a\x0a
5160
5161/(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b)))/
5162 cabbbb
5163 0: cabbbb
5164
5165/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/
5166 caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
5167 0: caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
5168
5169/(ab)\d\1/i
5170 Ab4ab
5171 0: Ab4ab
5172 1: Ab
5173 ab4Ab
5174 0: ab4Ab
5175 1: ab
5176
5177/foo\w*\d{4}baz/
5178 foobar1234baz
5179 0: foobar1234baz
5180
5181/x(~~)*(?:(?:F)?)?/
5182 x~~
5183 0: x~~
5184 1: ~~
5185
5186/^a(?#xxx){3}c/
5187 aaac
5188 0: aaac
5189
5190/^a (?#xxx) (?#yyy) {3}c/x
5191 aaac
5192 0: aaac
5193
5194/(?<![cd])b/
5195\= Expect no match
5196 B\nB
5197No match
5198 dbcb
5199No match
5200
5201/(?<![cd])[ab]/
5202 dbaacb
5203 0: a
5204
5205/(?<!(c|d))b/
5206
5207/(?<!(c|d))[ab]/
5208 dbaacb
5209 0: a
5210
5211/(?<!cd)[ab]/
5212 cdaccb
5213 0: b
5214
5215/^(?:a?b?)*$/
5216 \
5217 0:
5218 a
5219 0: a
5220 ab
5221 0: ab
5222 aaa
5223 0: aaa
5224\= Expect no match
5225 dbcb
5226No match
5227 a--
5228No match
5229 aa--
5230No match
5231
5232/((?s)^a(.))((?m)^b$)/
5233 a\nb\nc\n
5234 0: a\x0ab
5235 1: a\x0a
5236 2: \x0a
5237 3: b
5238
5239/((?m)^b$)/
5240 a\nb\nc\n
5241 0: b
5242 1: b
5243
5244/(?m)^b/
5245 a\nb\n
5246 0: b
5247
5248/(?m)^(b)/
5249 a\nb\n
5250 0: b
5251 1: b
5252
5253/((?m)^b)/
5254 a\nb\n
5255 0: b
5256 1: b
5257
5258/\n((?m)^b)/
5259 a\nb\n
5260 0: \x0ab
5261 1: b
5262
5263/((?s).)c(?!.)/
5264 a\nb\nc\n
5265 0: \x0ac
5266 1: \x0a
5267 a\nb\nc\n
5268 0: \x0ac
5269 1: \x0a
5270
5271/((?s)b.)c(?!.)/
5272 a\nb\nc\n
5273 0: b\x0ac
5274 1: b\x0a
5275 a\nb\nc\n
5276 0: b\x0ac
5277 1: b\x0a
5278
5279/^b/
5280
5281/()^b/
5282\= Expect no match
5283 a\nb\nc\n
5284No match
5285 a\nb\nc\n
5286No match
5287
5288/((?m)^b)/
5289 a\nb\nc\n
5290 0: b
5291 1: b
5292
5293/(x)?(?(1)a|b)/
5294\= Expect no match
5295 a
5296No match
5297 a
5298No match
5299
5300/(x)?(?(1)b|a)/
5301 a
5302 0: a
5303
5304/()?(?(1)b|a)/
5305 a
5306 0: a
5307
5308/()(?(1)b|a)/
5309
5310/()?(?(1)a|b)/
5311 a
5312 0: a
5313 1:
5314
5315/^(\()?blah(?(1)(\)))$/
5316 (blah)
5317 0: (blah)
5318 1: (
5319 2: )
5320 blah
5321 0: blah
5322\= Expect no match
5323 a
5324No match
5325 blah)
5326No match
5327 (blah
5328No match
5329
5330/^(\(+)?blah(?(1)(\)))$/
5331 (blah)
5332 0: (blah)
5333 1: (
5334 2: )
5335 blah
5336 0: blah
5337\= Expect no match
5338 blah)
5339No match
5340 (blah
5341No match
5342
5343/(?(?!a)a|b)/
5344
5345/(?(?!a)b|a)/
5346 a
5347 0: a
5348
5349/(?(?=a)b|a)/
5350\= Expect no match
5351 a
5352No match
5353 a
5354No match
5355
5356/(?(?=a)a|b)/
5357 a
5358 0: a
5359
5360/(?=(a+?))(\1ab)/
5361 aaab
5362 0: aab
5363 1: a
5364 2: aab
5365
5366/^(?=(a+?))\1ab/
5367
5368/(\w+:)+/
5369 one:
5370 0: one:
5371 1: one:
5372
5373/$(?<=^(a))/
5374 a
5375 0:
5376 1: a
5377
5378/(?=(a+?))(\1ab)/
5379 aaab
5380 0: aab
5381 1: a
5382 2: aab
5383
5384/^(?=(a+?))\1ab/
5385\= Expect no match
5386 aaab
5387No match
5388 aaab
5389No match
5390
5391/([\w:]+::)?(\w+)$/
5392 abcd
5393 0: abcd
5394 1: <unset>
5395 2: abcd
5396 xy:z:::abcd
5397 0: xy:z:::abcd
5398 1: xy:z:::
5399 2: abcd
5400
5401/^[^bcd]*(c+)/
5402 aexycd
5403 0: aexyc
5404 1: c
5405
5406/(a*)b+/
5407 caab
5408 0: aab
5409 1: aa
5410
5411/([\w:]+::)?(\w+)$/
5412 abcd
5413 0: abcd
5414 1: <unset>
5415 2: abcd
5416 xy:z:::abcd
5417 0: xy:z:::abcd
5418 1: xy:z:::
5419 2: abcd
5420\= Expect no match
5421 abcd:
5422No match
5423 abcd:
5424No match
5425
5426/^[^bcd]*(c+)/
5427 aexycd
5428 0: aexyc
5429 1: c
5430
5431/(>a+)ab/
5432
5433/(?>a+)b/
5434 aaab
5435 0: aaab
5436
5437/([[:]+)/
5438 a:[b]:
5439 0: :[
5440 1: :[
5441
5442/([[=]+)/
5443 a=[b]=
5444 0: =[
5445 1: =[
5446
5447/([[.]+)/
5448 a.[b].
5449 0: .[
5450 1: .[
5451
5452/((?>a+)b)/
5453 aaab
5454 0: aaab
5455 1: aaab
5456
5457/(?>(a+))b/
5458 aaab
5459 0: aaab
5460 1: aaa
5461
5462/((?>[^()]+)|\([^()]*\))+/
5463 ((abc(ade)ufh()()x
5464 0: abc(ade)ufh()()x
5465 1: x
5466
5467/a\Z/
5468\= Expect no match
5469 aaab
5470No match
5471 a\nb\n
5472No match
5473
5474/b\Z/
5475 a\nb\n
5476 0: b
5477
5478/b\z/
5479
5480/b\Z/
5481 a\nb
5482 0: b
5483
5484/b\z/
5485 a\nb
5486 0: b
5487
5488/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/
5489 a
5490 0: a
5491 1:
5492 abc
5493 0: abc
5494 1:
5495 a-b
5496 0: a-b
5497 1:
5498 0-9
5499 0: 0-9
5500 1:
5501 a.b
5502 0: a.b
5503 1:
5504 5.6.7
5505 0: 5.6.7
5506 1:
5507 the.quick.brown.fox
5508 0: the.quick.brown.fox
5509 1:
5510 a100.b200.300c
5511 0: a100.b200.300c
5512 1:
5513 12-ab.1245
5514 0: 12-ab.1245
5515 1:
5516\= Expect no match
5517 \
5518No match
5519 .a
5520No match
5521 -a
5522No match
5523 a-
5524No match
5525 a.
5526No match
5527 a_b
5528No match
5529 a.-
5530No match
5531 a..
5532No match
5533 ab..bc
5534No match
5535 the.quick.brown.fox-
5536No match
5537 the.quick.brown.fox.
5538No match
5539 the.quick.brown.fox_
5540No match
5541 the.quick.brown.fox+
5542No match
5543
5544/(?>.*)(?<=(abcd|wxyz))/
5545 alphabetabcd
5546 0: alphabetabcd
5547 1: abcd
5548 endingwxyz
5549 0: endingwxyz
5550 1: wxyz
5551\= Expect no match
5552 a rather long string that doesn't end with one of them
5553No match
5554
5555/word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/
5556 word cat dog elephant mussel cow horse canary baboon snake shark otherword
5557 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword
5558\= Expect no match
5559 word cat dog elephant mussel cow horse canary baboon snake shark
5560No match
5561
5562/word (?>[a-zA-Z0-9]+ ){0,30}otherword/
5563\= Expect no match
5564 word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope
5565No match
5566
5567/(?<=\d{3}(?!999))foo/
5568 999foo
5569 0: foo
5570 123999foo
5571 0: foo
5572\= Expect no match
5573 123abcfoo
5574No match
5575
5576/(?<=(?!...999)\d{3})foo/
5577 999foo
5578 0: foo
5579 123999foo
5580 0: foo
5581\= Expect no match
5582 123abcfoo
5583No match
5584
5585/(?<=\d{3}(?!999)...)foo/
5586 123abcfoo
5587 0: foo
5588 123456foo
5589 0: foo
5590\= Expect no match
5591 123999foo
5592No match
5593
5594/(?<=\d{3}...)(?<!999)foo/
5595 123abcfoo
5596 0: foo
5597 123456foo
5598 0: foo
5599\= Expect no match
5600 123999foo
5601No match
5602
5603/<a[\s]+href[\s]*=[\s]* # find <a href=
5604 ([\"\'])? # find single or double quote
5605 (?(1) (.*?)\1 | ([^\s]+)) # if quote found, match up to next matching
5606 # quote, otherwise match up to next space
5607/isx
5608 <a href=abcd xyz
5609 0: <a href=abcd
5610 1: <unset>
5611 2: <unset>
5612 3: abcd
5613 <a href=\"abcd xyz pqr\" cats
5614 0: <a href="abcd xyz pqr"
5615 1: "
5616 2: abcd xyz pqr
5617 <a href=\'abcd xyz pqr\' cats
5618 0: <a href='abcd xyz pqr'
5619 1: '
5620 2: abcd xyz pqr
5621
5622/<a\s+href\s*=\s* # find <a href=
5623 (["'])? # find single or double quote
5624 (?(1) (.*?)\1 | (\S+)) # if quote found, match up to next matching
5625 # quote, otherwise match up to next space
5626/isx
5627 <a href=abcd xyz
5628 0: <a href=abcd
5629 1: <unset>
5630 2: <unset>
5631 3: abcd
5632 <a href=\"abcd xyz pqr\" cats
5633 0: <a href="abcd xyz pqr"
5634 1: "
5635 2: abcd xyz pqr
5636 <a href = \'abcd xyz pqr\' cats
5637 0: <a href = 'abcd xyz pqr'
5638 1: '
5639 2: abcd xyz pqr
5640
5641/<a\s+href(?>\s*)=(?>\s*) # find <a href=
5642 (["'])? # find single or double quote
5643 (?(1) (.*?)\1 | (\S+)) # if quote found, match up to next matching
5644 # quote, otherwise match up to next space
5645/isx
5646 <a href=abcd xyz
5647 0: <a href=abcd
5648 1: <unset>
5649 2: <unset>
5650 3: abcd
5651 <a href=\"abcd xyz pqr\" cats
5652 0: <a href="abcd xyz pqr"
5653 1: "
5654 2: abcd xyz pqr
5655 <a href = \'abcd xyz pqr\' cats
5656 0: <a href = 'abcd xyz pqr'
5657 1: '
5658 2: abcd xyz pqr
5659
5660/((Z)+|A)*/
5661 ZABCDEFG
5662 0: ZA
5663 1: A
5664 2: Z
5665
5666/(Z()|A)*/
5667 ZABCDEFG
5668 0: ZA
5669 1: A
5670 2:
5671
5672/(Z(())|A)*/
5673 ZABCDEFG
5674 0: ZA
5675 1: A
5676 2:
5677 3:
5678
5679/((?>Z)+|A)*/
5680 ZABCDEFG
5681 0: ZA
5682 1: A
5683
5684/((?>)+|A)*/
5685 ZABCDEFG
5686 0:
5687 1:
5688
5689/^[\d-a]/
5690 abcde
5691 0: a
5692 -things
5693 0: -
5694 0digit
5695 0: 0
5696\= Expect no match
5697 bcdef
5698No match
5699
5700/[\s]+/
5701 > \x09\x0a\x0c\x0d\x0b<
5702 0: \x09\x0a\x0c\x0d\x0b
5703
5704/\s+/
5705 > \x09\x0a\x0c\x0d\x0b<
5706 0: \x09\x0a\x0c\x0d\x0b
5707
5708/a b/x
5709 ab
5710 0: ab
5711
5712/(?!\A)x/m
5713 a\nxb\n
5714 0: x
5715
5716/(?!^)x/m
5717\= Expect no match
5718 a\nxb\n
5719No match
5720
5721#/abc\Qabc\Eabc/
5722# abcabcabc
5723# 0: abcabcabc
5724
5725#/abc\Q(*+|\Eabc/
5726# abc(*+|abc
5727# 0: abc(*+|abc
5728
5729#/ abc\Q abc\Eabc/x
5730# abc abcabc
5731# 0: abc abcabc
5732#\= Expect no match
5733# abcabcabc
5734#No match
5735
5736#/abc#comment
5737# \Q#not comment
5738# literal\E/x
5739# abc#not comment\n literal
5740# 0: abc#not comment\x0a literal
5741
5742#/abc#comment
5743# \Q#not comment
5744# literal/x
5745# abc#not comment\n literal
5746# 0: abc#not comment\x0a literal
5747
5748#/abc#comment
5749# \Q#not comment
5750# literal\E #more comment
5751# /x
5752# abc#not comment\n literal
5753# 0: abc#not comment\x0a literal
5754
5755#/abc#comment
5756# \Q#not comment
5757# literal\E #more comment/x
5758# abc#not comment\n literal
5759# 0: abc#not comment\x0a literal
5760
5761#/\Qabc\$xyz\E/
5762# abc\\\$xyz
5763# 0: abc\$xyz
5764
5765#/\Qabc\E\$\Qxyz\E/
5766# abc\$xyz
5767# 0: abc$xyz
5768
5769/\Gabc/
5770 abc
5771 0: abc
5772\= Expect no match
5773 xyzabc
5774No match
5775
5776/a(?x: b c )d/
5777 XabcdY
5778 0: abcd
5779\= Expect no match
5780 Xa b c d Y
5781No match
5782
5783/((?x)x y z | a b c)/
5784 XabcY
5785 0: abc
5786 1: abc
5787 AxyzB
5788 0: xyz
5789 1: xyz
5790
5791/(?i)AB(?-i)C/
5792 XabCY
5793 0: abC
5794\= Expect no match
5795 XabcY
5796No match
5797
5798/((?i)AB(?-i)C|D)E/
5799 abCE
5800 0: abCE
5801 1: abC
5802 DE
5803 0: DE
5804 1: D
5805\= Expect no match
5806 abcE
5807No match
5808 abCe
5809No match
5810 dE
5811No match
5812 De
5813No match
5814
5815/(.*)\d+\1/
5816 abc123abc
5817 0: abc123abc
5818 1: abc
5819 abc123bc
5820 0: bc123bc
5821 1: bc
5822
5823/(.*)\d+\1/s
5824 abc123abc
5825 0: abc123abc
5826 1: abc
5827 abc123bc
5828 0: bc123bc
5829 1: bc
5830
5831/((.*))\d+\1/
5832 abc123abc
5833 0: abc123abc
5834 1: abc
5835 2: abc
5836 abc123bc
5837 0: bc123bc
5838 1: bc
5839 2: bc
5840
5841# This tests for an IPv6 address in the form where it can have up to
5842# eight components, one and only one of which is empty. This must be
5843# an internal component.
5844
5845/^(?!:) # colon disallowed at start
5846 (?: # start of item
5847 (?: [0-9a-f]{1,4} | # 1-4 hex digits or
5848 (?(1)0 | () ) ) # if null previously matched, fail; else null
5849 : # followed by colon
5850 ){1,7} # end item; 1-7 of them required
5851 [0-9a-f]{1,4} $ # final hex number at end of string
5852 (?(1)|.) # check that there was an empty component
5853 /ix
5854 a123::a123
5855 0: a123::a123
5856 1:
5857 a123:b342::abcd
5858 0: a123:b342::abcd
5859 1:
5860 a123:b342::324e:abcd
5861 0: a123:b342::324e:abcd
5862 1:
5863 a123:ddde:b342::324e:abcd
5864 0: a123:ddde:b342::324e:abcd
5865 1:
5866 a123:ddde:b342::324e:dcba:abcd
5867 0: a123:ddde:b342::324e:dcba:abcd
5868 1:
5869 a123:ddde:9999:b342::324e:dcba:abcd
5870 0: a123:ddde:9999:b342::324e:dcba:abcd
5871 1:
5872\= Expect no match
5873 1:2:3:4:5:6:7:8
5874No match
5875 a123:bce:ddde:9999:b342::324e:dcba:abcd
5876No match
5877 a123::9999:b342::324e:dcba:abcd
5878No match
5879 abcde:2:3:4:5:6:7:8
5880No match
5881 ::1
5882No match
5883 abcd:fee0:123::
5884No match
5885 :1
5886No match
5887 1:
5888No match
5889
5890#/[z\Qa-d]\E]/
5891# z
5892# 0: z
5893# a
5894# 0: a
5895# -
5896# 0: -
5897# d
5898# 0: d
5899# ]
5900# 0: ]
5901#\= Expect no match
5902# b
5903#No match
5904
5905#TODO: PCRE has an optimization to make this workable, .NET does not
5906#/(a+)*b/
5907#\= Expect no match
5908# aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
5909#No match
5910
5911# All these had to be updated because we understand unicode
5912# and this looks like it's expecting single byte matches
5913
5914# .NET generates \xe4...not sure what's up, might just be different code pages
5915/(?i)reg(?:ul(?:[aä]|ae)r|ex)/
5916 REGular
5917 0: REGular
5918 regulaer
5919 0: regulaer
5920 Regex
5921 0: Regex
5922 regulär
5923 0: regul\xc3\xa4r
5924
5925#/Åæåä[à-ÿÀ-ß]+/
5926# Åæåäà
5927# 0: \xc5\xe6\xe5\xe4\xe0
5928# Åæåäÿ
5929# 0: \xc5\xe6\xe5\xe4\xff
5930# ÅæåäÀ
5931# 0: \xc5\xe6\xe5\xe4\xc0
5932# Åæåäß
5933# 0: \xc5\xe6\xe5\xe4\xdf
5934
5935/(?<=Z)X./
5936 \x84XAZXB
5937 0: XB
5938
5939/ab cd (?x) de fg/
5940 ab cd defg
5941 0: ab cd defg
5942
5943/ab cd(?x) de fg/
5944 ab cddefg
5945 0: ab cddefg
5946\= Expect no match
5947 abcddefg
5948No match
5949
5950/(?<![^f]oo)(bar)/
5951 foobarX
5952 0: bar
5953 1: bar
5954\= Expect no match
5955 boobarX
5956No match
5957
5958/(?<![^f])X/
5959 offX
5960 0: X
5961\= Expect no match
5962 onyX
5963No match
5964
5965/(?<=[^f])X/
5966 onyX
5967 0: X
5968\= Expect no match
5969 offX
5970No match
5971
5972/(?:(?(1)a|b)(X))+/
5973 bXaX
5974 0: bXaX
5975 1: X
5976
5977/(?:(?(1)\1a|b)(X|Y))+/
5978 bXXaYYaY
5979 0: bXXaYYaY
5980 1: Y
5981 bXYaXXaX
5982 0: bX
5983 1: X
5984
5985# TODO: I think this is a difference caused by the
5986# collision of group numbers, but not sure
5987#/()()()()()()()()()(?:(?(10)\10a|b)(X|Y))+/
5988# bXXaYYaY
5989# 0: bX
5990# 1:
5991# 2:
5992# 3:
5993# 4:
5994# 5:
5995# 6:
5996# 7:
5997# 8:
5998# 9:
5999#10: X
6000
6001/[[,abc,]+]/
6002 abc]
6003 0: abc]
6004 a,b]
6005 0: a,b]
6006 [a,b,c]
6007 0: [a,b,c]
6008
6009/(?-x: )/x
6010 A\x20B
6011 0:
6012
6013"(?x)(?-x: \s*#\s*)"
6014 A # B
6015 0: #
6016\= Expect no match
6017 #
6018No match
6019
6020"(?x-is)(?:(?-ixs) \s*#\s*) include"
6021 A #include
6022 0: #include
6023\= Expect no match
6024 A#include
6025No match
6026 A #Include
6027No match
6028
6029/a*b*\w/
6030 aaabbbb
6031 0: aaabbbb
6032 aaaa
6033 0: aaaa
6034 a
6035 0: a
6036
6037/a*b?\w/
6038 aaabbbb
6039 0: aaabb
6040 aaaa
6041 0: aaaa
6042 a
6043 0: a
6044
6045/a*b{0,4}\w/
6046 aaabbbb
6047 0: aaabbbb
6048 aaaa
6049 0: aaaa
6050 a
6051 0: a
6052
6053/a*b{0,}\w/
6054 aaabbbb
6055 0: aaabbbb
6056 aaaa
6057 0: aaaa
6058 a
6059 0: a
6060
6061/a*\d*\w/
6062 0a
6063 0: 0a
6064 a
6065 0: a
6066
6067/a*b *\w/x
6068 a
6069 0: a
6070
6071/a*b#comment
6072 *\w/x
6073 a
6074 0: a
6075
6076/a* b *\w/x
6077 a
6078 0: a
6079
6080/^\w+=.*(\\\n.*)*/
6081 abc=xyz\\\npqr
6082 0: abc=xyz\
6083
6084/(?=(\w+))\1:/
6085 abcd:
6086 0: abcd:
6087 1: abcd
6088
6089/^(?=(\w+))\1:/
6090 abcd:
6091 0: abcd:
6092 1: abcd
6093
6094#/^\Eabc/
6095# abc
6096# 0: abc
6097
6098#/^[\Eabc]/
6099# a
6100# 0: a
6101#\= Expect no match
6102# E
6103#No match
6104
6105#/^[a-\Ec]/
6106# b
6107# 0: b
6108#\= Expect no match
6109# -
6110#No match
6111# E
6112#No match
6113
6114#/^[a\E\E-\Ec]/
6115# b
6116# 0: b
6117#\= Expect no match
6118# -
6119#No match
6120# E
6121#No match
6122
6123#/^[\E\Qa\E-\Qz\E]+/
6124# b
6125# 0: b
6126#\= Expect no match
6127# -
6128#No match
6129
6130#/^[a\Q]bc\E]/
6131# a
6132# 0: a
6133# ]
6134# 0: ]
6135# c
6136# 0: c
6137
6138#/^[a-\Q\E]/
6139# a
6140# 0: a
6141# -
6142# 0: -
6143
6144/^(a()*)*/
6145 aaaa
6146 0: aaaa
6147 1: a
6148 2:
6149
6150/^(?:a(?:(?:))*)*/
6151 aaaa
6152 0: aaaa
6153
6154/^(a()+)+/
6155 aaaa
6156 0: aaaa
6157 1: a
6158 2:
6159
6160/^(?:a(?:(?:))+)+/
6161 aaaa
6162 0: aaaa
6163
6164/(a){0,3}(?(1)b|(c|))*D/
6165 abbD
6166 0: abbD
6167 1: a
6168 ccccD
6169 0: ccccD
6170 1: <unset>
6171 2:
6172 D
6173 0: D
6174 1: <unset>
6175 2:
6176
6177# this is really long with debug -- removing for now
6178#/(a|)*\d/
6179# aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
6180# 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
6181# 1:
6182#\= Expect no match
6183# aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
6184#No match
6185
6186/(?>a|)*\d/
6187 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
6188 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
6189\= Expect no match
6190 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
6191No match
6192
6193/(?:a|)*\d/
6194 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
6195 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
6196\= Expect no match
6197 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
6198No match
6199
6200/^(?s)(?>.*)(?<!\n)/
6201 abc
6202 0: abc
6203\= Expect no match
6204 abc\n
6205No match
6206
6207/^(?![^\n]*\n\z)/
6208 abc
6209 0:
6210\= Expect no match
6211 abc\n
6212No match
6213
6214/\z(?<!\n)/
6215 abc
6216 0:
6217\= Expect no match
6218 abc\n
6219No match
6220
6221/(.*(.)?)*/
6222 abcd
6223 0: abcd
6224 1:
6225
6226/( (A | (?(1)0|) )* )/x
6227 abcd
6228 0:
6229 1:
6230 2:
6231
6232/( ( (?(1)0|) )* )/x
6233 abcd
6234 0:
6235 1:
6236 2:
6237
6238/( (?(1)0|)* )/x
6239 abcd
6240 0:
6241 1:
6242
6243/[[:abcd:xyz]]/
6244 a]
6245 0: a]
6246 :]
6247 0: :]
6248
6249/[abc[:x\]pqr]/
6250 a
6251 0: a
6252 [
6253 0: [
6254 :
6255 0: :
6256 ]
6257 0: ]
6258 p
6259 0: p
6260
6261/.*[op][xyz]/
6262\= Expect no match
6263 fooabcfoo
6264No match
6265
6266/(?(?=.*b)b|^)/
6267 adc
6268 0:
6269 abc
6270 0: b
6271
6272/(?(?=^.*b)b|^)/
6273 adc
6274 0:
6275\= Expect no match
6276 abc
6277No match
6278
6279/(?(?=.*b)b|^)*/
6280 adc
6281 0:
6282 abc
6283 0:
6284
6285/(?(?=.*b)b|^)+/
6286 adc
6287 0:
6288 abc
6289 0: b
6290
6291/(?(?=b).*b|^d)/
6292 abc
6293 0: b
6294
6295/(?(?=.*b).*b|^d)/
6296 abc
6297 0: ab
6298
6299/^%((?(?=[a])[^%])|b)*%$/
6300 %ab%
6301 0: %ab%
6302 1:
6303
6304/(?i)a(?-i)b|c/
6305 XabX
6306 0: ab
6307 XAbX
6308 0: Ab
6309 CcC
6310 0: c
6311\= Expect no match
6312 XABX
6313No match
6314
6315/[\x00-\xff\s]+/
6316 \x0a\x0b\x0c\x0d
6317 0: \x0a\x0b\x0c\x0d
6318
6319/(abc)\1/i
6320\= Expect no match
6321 abc
6322No match
6323
6324/(abc)\1/
6325\= Expect no match
6326 abc
6327No match
6328
6329/[^a]*/i
6330 12abc
6331 0: 12
6332 12ABC
6333 0: 12
6334
6335#Posses
6336/[^a]*/i
6337 12abc
6338 0: 12
6339 12ABC
6340 0: 12
6341
6342/[^a]*?X/i
6343\= Expect no match
6344 12abc
6345No match
6346 12ABC
6347No match
6348
6349/[^a]+?X/i
6350\= Expect no match
6351 12abc
6352No match
6353 12ABC
6354No match
6355
6356/[^a]?X/i
6357 12aXbcX
6358 0: X
6359 12AXBCX
6360 0: X
6361 BCX
6362 0: CX
6363
6364/[^a]??X/i
6365 12aXbcX
6366 0: X
6367 12AXBCX
6368 0: X
6369 BCX
6370 0: CX
6371
6372/[^a]{2,3}/i
6373 abcdef
6374 0: bcd
6375 ABCDEF
6376 0: BCD
6377
6378/[^a]{2,3}?/i
6379 abcdef
6380 0: bc
6381 ABCDEF
6382 0: BC
6383
6384/((a|)+)+Z/
6385 Z
6386 0: Z
6387 1:
6388 2:
6389
6390/(a)b|(a)c/
6391 ac
6392 0: ac
6393 1: <unset>
6394 2: a
6395
6396/(?>(a))b|(a)c/
6397 ac
6398 0: ac
6399 1: <unset>
6400 2: a
6401
6402/(?=(a))ab|(a)c/
6403 ac
6404 0: ac
6405 1: <unset>
6406 2: a
6407
6408/((?>(a))b|(a)c)/
6409 ac
6410 0: ac
6411 1: ac
6412 2: <unset>
6413 3: a
6414
6415/(?=(?>(a))b|(a)c)(..)/
6416 ac
6417 0: ac
6418 1: <unset>
6419 2: a
6420 3: ac
6421
6422/(?>(?>(a))b|(a)c)/
6423 ac
6424 0: ac
6425 1: <unset>
6426 2: a
6427
6428/((?>(a+)b)+(aabab))/
6429 aaaabaaabaabab
6430 0: aaaabaaabaabab
6431 1: aaaabaaabaabab
6432 2: aaa
6433 3: aabab
6434
6435/(?>a+|ab)+?c/
6436\= Expect no match
6437 aabc
6438No match
6439
6440/(?>a+|ab)+c/
6441\= Expect no match
6442 aabc
6443No match
6444
6445/(?:a+|ab)+c/
6446 aabc
6447 0: aabc
6448
6449/^(?:a|ab)+c/
6450 aaaabc
6451 0: aaaabc
6452
6453/(?=abc){0}xyz/
6454 xyz
6455 0: xyz
6456
6457/(?=abc){1}xyz/
6458\= Expect no match
6459 xyz
6460No match
6461
6462/(?=(a))?./
6463 ab
6464 0: a
6465 1: a
6466 bc
6467 0: b
6468
6469/(?=(a))??./
6470 ab
6471 0: a
6472 bc
6473 0: b
6474
6475/^(?!a){0}\w+/
6476 aaaaa
6477 0: aaaaa
6478
6479/(?<=(abc))?xyz/
6480 abcxyz
6481 0: xyz
6482 1: abc
6483 pqrxyz
6484 0: xyz
6485
6486/^[g<a>]+/
6487 ggg<<<aaa>>>
6488 0: ggg<<<aaa>>>
6489\= Expect no match
6490 \\ga
6491No match
6492
6493/^[ga]+/
6494 gggagagaxyz
6495 0: gggagaga
6496
6497/[:a]xxx[b:]/
6498 :xxx:
6499 0: :xxx:
6500
6501/(?<=a{2})b/i
6502 xaabc
6503 0: b
6504\= Expect no match
6505 xabc
6506No match
6507
6508/(?<!a{2})b/i
6509 xabc
6510 0: b
6511\= Expect no match
6512 xaabc
6513No match
6514
6515/(?<=[^a]{2})b/
6516 axxbc
6517 0: b
6518 aAAbc
6519 0: b
6520\= Expect no match
6521 xaabc
6522No match
6523
6524/(?<=[^a]{2})b/i
6525 axxbc
6526 0: b
6527\= Expect no match
6528 aAAbc
6529No match
6530 xaabc
6531No match
6532
6533#/(?|(abc)|(xyz))\1/
6534# abcabc
6535# 0: abcabc
6536# 1: abc
6537# xyzxyz
6538# 0: xyzxyz
6539# 1: xyz
6540#\= Expect no match
6541# abcxyz
6542#No match
6543# xyzabc
6544#No match
6545
6546#/(?|(abc)|(xyz))(?1)/
6547# abcabc
6548# 0: abcabc
6549# 1: abc
6550# xyzabc
6551# 0: xyzabc
6552# 1: xyz
6553#\= Expect no match
6554# xyzxyz
6555#No match
6556
6557#/^X(?5)(a)(?|(b)|(q))(c)(d)(Y)/
6558# XYabcdY
6559# 0: XYabcdY
6560# 1: a
6561# 2: b
6562# 3: c
6563# 4: d
6564# 5: Y
6565
6566#/^X(?7)(a)(?|(b|(r)(s))|(q))(c)(d)(Y)/
6567# XYabcdY
6568# 0: XYabcdY
6569# 1: a
6570# 2: b
6571# 3: <unset>
6572# 4: <unset>
6573# 5: c
6574# 6: d
6575# 7: Y
6576
6577#/^X(?7)(a)(?|(b|(?|(r)|(t))(s))|(q))(c)(d)(Y)/
6578# XYabcdY
6579# 0: XYabcdY
6580# 1: a
6581# 2: b
6582# 3: <unset>
6583# 4: <unset>
6584# 5: c
6585# 6: d
6586# 7: Y
6587
6588/(?'abc'\w+):\k<abc>{2}/
6589 a:aaxyz
6590 0: a:aa
6591 1: a
6592 ab:ababxyz
6593 0: ab:abab
6594 1: ab
6595\= Expect no match
6596 a:axyz
6597No match
6598 ab:abxyz
6599No match
6600
6601/^(?<ab>a)? (?(ab)b|c) (?(ab)d|e)/x
6602 abd
6603 0: abd
6604 1: a
6605 ce
6606 0: ce
6607
6608# .NET has more consistent grouping numbers with these dupe groups for the two options
6609/(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) |b(?<quote> (?<apostrophe>')|(?<realquote>")) ) (?(quote)[a-z]+|[0-9]+)/x,dupnames
6610 a\"aaaaa
6611 0: a"aaaaa
6612 1: "
6613 2: <unset>
6614 3: "
6615 b\"aaaaa
6616 0: b"aaaaa
6617 1: "
6618 2: <unset>
6619 3: "
6620\= Expect no match
6621 b\"11111
6622No match
6623
6624#/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
6625# 0
6626# 0: 0
6627# 1: 0
6628# 00
6629# 0: 00
6630# 1: 00
6631# 2: 0
6632# 0000
6633# 0: 0000
6634# 1: 0000
6635# 2: 0
6636
6637#/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
6638# 0
6639# 0: 0
6640# 1: 0
6641# 2: 0
6642# 00
6643# 0: 0
6644# 1: 0
6645# 2: 0
6646# 0000
6647# 0: 0
6648# 1: 0
6649# 2: 0
6650
6651# Check the use of names for failure
6652
6653# Check opening parens in comment when seeking forward reference.
6654
6655#/(?P<abn>(?P=abn)xxx|)+/
6656# xxx
6657# 0:
6658# 1:
6659
6660#Posses
6661/^(a)?(\w)/
6662 aaaaX
6663 0: aa
6664 1: a
6665 2: a
6666 YZ
6667 0: Y
6668 1: <unset>
6669 2: Y
6670
6671#Posses
6672/^(?:a)?(\w)/
6673 aaaaX
6674 0: aa
6675 1: a
6676 YZ
6677 0: Y
6678 1: Y
6679
6680/\A.*?(a|bc)/
6681 ba
6682 0: ba
6683 1: a
6684
6685/\A.*?(?:a|bc|d)/
6686 ba
6687 0: ba
6688
6689# --------------------------
6690
6691/(another)?(\1?)test/
6692 hello world test
6693 0: test
6694 1: <unset>
6695 2:
6696
6697/(another)?(\1+)test/
6698\= Expect no match
6699 hello world test
6700No match
6701
6702/((?:a?)*)*c/
6703 aac
6704 0: aac
6705 1:
6706
6707/((?>a?)*)*c/
6708 aac
6709 0: aac
6710 1:
6711
6712/(?>.*?a)(?<=ba)/
6713 aba
6714 0: ba
6715
6716/(?:.*?a)(?<=ba)/
6717 aba
6718 0: aba
6719
6720/(?>.*?a)b/s
6721 aab
6722 0: ab
6723
6724/(?>.*?a)b/
6725 aab
6726 0: ab
6727
6728/(?>^a)b/s
6729\= Expect no match
6730 aab
6731No match
6732
6733/(?>.*?)(?<=(abcd)|(wxyz))/
6734 alphabetabcd
6735 0:
6736 1: abcd
6737 endingwxyz
6738 0:
6739 1: <unset>
6740 2: wxyz
6741
6742/(?>.*)(?<=(abcd)|(wxyz))/
6743 alphabetabcd
6744 0: alphabetabcd
6745 1: abcd
6746 endingwxyz
6747 0: endingwxyz
6748 1: <unset>
6749 2: wxyz
6750
6751"(?>.*)foo"
6752\= Expect no match
6753 abcdfooxyz
6754No match
6755
6756"(?>.*?)foo"
6757 abcdfooxyz
6758 0: foo
6759
6760# Tests that try to figure out how Perl works. My hypothesis is that the first
6761# verb that is backtracked onto is the one that acts. This seems to be the case
6762# almost all the time, but there is one exception that is perhaps a bug.
6763
6764/a(?=bc).|abd/
6765 abd
6766 0: abd
6767 abc
6768 0: ab
6769
6770/a(?>bc)d|abd/
6771 abceabd
6772 0: abd
6773
6774# These tests were formerly in test 2, but changes in PCRE and Perl have
6775# made them compatible.
6776
6777/^(a)?(?(1)a|b)+$/
6778\= Expect no match
6779 a
6780No match
6781
6782# ----
6783
6784/^\d*\w{4}/
6785 1234
6786 0: 1234
6787\= Expect no match
6788 123
6789No match
6790
6791/^[^b]*\w{4}/
6792 aaaa
6793 0: aaaa
6794\= Expect no match
6795 aaa
6796No match
6797
6798/^[^b]*\w{4}/i
6799 aaaa
6800 0: aaaa
6801\= Expect no match
6802 aaa
6803No match
6804
6805/^a*\w{4}/
6806 aaaa
6807 0: aaaa
6808\= Expect no match
6809 aaa
6810No match
6811
6812/^a*\w{4}/i
6813 aaaa
6814 0: aaaa
6815\= Expect no match
6816 aaa
6817No match
6818
6819/(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
6820 foofoo
6821 0: foofoo
6822 1: foo
6823 barbar
6824 0: barbar
6825 1: bar
6826
6827# A notable difference between PCRE and .NET. According to
6828# the PCRE docs:
6829# If you make a subroutine call to a non-unique named
6830# subpattern, the one that corresponds to the first
6831# occurrence of the name is used. In the absence of
6832# duplicate numbers (see the previous section) this is
6833# the one with the lowest number.
6834# .NET takes the most recently captured number according to MSDN:
6835# A backreference refers to the most recent definition of
6836# a group (the definition most immediately to the left,
6837# when matching left to right). When a group makes multiple
6838# captures, a backreference refers to the most recent capture.
6839
6840#/(?<n>A)(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
6841# AfooA
6842# 0: AfooA
6843# 1: A
6844# 2: foo
6845# AbarA
6846# 0: AbarA
6847# 1: A
6848# 2: <unset>
6849# 3: bar
6850#\= Expect no match
6851# Afoofoo
6852#No match
6853# Abarbar
6854#No match
6855
6856/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/
6857 1 IN SOA non-sp1 non-sp2(
6858 0: 1 IN SOA non-sp1 non-sp2(
6859 1: 1
6860 2: non-sp1
6861 3: non-sp2
6862
6863# TODO: .NET's group number ordering here in the second example is a bit odd
6864/^ (?:(?<A>A)|(?'B'B)(?<A>A)) (?(A)x) (?(B)y)$/x,dupnames
6865 Ax
6866 0: Ax
6867 1: A
6868 BAxy
6869 0: BAxy
6870 1: A
6871 2: B
6872
6873/ ^ a + b $ /x
6874 aaaab
6875 0: aaaab
6876
6877/ ^ a + #comment
6878 b $ /x
6879 aaaab
6880 0: aaaab
6881
6882/ ^ a + #comment
6883 #comment
6884 b $ /x
6885 aaaab
6886 0: aaaab
6887
6888/ ^ (?> a + ) b $ /x
6889 aaaab
6890 0: aaaab
6891
6892/ ^ ( a + ) + \w $ /x
6893 aaaab
6894 0: aaaab
6895 1: aaaa
6896
6897/(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc/
6898\= Expect no match
6899 acb
6900No match
6901
6902#Posses
6903#/\A(?:[^\"]+|\"(?:[^\"]*|\"\")*\")+/
6904# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
6905# 0: NON QUOTED "QUOT""ED" AFTER
6906
6907#Posses
6908#/\A(?:[^\"]+|\"(?:[^\"]+|\"\")*\")+/
6909# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
6910# 0: NON QUOTED "QUOT""ED" AFTER
6911
6912#Posses
6913#/\A(?:[^\"]+|\"(?:[^\"]+|\"\")+\")+/
6914# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
6915# 0: NON QUOTED "QUOT""ED" AFTER
6916
6917#Posses
6918#/\A([^\"1]+|[\"2]([^\"3]*|[\"4][\"5])*[\"6])+/
6919# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
6920# 0: NON QUOTED "QUOT""ED" AFTER
6921# 1: AFTER
6922# 2:
6923
6924/^\w+(?>\s*)(?<=\w)/
6925 test test
6926 0: tes
6927
6928#/(?P<Name>a)?(?P<Name2>b)?(?(<Name>)c|d)*l/
6929# acl
6930# 0: acl
6931# 1: a
6932# bdl
6933# 0: bdl
6934# 1: <unset>
6935# 2: b
6936# adl
6937# 0: dl
6938# bcl
6939# 0: l
6940
6941/\sabc/
6942 \x0babc
6943 0: \x0babc
6944
6945#/[\Qa]\E]+/
6946# aa]]
6947# 0: aa]]
6948
6949#/[\Q]a\E]+/
6950# aa]]
6951# 0: aa]]
6952
6953/A((((((((a))))))))\8B/
6954 AaaB
6955 0: AaaB
6956 1: a
6957 2: a
6958 3: a
6959 4: a
6960 5: a
6961 6: a
6962 7: a
6963 8: a
6964
6965/A(((((((((a)))))))))\9B/
6966 AaaB
6967 0: AaaB
6968 1: a
6969 2: a
6970 3: a
6971 4: a
6972 5: a
6973 6: a
6974 7: a
6975 8: a
6976 9: a
6977
6978/(|ab)*?d/
6979 abd
6980 0: abd
6981 1: ab
6982 xyd
6983 0: d
6984
6985/(\2|a)(\1)/
6986 aaa
6987 0: aa
6988 1: a
6989 2: a
6990
6991/(\2)(\1)/
6992
6993"Z*(|d*){216}"
6994
6995/((((((((((((x))))))))))))\12/
6996 xx
6997 0: xx
6998 1: x
6999 2: x
7000 3: x
7001 4: x
7002 5: x
7003 6: x
7004 7: x
7005 8: x
7006 9: x
700710: x
700811: x
700912: x
7010
7011#"(?|(\k'Pm')|(?'Pm'))"
7012# abcd
7013# 0:
7014# 1:
7015
7016#/(?|(aaa)|(b))\g{1}/
7017# aaaaaa
7018# 0: aaaaaa
7019# 1: aaa
7020# bb
7021# 0: bb
7022# 1: b
7023
7024#/(?|(aaa)|(b))(?1)/
7025# aaaaaa
7026# 0: aaaaaa
7027# 1: aaa
7028# baaa
7029# 0: baaa
7030# 1: b
7031#\= Expect no match
7032# bb
7033#No match
7034
7035#/(?|(aaa)|(b))/
7036# xaaa
7037# 0: aaa
7038# 1: aaa
7039# xbc
7040# 0: b
7041# 1: b
7042
7043#/(?|(?'a'aaa)|(?'a'b))\k'a'/
7044# aaaaaa
7045# 0: aaaaaa
7046# 1: aaa
7047# bb
7048# 0: bb
7049# 1: b
7050
7051#/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/dupnames
7052# aaaccccaaa
7053# 0: aaaccccaaa
7054# 1: aaa
7055# 2: cccc
7056# bccccb
7057# 0: bccccb
7058# 1: b
7059# 2: cccc
7060
7061# End of testinput1