1 files changed, 654 insertions, 0 deletions
diff --git a/vendor/github.com/dlclark/regexp2/syntax/tree.go b/vendor/github.com/dlclark/regexp2/syntax/tree.go
new file mode 100644
index 0000000..ea28829
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/tree.go
@@ -0,0 +1,654 @@
+package syntax
+import (
+        "bytes"
+        "fmt"
+        "math"
+        "strconv"
+)
+type RegexTree struct {
+        root       *regexNode
+        caps       map[int]int
+        capnumlist []int
+        captop     int
+        Capnames   map[string]int
+        Caplist    []string
+        options    RegexOptions
+}
+// It is built into a parsed tree for a regular expression.
+// Implementation notes:
+//
+// Since the node tree is a temporary data structure only used
+// during compilation of the regexp to integer codes, it's
+// designed for clarity and convenience rather than
+// space efficiency.
+//
+// RegexNodes are built into a tree, linked by the n.children list.
+// Each node also has a n.parent and n.ichild member indicating
+// its parent and which child # it is in its parent's list.
+//
+// RegexNodes come in as many types as there are constructs in
+// a regular expression, for example, "concatenate", "alternate",
+// "one", "rept", "group". There are also node types for basic
+// peephole optimizations, e.g., "onerep", "notsetrep", etc.
+//
+// Because perl 5 allows "lookback" groups that scan backwards,
+// each node also gets a "direction". Normally the value of
+// boolean n.backward = false.
+//
+// During parsing, top-level nodes are also stacked onto a parse
+// stack (a stack of trees). For this purpose we have a n.next
+// pointer. [Note that to save a few bytes, we could overload the
+// n.parent pointer instead.]
+//
+// On the parse stack, each tree has a "role" - basically, the
+// nonterminal in the grammar that the parser has currently
+// assigned to the tree. That code is stored in n.role.
+//
+// Finally, some of the different kinds of nodes have data.
+// Two integers (for the looping constructs) are stored in
+// n.operands, an an object (either a string or a set)
+// is stored in n.data
+type regexNode struct {
+        t        nodeType
+        children []*regexNode
+        str      []rune
+        set      *CharSet
+        ch       rune
+        m        int
+        n        int
+        options  RegexOptions
+        next     *regexNode
+}
+type nodeType int32
+const (
+        // The following are leaves, and correspond to primitive operations
+        ntOnerep      nodeType = 0  // lef,back char,min,max    a {n}
+        ntNotonerep            = 1  // lef,back char,min,max    .{n}
+        ntSetrep               = 2  // lef,back set,min,max     [\d]{n}
+        ntOneloop              = 3  // lef,back char,min,max    a {,n}
+        ntNotoneloop           = 4  // lef,back char,min,max    .{,n}
+        ntSetloop              = 5  // lef,back set,min,max     [\d]{,n}
+        ntOnelazy              = 6  // lef,back char,min,max    a {,n}?
+        ntNotonelazy           = 7  // lef,back char,min,max    .{,n}?
+        ntSetlazy              = 8  // lef,back set,min,max     [\d]{,n}?
+        ntOne                  = 9  // lef      char            a
+        ntNotone               = 10 // lef      char            [^a]
+        ntSet                  = 11 // lef      set             [a-z\s]  \w \s \d
+        ntMulti                = 12 // lef      string          abcd
+        ntRef                  = 13 // lef      group           \#
+        ntBol                  = 14 //                          ^
+        ntEol                  = 15 //                          $
+        ntBoundary             = 16 //                          \b
+        ntNonboundary          = 17 //                          \B
+        ntBeginning            = 18 //                          \A
+        ntStart                = 19 //                          \G
+        ntEndZ                 = 20 //                          \Z
+        ntEnd                  = 21 //                          \Z
+        // Interior nodes do not correspond to primitive operations, but
+        // control structures compositing other operations
+        // Concat and alternate take n children, and can run forward or backwards
+        ntNothing     = 22 //          []
+        ntEmpty       = 23 //          ()
+        ntAlternate   = 24 //          a|b
+        ntConcatenate = 25 //          ab
+        ntLoop        = 26 // m,x      * + ? {,}
+        ntLazyloop    = 27 // m,x      *? +? ?? {,}?
+        ntCapture     = 28 // n        ()
+        ntGroup       = 29 //          (?:)
+        ntRequire     = 30 //          (?=) (?<=)
+        ntPrevent     = 31 //          (?!) (?<!)
+        ntGreedy      = 32 //          (?>) (?<)
+        ntTestref     = 33 //          (?(n) | )
+        ntTestgroup   = 34 //          (?(...) | )
+        ntECMABoundary    = 41 //                          \b
+        ntNonECMABoundary = 42 //                          \B
+)
+func newRegexNode(t nodeType, opt RegexOptions) *regexNode {
+        return &regexNode{
+                t:       t,
+                options: opt,
+        }
+}
+func newRegexNodeCh(t nodeType, opt RegexOptions, ch rune) *regexNode {
+        return &regexNode{
+                t:       t,
+                options: opt,
+                ch:      ch,
+        }
+}
+func newRegexNodeStr(t nodeType, opt RegexOptions, str []rune) *regexNode {
+        return &regexNode{
+                t:       t,
+                options: opt,
+                str:     str,
+        }
+}
+func newRegexNodeSet(t nodeType, opt RegexOptions, set *CharSet) *regexNode {
+        return &regexNode{
+                t:       t,
+                options: opt,
+                set:     set,
+        }
+}
+func newRegexNodeM(t nodeType, opt RegexOptions, m int) *regexNode {
+        return &regexNode{
+                t:       t,
+                options: opt,
+                m:       m,
+        }
+}
+func newRegexNodeMN(t nodeType, opt RegexOptions, m, n int) *regexNode {
+        return &regexNode{
+                t:       t,
+                options: opt,
+                m:       m,
+                n:       n,
+        }
+}
+func (n *regexNode) writeStrToBuf(buf *bytes.Buffer) {
+        for i := 0; i < len(n.str); i++ {
+                buf.WriteRune(n.str[i])
+        }
+}
+func (n *regexNode) addChild(child *regexNode) {
+        reduced := child.reduce()
+        n.children = append(n.children, reduced)
+        reduced.next = n
+}
+func (n *regexNode) insertChildren(afterIndex int, nodes []*regexNode) {
+        newChildren := make([]*regexNode, 0, len(n.children)+len(nodes))
+        n.children = append(append(append(newChildren, n.children[:afterIndex]...), nodes...), n.children[afterIndex:]...)
+}
+// removes children including the start but not the end index
+func (n *regexNode) removeChildren(startIndex, endIndex int) {
+        n.children = append(n.children[:startIndex], n.children[endIndex:]...)
+}
+// Pass type as OneLazy or OneLoop
+func (n *regexNode) makeRep(t nodeType, min, max int) {
+        n.t += (t - ntOne)
+        n.m = min
+        n.n = max
+}
+func (n *regexNode) reduce() *regexNode {
+        switch n.t {
+        case ntAlternate:
+                return n.reduceAlternation()
+        case ntConcatenate:
+                return n.reduceConcatenation()
+        case ntLoop, ntLazyloop:
+                return n.reduceRep()
+        case ntGroup:
+                return n.reduceGroup()
+        case ntSet, ntSetloop:
+                return n.reduceSet()
+        default:
+                return n
+        }
+}
+// Basic optimization. Single-letter alternations can be replaced
+// by faster set specifications, and nested alternations with no
+// intervening operators can be flattened:
+//
+// a|b|c|def|g|h -> [a-c]|def|[gh]
+// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
+func (n *regexNode) reduceAlternation() *regexNode {
+        if len(n.children) == 0 {
+                return newRegexNode(ntNothing, n.options)
+        }
+        wasLastSet := false
+        lastNodeCannotMerge := false
+        var optionsLast RegexOptions
+        var i, j int
+        for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
+                at := n.children[i]
+                if j < i {
+                        n.children[j] = at
+                }
+                for {
+                        if at.t == ntAlternate {
+                                for k := 0; k < len(at.children); k++ {
+                                        at.children[k].next = n
+                                }
+                                n.insertChildren(i+1, at.children)
+                                j--
+                        } else if at.t == ntSet || at.t == ntOne {
+                                // Cannot merge sets if L or I options differ, or if either are negated.
+                                optionsAt := at.options & (RightToLeft | IgnoreCase)
+                                if at.t == ntSet {
+                                        if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !at.set.IsMergeable() {
+                                                wasLastSet = true
+                                                lastNodeCannotMerge = !at.set.IsMergeable()
+                                                optionsLast = optionsAt
+                                                break
+                                        }
+                                } else if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge {
+                                        wasLastSet = true
+                                        lastNodeCannotMerge = false
+                                        optionsLast = optionsAt
+                                        break
+                                }
+                                // The last node was a Set or a One, we're a Set or One and our options are the same.
+                                // Merge the two nodes.
+                                j--
+                                prev := n.children[j]
+                                var prevCharClass *CharSet
+                                if prev.t == ntOne {
+                                        prevCharClass = &CharSet{}
+                                        prevCharClass.addChar(prev.ch)
+                                } else {
+                                        prevCharClass = prev.set
+                                }
+                                if at.t == ntOne {
+                                        prevCharClass.addChar(at.ch)
+                                } else {
+                                        prevCharClass.addSet(*at.set)
+                                }
+                                prev.t = ntSet
+                                prev.set = prevCharClass
+                        } else if at.t == ntNothing {
+                                j--
+                        } else {
+                                wasLastSet = false
+                                lastNodeCannotMerge = false
+                        }
+                        break
+                }
+        }
+        if j < i {
+                n.removeChildren(j, i)
+        }
+        return n.stripEnation(ntNothing)
+}
+// Basic optimization. Adjacent strings can be concatenated.
+//
+// (?:abc)(?:def) -> abcdef
+func (n *regexNode) reduceConcatenation() *regexNode {
+        // Eliminate empties and concat adjacent strings/chars
+        var optionsLast RegexOptions
+        var optionsAt RegexOptions
+        var i, j int
+        if len(n.children) == 0 {
+                return newRegexNode(ntEmpty, n.options)
+        }
+        wasLastString := false
+        for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
+                var at, prev *regexNode
+                at = n.children[i]
+                if j < i {
+                        n.children[j] = at
+                }
+                if at.t == ntConcatenate &&
+                        ((at.options & RightToLeft) == (n.options & RightToLeft)) {
+                        for k := 0; k < len(at.children); k++ {
+                                at.children[k].next = n
+                        }
+                        //insert at.children at i+1 index in n.children
+                        n.insertChildren(i+1, at.children)
+                        j--
+                } else if at.t == ntMulti || at.t == ntOne {
+                        // Cannot merge strings if L or I options differ
+                        optionsAt = at.options & (RightToLeft | IgnoreCase)
+                        if !wasLastString || optionsLast != optionsAt {
+                                wasLastString = true
+                                optionsLast = optionsAt
+                                continue
+                        }
+                        j--
+                        prev = n.children[j]
+                        if prev.t == ntOne {
+                                prev.t = ntMulti
+                                prev.str = []rune{prev.ch}
+                        }
+                        if (optionsAt & RightToLeft) == 0 {
+                                if at.t == ntOne {
+                                        prev.str = append(prev.str, at.ch)
+                                } else {
+                                        prev.str = append(prev.str, at.str...)
+                                }
+                        } else {
+                                if at.t == ntOne {
+                                        // insert at the front by expanding our slice, copying the data over, and then setting the value
+                                        prev.str = append(prev.str, 0)
+                                        copy(prev.str[1:], prev.str)
+                                        prev.str[0] = at.ch
+                                } else {
+                                        //insert at the front...this one we'll make a new slice and copy both into it
+                                        merge := make([]rune, len(prev.str)+len(at.str))
+                                        copy(merge, at.str)
+                                        copy(merge[len(at.str):], prev.str)
+                                        prev.str = merge
+                                }
+                        }
+                } else if at.t == ntEmpty {
+                        j--
+                } else {
+                        wasLastString = false
+                }
+        }
+        if j < i {
+                // remove indices j through i from the children
+                n.removeChildren(j, i)
+        }
+        return n.stripEnation(ntEmpty)
+}
+// Nested repeaters just get multiplied with each other if they're not
+// too lumpy
+func (n *regexNode) reduceRep() *regexNode {
+        u := n
+        t := n.t
+        min := n.m
+        max := n.n
+        for {
+                if len(u.children) == 0 {
+                        break
+                }
+                child := u.children[0]
+                // multiply reps of the same type only
+                if child.t != t {
+                        childType := child.t
+                        if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop ||
+                                childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop) {
+                                break
+                        }
+                }
+                // child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
+                // [but things like (a {2,})+ are not too lumpy...]
+                if u.m == 0 && child.m > 1 || child.n < child.m*2 {
+                        break
+                }
+                u = child
+                if u.m > 0 {
+                        if (math.MaxInt32-1)/u.m < min {
+                                u.m = math.MaxInt32
+                        } else {
+                                u.m = u.m * min
+                        }
+                }
+                if u.n > 0 {
+                        if (math.MaxInt32-1)/u.n < max {
+                                u.n = math.MaxInt32
+                        } else {
+                                u.n = u.n * max
+                        }
+                }
+        }
+        if math.MaxInt32 == min {
+                return newRegexNode(ntNothing, n.options)
+        }
+        return u
+}
+// Simple optimization. If a concatenation or alternation has only
+// one child strip out the intermediate node. If it has zero children,
+// turn it into an empty.
+func (n *regexNode) stripEnation(emptyType nodeType) *regexNode {
+        switch len(n.children) {
+        case 0:
+                return newRegexNode(emptyType, n.options)
+        case 1:
+                return n.children[0]
+        default:
+                return n
+        }
+}
+func (n *regexNode) reduceGroup() *regexNode {
+        u := n
+        for u.t == ntGroup {
+                u = u.children[0]
+        }
+        return u
+}
+// Simple optimization. If a set is a singleton, an inverse singleton,
+// or empty, it's transformed accordingly.
+func (n *regexNode) reduceSet() *regexNode {
+        // Extract empty-set, one and not-one case as special
+        if n.set == nil {
+                n.t = ntNothing
+        } else if n.set.IsSingleton() {
+                n.ch = n.set.SingletonChar()
+                n.set = nil
+                n.t += (ntOne - ntSet)
+        } else if n.set.IsSingletonInverse() {
+                n.ch = n.set.SingletonChar()
+                n.set = nil
+                n.t += (ntNotone - ntSet)
+        }
+        return n
+}
+func (n *regexNode) reverseLeft() *regexNode {
+        if n.options&RightToLeft != 0 && n.t == ntConcatenate && len(n.children) > 0 {
+                //reverse children order
+                for left, right := 0, len(n.children)-1; left < right; left, right = left+1, right-1 {
+                        n.children[left], n.children[right] = n.children[right], n.children[left]
+                }
+        }
+        return n
+}
+func (n *regexNode) makeQuantifier(lazy bool, min, max int) *regexNode {
+        if min == 0 && max == 0 {
+                return newRegexNode(ntEmpty, n.options)
+        }
+        if min == 1 && max == 1 {
+                return n
+        }
+        switch n.t {
+        case ntOne, ntNotone, ntSet:
+                if lazy {
+                        n.makeRep(Onelazy, min, max)
+                } else {
+                        n.makeRep(Oneloop, min, max)
+                }
+                return n
+        default:
+                var t nodeType
+                if lazy {
+                        t = ntLazyloop
+                } else {
+                        t = ntLoop
+                }
+                result := newRegexNodeMN(t, n.options, min, max)
+                result.addChild(n)
+                return result
+        }
+}
+// debug functions
+var typeStr = []string{
+        "Onerep", "Notonerep", "Setrep",
+        "Oneloop", "Notoneloop", "Setloop",
+        "Onelazy", "Notonelazy", "Setlazy",
+        "One", "Notone", "Set",
+        "Multi", "Ref",
+        "Bol", "Eol", "Boundary", "Nonboundary",
+        "Beginning", "Start", "EndZ", "End",
+        "Nothing", "Empty",
+        "Alternate", "Concatenate",
+        "Loop", "Lazyloop",
+        "Capture", "Group", "Require", "Prevent", "Greedy",
+        "Testref", "Testgroup",
+        "Unknown", "Unknown", "Unknown",
+        "Unknown", "Unknown", "Unknown",
+        "ECMABoundary", "NonECMABoundary",
+}
+func (n *regexNode) description() string {
+        buf := &bytes.Buffer{}
+        buf.WriteString(typeStr[n.t])
+        if (n.options & ExplicitCapture) != 0 {
+                buf.WriteString("-C")
+        }
+        if (n.options & IgnoreCase) != 0 {
+                buf.WriteString("-I")
+        }
+        if (n.options & RightToLeft) != 0 {
+                buf.WriteString("-L")
+        }
+        if (n.options & Multiline) != 0 {
+                buf.WriteString("-M")
+        }
+        if (n.options & Singleline) != 0 {
+                buf.WriteString("-S")
+        }
+        if (n.options & IgnorePatternWhitespace) != 0 {
+                buf.WriteString("-X")
+        }
+        if (n.options & ECMAScript) != 0 {
+                buf.WriteString("-E")
+        }
+        switch n.t {
+        case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone:
+                buf.WriteString("(Ch = " + CharDescription(n.ch) + ")")
+                break
+        case ntCapture:
+                buf.WriteString("(index = " + strconv.Itoa(n.m) + ", unindex = " + strconv.Itoa(n.n) + ")")
+                break
+        case ntRef, ntTestref:
+                buf.WriteString("(index = " + strconv.Itoa(n.m) + ")")
+                break
+        case ntMulti:
+                fmt.Fprintf(buf, "(String = %s)", string(n.str))
+                break
+        case ntSet, ntSetloop, ntSetlazy:
+                buf.WriteString("(Set = " + n.set.String() + ")")
+                break
+        }
+        switch n.t {
+        case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop:
+                buf.WriteString("(Min = ")
+                buf.WriteString(strconv.Itoa(n.m))
+                buf.WriteString(", Max = ")
+                if n.n == math.MaxInt32 {
+                        buf.WriteString("inf")
+                } else {
+                        buf.WriteString(strconv.Itoa(n.n))
+                }
+                buf.WriteString(")")
+                break
+        }
+        return buf.String()
+}
+var padSpace = []byte("                                ")
+func (t *RegexTree) Dump() string {
+        return t.root.dump()
+}
+func (n *regexNode) dump() string {
+        var stack []int
+        CurNode := n
+        CurChild := 0
+        buf := bytes.NewBufferString(CurNode.description())
+        buf.WriteRune('\n')
+        for {
+                if CurNode.children != nil && CurChild < len(CurNode.children) {
+                        stack = append(stack, CurChild+1)
+                        CurNode = CurNode.children[CurChild]
+                        CurChild = 0
+                        Depth := len(stack)
+                        if Depth > 32 {
+                                Depth = 32
+                        }
+                        buf.Write(padSpace[:Depth])
+                        buf.WriteString(CurNode.description())
+                        buf.WriteRune('\n')
+                } else {
+                        if len(stack) == 0 {
+                                break
+                        }
+                        CurChild = stack[len(stack)-1]
+                        stack = stack[:len(stack)-1]
+                        CurNode = CurNode.next
+                }
+        }
+        return buf.String()
+}

diff --git a/vendor/github.com/dlclark/regexp2/syntax/tree.go b/vendor/github.com/dlclark/regexp2/syntax/tree.go new file mode 100644 index 0000000..ea28829 --- /dev/null +++ b/vendor/github.com/dlclark/regexp2/syntax/tree.go
@@ -0,0 +1,654 @@
	1	package syntax
	2
	3	import (
	4	"bytes"
	5	"fmt"
	6	"math"
	7	"strconv"
	8	)
	9
	10	type RegexTree struct {
	11	root *regexNode
	12	caps map[int]int
	13	capnumlist []int
	14	captop int
	15	Capnames map[string]int
	16	Caplist []string
	17	options RegexOptions
	18	}
	19
	20	// It is built into a parsed tree for a regular expression.
	21
	22	// Implementation notes:
	23	//
	24	// Since the node tree is a temporary data structure only used
	25	// during compilation of the regexp to integer codes, it's
	26	// designed for clarity and convenience rather than
	27	// space efficiency.
	28	//
	29	// RegexNodes are built into a tree, linked by the n.children list.
	30	// Each node also has a n.parent and n.ichild member indicating
	31	// its parent and which child # it is in its parent's list.
	32	//
	33	// RegexNodes come in as many types as there are constructs in
	34	// a regular expression, for example, "concatenate", "alternate",
	35	// "one", "rept", "group". There are also node types for basic
	36	// peephole optimizations, e.g., "onerep", "notsetrep", etc.
	37	//
	38	// Because perl 5 allows "lookback" groups that scan backwards,
	39	// each node also gets a "direction". Normally the value of
	40	// boolean n.backward = false.
	41	//
	42	// During parsing, top-level nodes are also stacked onto a parse
	43	// stack (a stack of trees). For this purpose we have a n.next
	44	// pointer. [Note that to save a few bytes, we could overload the
	45	// n.parent pointer instead.]
	46	//
	47	// On the parse stack, each tree has a "role" - basically, the
	48	// nonterminal in the grammar that the parser has currently
	49	// assigned to the tree. That code is stored in n.role.
	50	//
	51	// Finally, some of the different kinds of nodes have data.
	52	// Two integers (for the looping constructs) are stored in
	53	// n.operands, an an object (either a string or a set)
	54	// is stored in n.data
	55	type regexNode struct {
	56	t nodeType
	57	children []*regexNode
	58	str []rune
	59	set *CharSet
	60	ch rune
	61	m int
	62	n int
	63	options RegexOptions
	64	next *regexNode
	65	}
	66
	67	type nodeType int32
	68
	69	const (
	70	// The following are leaves, and correspond to primitive operations
	71
	72	ntOnerep nodeType = 0 // lef,back char,min,max a {n}
	73	ntNotonerep = 1 // lef,back char,min,max .{n}
	74	ntSetrep = 2 // lef,back set,min,max [\d]{n}
	75	ntOneloop = 3 // lef,back char,min,max a {,n}
	76	ntNotoneloop = 4 // lef,back char,min,max .{,n}
	77	ntSetloop = 5 // lef,back set,min,max [\d]{,n}
	78	ntOnelazy = 6 // lef,back char,min,max a {,n}?
	79	ntNotonelazy = 7 // lef,back char,min,max .{,n}?
	80	ntSetlazy = 8 // lef,back set,min,max [\d]{,n}?
	81	ntOne = 9 // lef char a
	82	ntNotone = 10 // lef char [^a]
	83	ntSet = 11 // lef set [a-z\s] \w \s \d
	84	ntMulti = 12 // lef string abcd
	85	ntRef = 13 // lef group \#
	86	ntBol = 14 // ^
	87	ntEol = 15 // $
	88	ntBoundary = 16 // \b
	89	ntNonboundary = 17 // \B
	90	ntBeginning = 18 // \A
	91	ntStart = 19 // \G
	92	ntEndZ = 20 // \Z
	93	ntEnd = 21 // \Z
	94
	95	// Interior nodes do not correspond to primitive operations, but
	96	// control structures compositing other operations
	97
	98	// Concat and alternate take n children, and can run forward or backwards
	99
	100	ntNothing = 22 // []
	101	ntEmpty = 23 // ()
	102	ntAlternate = 24 // a\|b
	103	ntConcatenate = 25 // ab
	104	ntLoop = 26 // m,x * + ? {,}
	105	ntLazyloop = 27 // m,x *? +? ?? {,}?
	106	ntCapture = 28 // n ()
	107	ntGroup = 29 // (?:)
	108	ntRequire = 30 // (?=) (?<=)
	109	ntPrevent = 31 // (?!) (?<!)
	110	ntGreedy = 32 // (?>) (?<)
	111	ntTestref = 33 // (?(n) \| )
	112	ntTestgroup = 34 // (?(...) \| )
	113
	114	ntECMABoundary = 41 // \b
	115	ntNonECMABoundary = 42 // \B
	116	)
	117
	118	func newRegexNode(t nodeType, opt RegexOptions) *regexNode {
	119	return &regexNode{
	120	t: t,
	121	options: opt,
	122	}
	123	}
	124
	125	func newRegexNodeCh(t nodeType, opt RegexOptions, ch rune) *regexNode {
	126	return &regexNode{
	127	t: t,
	128	options: opt,
	129	ch: ch,
	130	}
	131	}
	132
	133	func newRegexNodeStr(t nodeType, opt RegexOptions, str []rune) *regexNode {
	134	return &regexNode{
	135	t: t,
	136	options: opt,
	137	str: str,
	138	}
	139	}
	140
	141	func newRegexNodeSet(t nodeType, opt RegexOptions, set CharSet) regexNode {
	142	return &regexNode{
	143	t: t,
	144	options: opt,
	145	set: set,
	146	}
	147	}
	148
	149	func newRegexNodeM(t nodeType, opt RegexOptions, m int) *regexNode {
	150	return &regexNode{
	151	t: t,
	152	options: opt,
	153	m: m,
	154	}
	155	}
	156	func newRegexNodeMN(t nodeType, opt RegexOptions, m, n int) *regexNode {
	157	return &regexNode{
	158	t: t,
	159	options: opt,
	160	m: m,
	161	n: n,
	162	}
	163	}
	164
	165	func (n regexNode) writeStrToBuf(buf bytes.Buffer) {
	166	for i := 0; i < len(n.str); i++ {
	167	buf.WriteRune(n.str[i])
	168	}
	169	}
	170
	171	func (n regexNode) addChild(child regexNode) {
	172	reduced := child.reduce()
	173	n.children = append(n.children, reduced)
	174	reduced.next = n
	175	}
	176
	177	func (n regexNode) insertChildren(afterIndex int, nodes []regexNode) {
	178	newChildren := make([]*regexNode, 0, len(n.children)+len(nodes))
	179	n.children = append(append(append(newChildren, n.children[:afterIndex]...), nodes...), n.children[afterIndex:]...)
	180	}
	181
	182	// removes children including the start but not the end index
	183	func (n *regexNode) removeChildren(startIndex, endIndex int) {
	184	n.children = append(n.children[:startIndex], n.children[endIndex:]...)
	185	}
	186
	187	// Pass type as OneLazy or OneLoop
	188	func (n *regexNode) makeRep(t nodeType, min, max int) {
	189	n.t += (t - ntOne)
	190	n.m = min
	191	n.n = max
	192	}
	193
	194	func (n regexNode) reduce() regexNode {
	195	switch n.t {
	196	case ntAlternate:
	197	return n.reduceAlternation()
	198
	199	case ntConcatenate:
	200	return n.reduceConcatenation()
	201
	202	case ntLoop, ntLazyloop:
	203	return n.reduceRep()
	204
	205	case ntGroup:
	206	return n.reduceGroup()
	207
	208	case ntSet, ntSetloop:
	209	return n.reduceSet()
	210
	211	default:
	212	return n
	213	}
	214	}
	215
	216	// Basic optimization. Single-letter alternations can be replaced
	217	// by faster set specifications, and nested alternations with no
	218	// intervening operators can be flattened:
	219	//
	220	// a\|b\|c\|def\|g\|h -> [a-c]\|def\|[gh]
	221	// apple\|(?:orange\|pear)\|grape -> apple\|orange\|pear\|grape
	222	func (n regexNode) reduceAlternation() regexNode {
	223	if len(n.children) == 0 {
	224	return newRegexNode(ntNothing, n.options)
	225	}
	226
	227	wasLastSet := false
	228	lastNodeCannotMerge := false
	229	var optionsLast RegexOptions
	230	var i, j int
	231
	232	for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
	233	at := n.children[i]
	234
	235	if j < i {
	236	n.children[j] = at
	237	}
	238
	239	for {
	240	if at.t == ntAlternate {
	241	for k := 0; k < len(at.children); k++ {
	242	at.children[k].next = n
	243	}
	244	n.insertChildren(i+1, at.children)
	245
	246	j--
	247	} else if at.t == ntSet \|\| at.t == ntOne {
	248	// Cannot merge sets if L or I options differ, or if either are negated.
	249	optionsAt := at.options & (RightToLeft \| IgnoreCase)
	250
	251	if at.t == ntSet {
	252	if !wasLastSet \|\| optionsLast != optionsAt \|\| lastNodeCannotMerge \|\| !at.set.IsMergeable() {
	253	wasLastSet = true
	254	lastNodeCannotMerge = !at.set.IsMergeable()
	255	optionsLast = optionsAt
	256	break
	257	}
	258	} else if !wasLastSet \|\| optionsLast != optionsAt \|\| lastNodeCannotMerge {
	259	wasLastSet = true
	260	lastNodeCannotMerge = false
	261	optionsLast = optionsAt
	262	break
	263	}
	264
	265	// The last node was a Set or a One, we're a Set or One and our options are the same.
	266	// Merge the two nodes.
	267	j--
	268	prev := n.children[j]
	269
	270	var prevCharClass *CharSet
	271	if prev.t == ntOne {
	272	prevCharClass = &CharSet{}
	273	prevCharClass.addChar(prev.ch)
	274	} else {
	275	prevCharClass = prev.set
	276	}
	277
	278	if at.t == ntOne {
	279	prevCharClass.addChar(at.ch)
	280	} else {
	281	prevCharClass.addSet(*at.set)
	282	}
	283
	284	prev.t = ntSet
	285	prev.set = prevCharClass
	286	} else if at.t == ntNothing {
	287	j--
	288	} else {
	289	wasLastSet = false
	290	lastNodeCannotMerge = false
	291	}
	292	break
	293	}
	294	}
	295
	296	if j < i {
	297	n.removeChildren(j, i)
	298	}
	299
	300	return n.stripEnation(ntNothing)
	301	}
	302
	303	// Basic optimization. Adjacent strings can be concatenated.
	304	//
	305	// (?:abc)(?:def) -> abcdef
	306	func (n regexNode) reduceConcatenation() regexNode {
	307	// Eliminate empties and concat adjacent strings/chars
	308
	309	var optionsLast RegexOptions
	310	var optionsAt RegexOptions
	311	var i, j int
	312
	313	if len(n.children) == 0 {
	314	return newRegexNode(ntEmpty, n.options)
	315	}
	316
	317	wasLastString := false
	318
	319	for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
	320	var at, prev *regexNode
	321
	322	at = n.children[i]
	323
	324	if j < i {
	325	n.children[j] = at
	326	}
	327
	328	if at.t == ntConcatenate &&
	329	((at.options & RightToLeft) == (n.options & RightToLeft)) {
	330	for k := 0; k < len(at.children); k++ {
	331	at.children[k].next = n
	332	}
	333
	334	//insert at.children at i+1 index in n.children
	335	n.insertChildren(i+1, at.children)
	336
	337	j--
	338	} else if at.t == ntMulti \|\| at.t == ntOne {
	339	// Cannot merge strings if L or I options differ
	340	optionsAt = at.options & (RightToLeft \| IgnoreCase)
	341
	342	if !wasLastString \|\| optionsLast != optionsAt {
	343	wasLastString = true
	344	optionsLast = optionsAt
	345	continue
	346	}
	347
	348	j--
	349	prev = n.children[j]
	350
	351	if prev.t == ntOne {
	352	prev.t = ntMulti
	353	prev.str = []rune{prev.ch}
	354	}
	355
	356	if (optionsAt & RightToLeft) == 0 {
	357	if at.t == ntOne {
	358	prev.str = append(prev.str, at.ch)
	359	} else {
	360	prev.str = append(prev.str, at.str...)
	361	}
	362	} else {
	363	if at.t == ntOne {
	364	// insert at the front by expanding our slice, copying the data over, and then setting the value
	365	prev.str = append(prev.str, 0)
	366	copy(prev.str[1:], prev.str)
	367	prev.str[0] = at.ch
	368	} else {
	369	//insert at the front...this one we'll make a new slice and copy both into it
	370	merge := make([]rune, len(prev.str)+len(at.str))
	371	copy(merge, at.str)
	372	copy(merge[len(at.str):], prev.str)
	373	prev.str = merge
	374	}
	375	}
	376	} else if at.t == ntEmpty {
	377	j--
	378	} else {
	379	wasLastString = false
	380	}
	381	}
	382
	383	if j < i {
	384	// remove indices j through i from the children
	385	n.removeChildren(j, i)
	386	}
	387
	388	return n.stripEnation(ntEmpty)
	389	}
	390
	391	// Nested repeaters just get multiplied with each other if they're not
	392	// too lumpy
	393	func (n regexNode) reduceRep() regexNode {
	394
	395	u := n
	396	t := n.t
	397	min := n.m
	398	max := n.n
	399
	400	for {
	401	if len(u.children) == 0 {
	402	break
	403	}
	404
	405	child := u.children[0]
	406
	407	// multiply reps of the same type only
	408	if child.t != t {
	409	childType := child.t
	410
	411	if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop \|\|
	412	childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop) {
	413	break
	414	}
	415	}
	416
	417	// child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
	418	// [but things like (a {2,})+ are not too lumpy...]
	419	if u.m == 0 && child.m > 1 \|\| child.n < child.m*2 {
	420	break
	421	}
	422
	423	u = child
	424	if u.m > 0 {
	425	if (math.MaxInt32-1)/u.m < min {
	426	u.m = math.MaxInt32
	427	} else {
	428	u.m = u.m * min
	429	}
	430	}
	431	if u.n > 0 {
	432	if (math.MaxInt32-1)/u.n < max {
	433	u.n = math.MaxInt32
	434	} else {
	435	u.n = u.n * max
	436	}
	437	}
	438	}
	439
	440	if math.MaxInt32 == min {
	441	return newRegexNode(ntNothing, n.options)
	442	}
	443	return u
	444
	445	}
	446
	447	// Simple optimization. If a concatenation or alternation has only
	448	// one child strip out the intermediate node. If it has zero children,
	449	// turn it into an empty.
	450	func (n regexNode) stripEnation(emptyType nodeType) regexNode {
	451	switch len(n.children) {
	452	case 0:
	453	return newRegexNode(emptyType, n.options)
	454	case 1:
	455	return n.children[0]
	456	default:
	457	return n
	458	}
	459	}
	460
	461	func (n regexNode) reduceGroup() regexNode {
	462	u := n
	463
	464	for u.t == ntGroup {
	465	u = u.children[0]
	466	}
	467
	468	return u
	469	}
	470
	471	// Simple optimization. If a set is a singleton, an inverse singleton,
	472	// or empty, it's transformed accordingly.
	473	func (n regexNode) reduceSet() regexNode {
	474	// Extract empty-set, one and not-one case as special
	475
	476	if n.set == nil {
	477	n.t = ntNothing
	478	} else if n.set.IsSingleton() {
	479	n.ch = n.set.SingletonChar()
	480	n.set = nil
	481	n.t += (ntOne - ntSet)
	482	} else if n.set.IsSingletonInverse() {
	483	n.ch = n.set.SingletonChar()
	484	n.set = nil
	485	n.t += (ntNotone - ntSet)
	486	}
	487
	488	return n
	489	}
	490
	491	func (n regexNode) reverseLeft() regexNode {
	492	if n.options&RightToLeft != 0 && n.t == ntConcatenate && len(n.children) > 0 {
	493	//reverse children order
	494	for left, right := 0, len(n.children)-1; left < right; left, right = left+1, right-1 {
	495	n.children[left], n.children[right] = n.children[right], n.children[left]
	496	}
	497	}
	498
	499	return n
	500	}
	501
	502	func (n regexNode) makeQuantifier(lazy bool, min, max int) regexNode {
	503	if min == 0 && max == 0 {
	504	return newRegexNode(ntEmpty, n.options)
	505	}
	506
	507	if min == 1 && max == 1 {
	508	return n
	509	}
	510
	511	switch n.t {
	512	case ntOne, ntNotone, ntSet:
	513	if lazy {
	514	n.makeRep(Onelazy, min, max)
	515	} else {
	516	n.makeRep(Oneloop, min, max)
	517	}
	518	return n
	519
	520	default:
	521	var t nodeType
	522	if lazy {
	523	t = ntLazyloop
	524	} else {
	525	t = ntLoop
	526	}
	527	result := newRegexNodeMN(t, n.options, min, max)
	528	result.addChild(n)
	529	return result
	530	}
	531	}
	532
	533	// debug functions
	534
	535	var typeStr = []string{
	536	"Onerep", "Notonerep", "Setrep",
	537	"Oneloop", "Notoneloop", "Setloop",
	538	"Onelazy", "Notonelazy", "Setlazy",
	539	"One", "Notone", "Set",
	540	"Multi", "Ref",
	541	"Bol", "Eol", "Boundary", "Nonboundary",
	542	"Beginning", "Start", "EndZ", "End",
	543	"Nothing", "Empty",
	544	"Alternate", "Concatenate",
	545	"Loop", "Lazyloop",
	546	"Capture", "Group", "Require", "Prevent", "Greedy",
	547	"Testref", "Testgroup",
	548	"Unknown", "Unknown", "Unknown",
	549	"Unknown", "Unknown", "Unknown",
	550	"ECMABoundary", "NonECMABoundary",
	551	}
	552
	553	func (n *regexNode) description() string {
	554	buf := &bytes.Buffer{}
	555
	556	buf.WriteString(typeStr[n.t])
	557
	558	if (n.options & ExplicitCapture) != 0 {
	559	buf.WriteString("-C")
	560	}
	561	if (n.options & IgnoreCase) != 0 {
	562	buf.WriteString("-I")
	563	}
	564	if (n.options & RightToLeft) != 0 {
	565	buf.WriteString("-L")
	566	}
	567	if (n.options & Multiline) != 0 {
	568	buf.WriteString("-M")
	569	}
	570	if (n.options & Singleline) != 0 {
	571	buf.WriteString("-S")
	572	}
	573	if (n.options & IgnorePatternWhitespace) != 0 {
	574	buf.WriteString("-X")
	575	}
	576	if (n.options & ECMAScript) != 0 {
	577	buf.WriteString("-E")
	578	}
	579
	580	switch n.t {
	581	case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone:
	582	buf.WriteString("(Ch = " + CharDescription(n.ch) + ")")
	583	break
	584	case ntCapture:
	585	buf.WriteString("(index = " + strconv.Itoa(n.m) + ", unindex = " + strconv.Itoa(n.n) + ")")
	586	break
	587	case ntRef, ntTestref:
	588	buf.WriteString("(index = " + strconv.Itoa(n.m) + ")")
	589	break
	590	case ntMulti:
	591	fmt.Fprintf(buf, "(String = %s)", string(n.str))
	592	break
	593	case ntSet, ntSetloop, ntSetlazy:
	594	buf.WriteString("(Set = " + n.set.String() + ")")
	595	break
	596	}
	597
	598	switch n.t {
	599	case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop:
	600	buf.WriteString("(Min = ")
	601	buf.WriteString(strconv.Itoa(n.m))
	602	buf.WriteString(", Max = ")
	603	if n.n == math.MaxInt32 {
	604	buf.WriteString("inf")
	605	} else {
	606	buf.WriteString(strconv.Itoa(n.n))
	607	}
	608	buf.WriteString(")")
	609
	610	break
	611	}
	612
	613	return buf.String()
	614	}
	615
	616	var padSpace = []byte(" ")
	617
	618	func (t *RegexTree) Dump() string {
	619	return t.root.dump()
	620	}
	621
	622	func (n *regexNode) dump() string {
	623	var stack []int
	624	CurNode := n
	625	CurChild := 0
	626
	627	buf := bytes.NewBufferString(CurNode.description())
	628	buf.WriteRune('\n')
	629
	630	for {
	631	if CurNode.children != nil && CurChild < len(CurNode.children) {
	632	stack = append(stack, CurChild+1)
	633	CurNode = CurNode.children[CurChild]
	634	CurChild = 0
	635
	636	Depth := len(stack)
	637	if Depth > 32 {
	638	Depth = 32
	639	}
	640	buf.Write(padSpace[:Depth])
	641	buf.WriteString(CurNode.description())
	642	buf.WriteRune('\n')
	643	} else {
	644	if len(stack) == 0 {
	645	break
	646	}
	647
	648	CurChild = stack[len(stack)-1]
	649	stack = stack[:len(stack)-1]
	650	CurNode = CurNode.next
	651	}
	652	}
	653	return buf.String()
	654	}