Added vendor lock on depsHEAD master

author: Mitja Felicijan <mitja.felicijan@gmail.com> 2024-10-25 00:47:47 +0200
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2024-10-25 00:47:47 +0200
commit: c6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch)
tree: 36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/dlclark/regexp2/syntax/tree.go
parent: 0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff)
download: jbmafp-c6cc0108ca7738023b45e0eeac0fa2390532dd93.tar.gz
1 files changed, 654 insertions, 0 deletions
diff --git a/vendor/github.com/dlclark/regexp2/syntax/tree.go b/vendor/github.com/dlclark/regexp2/syntax/tree.go
new file mode 100644
index 0000000..ea28829
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/tree.go
@@ -0,0 +1,654 @@
+package syntax
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+	"strconv"
+)
+
+type RegexTree struct {
+	root       *regexNode
+	caps       map[int]int
+	capnumlist []int
+	captop     int
+	Capnames   map[string]int
+	Caplist    []string
+	options    RegexOptions
+}
+
+// It is built into a parsed tree for a regular expression.
+
+// Implementation notes:
+//
+// Since the node tree is a temporary data structure only used
+// during compilation of the regexp to integer codes, it's
+// designed for clarity and convenience rather than
+// space efficiency.
+//
+// RegexNodes are built into a tree, linked by the n.children list.
+// Each node also has a n.parent and n.ichild member indicating
+// its parent and which child # it is in its parent's list.
+//
+// RegexNodes come in as many types as there are constructs in
+// a regular expression, for example, "concatenate", "alternate",
+// "one", "rept", "group". There are also node types for basic
+// peephole optimizations, e.g., "onerep", "notsetrep", etc.
+//
+// Because perl 5 allows "lookback" groups that scan backwards,
+// each node also gets a "direction". Normally the value of
+// boolean n.backward = false.
+//
+// During parsing, top-level nodes are also stacked onto a parse
+// stack (a stack of trees). For this purpose we have a n.next
+// pointer. [Note that to save a few bytes, we could overload the
+// n.parent pointer instead.]
+//
+// On the parse stack, each tree has a "role" - basically, the
+// nonterminal in the grammar that the parser has currently
+// assigned to the tree. That code is stored in n.role.
+//
+// Finally, some of the different kinds of nodes have data.
+// Two integers (for the looping constructs) are stored in
+// n.operands, an an object (either a string or a set)
+// is stored in n.data
+type regexNode struct {
+	t        nodeType
+	children []*regexNode
+	str      []rune
+	set      *CharSet
+	ch       rune
+	m        int
+	n        int
+	options  RegexOptions
+	next     *regexNode
+}
+
+type nodeType int32
+
+const (
+	// The following are leaves, and correspond to primitive operations
+
+	ntOnerep      nodeType = 0  // lef,back char,min,max    a {n}
+	ntNotonerep            = 1  // lef,back char,min,max    .{n}
+	ntSetrep               = 2  // lef,back set,min,max     [\d]{n}
+	ntOneloop              = 3  // lef,back char,min,max    a {,n}
+	ntNotoneloop           = 4  // lef,back char,min,max    .{,n}
+	ntSetloop              = 5  // lef,back set,min,max     [\d]{,n}
+	ntOnelazy              = 6  // lef,back char,min,max    a {,n}?
+	ntNotonelazy           = 7  // lef,back char,min,max    .{,n}?
+	ntSetlazy              = 8  // lef,back set,min,max     [\d]{,n}?
+	ntOne                  = 9  // lef      char            a
+	ntNotone               = 10 // lef      char            [^a]
+	ntSet                  = 11 // lef      set             [a-z\s]  \w \s \d
+	ntMulti                = 12 // lef      string          abcd
+	ntRef                  = 13 // lef      group           \#
+	ntBol                  = 14 //                          ^
+	ntEol                  = 15 //                          $
+	ntBoundary             = 16 //                          \b
+	ntNonboundary          = 17 //                          \B
+	ntBeginning            = 18 //                          \A
+	ntStart                = 19 //                          \G
+	ntEndZ                 = 20 //                          \Z
+	ntEnd                  = 21 //                          \Z
+
+	// Interior nodes do not correspond to primitive operations, but
+	// control structures compositing other operations
+
+	// Concat and alternate take n children, and can run forward or backwards
+
+	ntNothing     = 22 //          []
+	ntEmpty       = 23 //          ()
+	ntAlternate   = 24 //          a|b
+	ntConcatenate = 25 //          ab
+	ntLoop        = 26 // m,x      * + ? {,}
+	ntLazyloop    = 27 // m,x      *? +? ?? {,}?
+	ntCapture     = 28 // n        ()
+	ntGroup       = 29 //          (?:)
+	ntRequire     = 30 //          (?=) (?<=)
+	ntPrevent     = 31 //          (?!) (?<!)
+	ntGreedy      = 32 //          (?>) (?<)
+	ntTestref     = 33 //          (?(n) | )
+	ntTestgroup   = 34 //          (?(...) | )
+
+	ntECMABoundary    = 41 //                          \b
+	ntNonECMABoundary = 42 //                          \B
+)
+
+func newRegexNode(t nodeType, opt RegexOptions) *regexNode {
+	return &regexNode{
+		t:       t,
+		options: opt,
+	}
+}
+
+func newRegexNodeCh(t nodeType, opt RegexOptions, ch rune) *regexNode {
+	return &regexNode{
+		t:       t,
+		options: opt,
+		ch:      ch,
+	}
+}
+
+func newRegexNodeStr(t nodeType, opt RegexOptions, str []rune) *regexNode {
+	return &regexNode{
+		t:       t,
+		options: opt,
+		str:     str,
+	}
+}
+
+func newRegexNodeSet(t nodeType, opt RegexOptions, set *CharSet) *regexNode {
+	return &regexNode{
+		t:       t,
+		options: opt,
+		set:     set,
+	}
+}
+
+func newRegexNodeM(t nodeType, opt RegexOptions, m int) *regexNode {
+	return &regexNode{
+		t:       t,
+		options: opt,
+		m:       m,
+	}
+}
+func newRegexNodeMN(t nodeType, opt RegexOptions, m, n int) *regexNode {
+	return &regexNode{
+		t:       t,
+		options: opt,
+		m:       m,
+		n:       n,
+	}
+}
+
+func (n *regexNode) writeStrToBuf(buf *bytes.Buffer) {
+	for i := 0; i < len(n.str); i++ {
+		buf.WriteRune(n.str[i])
+	}
+}
+
+func (n *regexNode) addChild(child *regexNode) {
+	reduced := child.reduce()
+	n.children = append(n.children, reduced)
+	reduced.next = n
+}
+
+func (n *regexNode) insertChildren(afterIndex int, nodes []*regexNode) {
+	newChildren := make([]*regexNode, 0, len(n.children)+len(nodes))
+	n.children = append(append(append(newChildren, n.children[:afterIndex]...), nodes...), n.children[afterIndex:]...)
+}
+
+// removes children including the start but not the end index
+func (n *regexNode) removeChildren(startIndex, endIndex int) {
+	n.children = append(n.children[:startIndex], n.children[endIndex:]...)
+}
+
+// Pass type as OneLazy or OneLoop
+func (n *regexNode) makeRep(t nodeType, min, max int) {
+	n.t += (t - ntOne)
+	n.m = min
+	n.n = max
+}
+
+func (n *regexNode) reduce() *regexNode {
+	switch n.t {
+	case ntAlternate:
+		return n.reduceAlternation()
+
+	case ntConcatenate:
+		return n.reduceConcatenation()
+
+	case ntLoop, ntLazyloop:
+		return n.reduceRep()
+
+	case ntGroup:
+		return n.reduceGroup()
+
+	case ntSet, ntSetloop:
+		return n.reduceSet()
+
+	default:
+		return n
+	}
+}
+
+// Basic optimization. Single-letter alternations can be replaced
+// by faster set specifications, and nested alternations with no
+// intervening operators can be flattened:
+//
+// a|b|c|def|g|h -> [a-c]|def|[gh]
+// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
+func (n *regexNode) reduceAlternation() *regexNode {
+	if len(n.children) == 0 {
+		return newRegexNode(ntNothing, n.options)
+	}
+
+	wasLastSet := false
+	lastNodeCannotMerge := false
+	var optionsLast RegexOptions
+	var i, j int
+
+	for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
+		at := n.children[i]
+
+		if j < i {
+			n.children[j] = at
+		}
+
+		for {
+			if at.t == ntAlternate {
+				for k := 0; k < len(at.children); k++ {
+					at.children[k].next = n
+				}
+				n.insertChildren(i+1, at.children)
+
+				j--
+			} else if at.t == ntSet || at.t == ntOne {
+				// Cannot merge sets if L or I options differ, or if either are negated.
+				optionsAt := at.options & (RightToLeft | IgnoreCase)
+
+				if at.t == ntSet {
+					if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !at.set.IsMergeable() {
+						wasLastSet = true
+						lastNodeCannotMerge = !at.set.IsMergeable()
+						optionsLast = optionsAt
+						break
+					}
+				} else if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge {
+					wasLastSet = true
+					lastNodeCannotMerge = false
+					optionsLast = optionsAt
+					break
+				}
+
+				// The last node was a Set or a One, we're a Set or One and our options are the same.
+				// Merge the two nodes.
+				j--
+				prev := n.children[j]
+
+				var prevCharClass *CharSet
+				if prev.t == ntOne {
+					prevCharClass = &CharSet{}
+					prevCharClass.addChar(prev.ch)
+				} else {
+					prevCharClass = prev.set
+				}
+
+				if at.t == ntOne {
+					prevCharClass.addChar(at.ch)
+				} else {
+					prevCharClass.addSet(*at.set)
+				}
+
+				prev.t = ntSet
+				prev.set = prevCharClass
+			} else if at.t == ntNothing {
+				j--
+			} else {
+				wasLastSet = false
+				lastNodeCannotMerge = false
+			}
+			break
+		}
+	}
+
+	if j < i {
+		n.removeChildren(j, i)
+	}
+
+	return n.stripEnation(ntNothing)
+}
+
+// Basic optimization. Adjacent strings can be concatenated.
+//
+// (?:abc)(?:def) -> abcdef
+func (n *regexNode) reduceConcatenation() *regexNode {
+	// Eliminate empties and concat adjacent strings/chars
+
+	var optionsLast RegexOptions
+	var optionsAt RegexOptions
+	var i, j int
+
+	if len(n.children) == 0 {
+		return newRegexNode(ntEmpty, n.options)
+	}
+
+	wasLastString := false
+
+	for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
+		var at, prev *regexNode
+
+		at = n.children[i]
+
+		if j < i {
+			n.children[j] = at
+		}
+
+		if at.t == ntConcatenate &&
+			((at.options & RightToLeft) == (n.options & RightToLeft)) {
+			for k := 0; k < len(at.children); k++ {
+				at.children[k].next = n
+			}
+
+			//insert at.children at i+1 index in n.children
+			n.insertChildren(i+1, at.children)
+
+			j--
+		} else if at.t == ntMulti || at.t == ntOne {
+			// Cannot merge strings if L or I options differ
+			optionsAt = at.options & (RightToLeft | IgnoreCase)
+
+			if !wasLastString || optionsLast != optionsAt {
+				wasLastString = true
+				optionsLast = optionsAt
+				continue
+			}
+
+			j--
+			prev = n.children[j]
+
+			if prev.t == ntOne {
+				prev.t = ntMulti
+				prev.str = []rune{prev.ch}
+			}
+
+			if (optionsAt & RightToLeft) == 0 {
+				if at.t == ntOne {
+					prev.str = append(prev.str, at.ch)
+				} else {
+					prev.str = append(prev.str, at.str...)
+				}
+			} else {
+				if at.t == ntOne {
+					// insert at the front by expanding our slice, copying the data over, and then setting the value
+					prev.str = append(prev.str, 0)
+					copy(prev.str[1:], prev.str)
+					prev.str[0] = at.ch
+				} else {
+					//insert at the front...this one we'll make a new slice and copy both into it
+					merge := make([]rune, len(prev.str)+len(at.str))
+					copy(merge, at.str)
+					copy(merge[len(at.str):], prev.str)
+					prev.str = merge
+				}
+			}
+		} else if at.t == ntEmpty {
+			j--
+		} else {
+			wasLastString = false
+		}
+	}
+
+	if j < i {
+		// remove indices j through i from the children
+		n.removeChildren(j, i)
+	}
+
+	return n.stripEnation(ntEmpty)
+}
+
+// Nested repeaters just get multiplied with each other if they're not
+// too lumpy
+func (n *regexNode) reduceRep() *regexNode {
+
+	u := n
+	t := n.t
+	min := n.m
+	max := n.n
+
+	for {
+		if len(u.children) == 0 {
+			break
+		}
+
+		child := u.children[0]
+
+		// multiply reps of the same type only
+		if child.t != t {
+			childType := child.t
+
+			if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop ||
+				childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop) {
+				break
+			}
+		}
+
+		// child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
+		// [but things like (a {2,})+ are not too lumpy...]
+		if u.m == 0 && child.m > 1 || child.n < child.m*2 {
+			break
+		}
+
+		u = child
+		if u.m > 0 {
+			if (math.MaxInt32-1)/u.m < min {
+				u.m = math.MaxInt32
+			} else {
+				u.m = u.m * min
+			}
+		}
+		if u.n > 0 {
+			if (math.MaxInt32-1)/u.n < max {
+				u.n = math.MaxInt32
+			} else {
+				u.n = u.n * max
+			}
+		}
+	}
+
+	if math.MaxInt32 == min {
+		return newRegexNode(ntNothing, n.options)
+	}
+	return u
+
+}
+
+// Simple optimization. If a concatenation or alternation has only
+// one child strip out the intermediate node. If it has zero children,
+// turn it into an empty.
+func (n *regexNode) stripEnation(emptyType nodeType) *regexNode {
+	switch len(n.children) {
+	case 0:
+		return newRegexNode(emptyType, n.options)
+	case 1:
+		return n.children[0]
+	default:
+		return n
+	}
+}
+
+func (n *regexNode) reduceGroup() *regexNode {
+	u := n
+
+	for u.t == ntGroup {
+		u = u.children[0]
+	}
+
+	return u
+}
+
+// Simple optimization. If a set is a singleton, an inverse singleton,
+// or empty, it's transformed accordingly.
+func (n *regexNode) reduceSet() *regexNode {
+	// Extract empty-set, one and not-one case as special
+
+	if n.set == nil {
+		n.t = ntNothing
+	} else if n.set.IsSingleton() {
+		n.ch = n.set.SingletonChar()
+		n.set = nil
+		n.t += (ntOne - ntSet)
+	} else if n.set.IsSingletonInverse() {
+		n.ch = n.set.SingletonChar()
+		n.set = nil
+		n.t += (ntNotone - ntSet)
+	}
+
+	return n
+}
+
+func (n *regexNode) reverseLeft() *regexNode {
+	if n.options&RightToLeft != 0 && n.t == ntConcatenate && len(n.children) > 0 {
+		//reverse children order
+		for left, right := 0, len(n.children)-1; left < right; left, right = left+1, right-1 {
+			n.children[left], n.children[right] = n.children[right], n.children[left]
+		}
+	}
+
+	return n
+}
+
+func (n *regexNode) makeQuantifier(lazy bool, min, max int) *regexNode {
+	if min == 0 && max == 0 {
+		return newRegexNode(ntEmpty, n.options)
+	}
+
+	if min == 1 && max == 1 {
+		return n
+	}
+
+	switch n.t {
+	case ntOne, ntNotone, ntSet:
+		if lazy {
+			n.makeRep(Onelazy, min, max)
+		} else {
+			n.makeRep(Oneloop, min, max)
+		}
+		return n
+
+	default:
+		var t nodeType
+		if lazy {
+			t = ntLazyloop
+		} else {
+			t = ntLoop
+		}
+		result := newRegexNodeMN(t, n.options, min, max)
+		result.addChild(n)
+		return result
+	}
+}
+
+// debug functions
+
+var typeStr = []string{
+	"Onerep", "Notonerep", "Setrep",
+	"Oneloop", "Notoneloop", "Setloop",
+	"Onelazy", "Notonelazy", "Setlazy",
+	"One", "Notone", "Set",
+	"Multi", "Ref",
+	"Bol", "Eol", "Boundary", "Nonboundary",
+	"Beginning", "Start", "EndZ", "End",
+	"Nothing", "Empty",
+	"Alternate", "Concatenate",
+	"Loop", "Lazyloop",
+	"Capture", "Group", "Require", "Prevent", "Greedy",
+	"Testref", "Testgroup",
+	"Unknown", "Unknown", "Unknown",
+	"Unknown", "Unknown", "Unknown",
+	"ECMABoundary", "NonECMABoundary",
+}
+
+func (n *regexNode) description() string {
+	buf := &bytes.Buffer{}
+
+	buf.WriteString(typeStr[n.t])
+
+	if (n.options & ExplicitCapture) != 0 {
+		buf.WriteString("-C")
+	}
+	if (n.options & IgnoreCase) != 0 {
+		buf.WriteString("-I")
+	}
+	if (n.options & RightToLeft) != 0 {
+		buf.WriteString("-L")
+	}
+	if (n.options & Multiline) != 0 {
+		buf.WriteString("-M")
+	}
+	if (n.options & Singleline) != 0 {
+		buf.WriteString("-S")
+	}
+	if (n.options & IgnorePatternWhitespace) != 0 {
+		buf.WriteString("-X")
+	}
+	if (n.options & ECMAScript) != 0 {
+		buf.WriteString("-E")
+	}
+
+	switch n.t {
+	case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone:
+		buf.WriteString("(Ch = " + CharDescription(n.ch) + ")")
+		break
+	case ntCapture:
+		buf.WriteString("(index = " + strconv.Itoa(n.m) + ", unindex = " + strconv.Itoa(n.n) + ")")
+		break
+	case ntRef, ntTestref:
+		buf.WriteString("(index = " + strconv.Itoa(n.m) + ")")
+		break
+	case ntMulti:
+		fmt.Fprintf(buf, "(String = %s)", string(n.str))
+		break
+	case ntSet, ntSetloop, ntSetlazy:
+		buf.WriteString("(Set = " + n.set.String() + ")")
+		break
+	}
+
+	switch n.t {
+	case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop:
+		buf.WriteString("(Min = ")
+		buf.WriteString(strconv.Itoa(n.m))
+		buf.WriteString(", Max = ")
+		if n.n == math.MaxInt32 {
+			buf.WriteString("inf")
+		} else {
+			buf.WriteString(strconv.Itoa(n.n))
+		}
+		buf.WriteString(")")
+
+		break
+	}
+
+	return buf.String()
+}
+
+var padSpace = []byte("                                ")
+
+func (t *RegexTree) Dump() string {
+	return t.root.dump()
+}
+
+func (n *regexNode) dump() string {
+	var stack []int
+	CurNode := n
+	CurChild := 0
+
+	buf := bytes.NewBufferString(CurNode.description())
+	buf.WriteRune('\n')
+
+	for {
+		if CurNode.children != nil && CurChild < len(CurNode.children) {
+			stack = append(stack, CurChild+1)
+			CurNode = CurNode.children[CurChild]
+			CurChild = 0
+
+			Depth := len(stack)
+			if Depth > 32 {
+				Depth = 32
+			}
+			buf.Write(padSpace[:Depth])
+			buf.WriteString(CurNode.description())
+			buf.WriteRune('\n')
+		} else {
+			if len(stack) == 0 {
+				break
+			}
+
+			CurChild = stack[len(stack)-1]
+			stack = stack[:len(stack)-1]
+			CurNode = CurNode.next
+		}
+	}
+	return buf.String()
+}
author	Mitja Felicijan <mitja.felicijan@gmail.com>	2024-10-25 00:47:47 +0200
committer	Mitja Felicijan <mitja.felicijan@gmail.com>	2024-10-25 00:47:47 +0200
commit	c6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch)
tree	36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/dlclark/regexp2/syntax/tree.go
parent	0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff)
download	jbmafp-c6cc0108ca7738023b45e0eeac0fa2390532dd93.tar.gz