diff options
Diffstat (limited to 'vendor/github.com/dlclark/regexp2/syntax/tree.go')
| -rw-r--r-- | vendor/github.com/dlclark/regexp2/syntax/tree.go | 654 |
1 files changed, 654 insertions, 0 deletions
diff --git a/vendor/github.com/dlclark/regexp2/syntax/tree.go b/vendor/github.com/dlclark/regexp2/syntax/tree.go new file mode 100644 index 0000000..ea28829 --- /dev/null +++ b/vendor/github.com/dlclark/regexp2/syntax/tree.go | |||
| @@ -0,0 +1,654 @@ | |||
| 1 | package syntax | ||
| 2 | |||
| 3 | import ( | ||
| 4 | "bytes" | ||
| 5 | "fmt" | ||
| 6 | "math" | ||
| 7 | "strconv" | ||
| 8 | ) | ||
| 9 | |||
| 10 | type RegexTree struct { | ||
| 11 | root *regexNode | ||
| 12 | caps map[int]int | ||
| 13 | capnumlist []int | ||
| 14 | captop int | ||
| 15 | Capnames map[string]int | ||
| 16 | Caplist []string | ||
| 17 | options RegexOptions | ||
| 18 | } | ||
| 19 | |||
| 20 | // It is built into a parsed tree for a regular expression. | ||
| 21 | |||
| 22 | // Implementation notes: | ||
| 23 | // | ||
| 24 | // Since the node tree is a temporary data structure only used | ||
| 25 | // during compilation of the regexp to integer codes, it's | ||
| 26 | // designed for clarity and convenience rather than | ||
| 27 | // space efficiency. | ||
| 28 | // | ||
| 29 | // RegexNodes are built into a tree, linked by the n.children list. | ||
| 30 | // Each node also has a n.parent and n.ichild member indicating | ||
| 31 | // its parent and which child # it is in its parent's list. | ||
| 32 | // | ||
| 33 | // RegexNodes come in as many types as there are constructs in | ||
| 34 | // a regular expression, for example, "concatenate", "alternate", | ||
| 35 | // "one", "rept", "group". There are also node types for basic | ||
| 36 | // peephole optimizations, e.g., "onerep", "notsetrep", etc. | ||
| 37 | // | ||
| 38 | // Because perl 5 allows "lookback" groups that scan backwards, | ||
| 39 | // each node also gets a "direction". Normally the value of | ||
| 40 | // boolean n.backward = false. | ||
| 41 | // | ||
| 42 | // During parsing, top-level nodes are also stacked onto a parse | ||
| 43 | // stack (a stack of trees). For this purpose we have a n.next | ||
| 44 | // pointer. [Note that to save a few bytes, we could overload the | ||
| 45 | // n.parent pointer instead.] | ||
| 46 | // | ||
| 47 | // On the parse stack, each tree has a "role" - basically, the | ||
| 48 | // nonterminal in the grammar that the parser has currently | ||
| 49 | // assigned to the tree. That code is stored in n.role. | ||
| 50 | // | ||
| 51 | // Finally, some of the different kinds of nodes have data. | ||
| 52 | // Two integers (for the looping constructs) are stored in | ||
| 53 | // n.operands, an an object (either a string or a set) | ||
| 54 | // is stored in n.data | ||
| 55 | type regexNode struct { | ||
| 56 | t nodeType | ||
| 57 | children []*regexNode | ||
| 58 | str []rune | ||
| 59 | set *CharSet | ||
| 60 | ch rune | ||
| 61 | m int | ||
| 62 | n int | ||
| 63 | options RegexOptions | ||
| 64 | next *regexNode | ||
| 65 | } | ||
| 66 | |||
| 67 | type nodeType int32 | ||
| 68 | |||
| 69 | const ( | ||
| 70 | // The following are leaves, and correspond to primitive operations | ||
| 71 | |||
| 72 | ntOnerep nodeType = 0 // lef,back char,min,max a {n} | ||
| 73 | ntNotonerep = 1 // lef,back char,min,max .{n} | ||
| 74 | ntSetrep = 2 // lef,back set,min,max [\d]{n} | ||
| 75 | ntOneloop = 3 // lef,back char,min,max a {,n} | ||
| 76 | ntNotoneloop = 4 // lef,back char,min,max .{,n} | ||
| 77 | ntSetloop = 5 // lef,back set,min,max [\d]{,n} | ||
| 78 | ntOnelazy = 6 // lef,back char,min,max a {,n}? | ||
| 79 | ntNotonelazy = 7 // lef,back char,min,max .{,n}? | ||
| 80 | ntSetlazy = 8 // lef,back set,min,max [\d]{,n}? | ||
| 81 | ntOne = 9 // lef char a | ||
| 82 | ntNotone = 10 // lef char [^a] | ||
| 83 | ntSet = 11 // lef set [a-z\s] \w \s \d | ||
| 84 | ntMulti = 12 // lef string abcd | ||
| 85 | ntRef = 13 // lef group \# | ||
| 86 | ntBol = 14 // ^ | ||
| 87 | ntEol = 15 // $ | ||
| 88 | ntBoundary = 16 // \b | ||
| 89 | ntNonboundary = 17 // \B | ||
| 90 | ntBeginning = 18 // \A | ||
| 91 | ntStart = 19 // \G | ||
| 92 | ntEndZ = 20 // \Z | ||
| 93 | ntEnd = 21 // \Z | ||
| 94 | |||
| 95 | // Interior nodes do not correspond to primitive operations, but | ||
| 96 | // control structures compositing other operations | ||
| 97 | |||
| 98 | // Concat and alternate take n children, and can run forward or backwards | ||
| 99 | |||
| 100 | ntNothing = 22 // [] | ||
| 101 | ntEmpty = 23 // () | ||
| 102 | ntAlternate = 24 // a|b | ||
| 103 | ntConcatenate = 25 // ab | ||
| 104 | ntLoop = 26 // m,x * + ? {,} | ||
| 105 | ntLazyloop = 27 // m,x *? +? ?? {,}? | ||
| 106 | ntCapture = 28 // n () | ||
| 107 | ntGroup = 29 // (?:) | ||
| 108 | ntRequire = 30 // (?=) (?<=) | ||
| 109 | ntPrevent = 31 // (?!) (?<!) | ||
| 110 | ntGreedy = 32 // (?>) (?<) | ||
| 111 | ntTestref = 33 // (?(n) | ) | ||
| 112 | ntTestgroup = 34 // (?(...) | ) | ||
| 113 | |||
| 114 | ntECMABoundary = 41 // \b | ||
| 115 | ntNonECMABoundary = 42 // \B | ||
| 116 | ) | ||
| 117 | |||
| 118 | func newRegexNode(t nodeType, opt RegexOptions) *regexNode { | ||
| 119 | return ®exNode{ | ||
| 120 | t: t, | ||
| 121 | options: opt, | ||
| 122 | } | ||
| 123 | } | ||
| 124 | |||
| 125 | func newRegexNodeCh(t nodeType, opt RegexOptions, ch rune) *regexNode { | ||
| 126 | return ®exNode{ | ||
| 127 | t: t, | ||
| 128 | options: opt, | ||
| 129 | ch: ch, | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | func newRegexNodeStr(t nodeType, opt RegexOptions, str []rune) *regexNode { | ||
| 134 | return ®exNode{ | ||
| 135 | t: t, | ||
| 136 | options: opt, | ||
| 137 | str: str, | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | func newRegexNodeSet(t nodeType, opt RegexOptions, set *CharSet) *regexNode { | ||
| 142 | return ®exNode{ | ||
| 143 | t: t, | ||
| 144 | options: opt, | ||
| 145 | set: set, | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | func newRegexNodeM(t nodeType, opt RegexOptions, m int) *regexNode { | ||
| 150 | return ®exNode{ | ||
| 151 | t: t, | ||
| 152 | options: opt, | ||
| 153 | m: m, | ||
| 154 | } | ||
| 155 | } | ||
| 156 | func newRegexNodeMN(t nodeType, opt RegexOptions, m, n int) *regexNode { | ||
| 157 | return ®exNode{ | ||
| 158 | t: t, | ||
| 159 | options: opt, | ||
| 160 | m: m, | ||
| 161 | n: n, | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | func (n *regexNode) writeStrToBuf(buf *bytes.Buffer) { | ||
| 166 | for i := 0; i < len(n.str); i++ { | ||
| 167 | buf.WriteRune(n.str[i]) | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | func (n *regexNode) addChild(child *regexNode) { | ||
| 172 | reduced := child.reduce() | ||
| 173 | n.children = append(n.children, reduced) | ||
| 174 | reduced.next = n | ||
| 175 | } | ||
| 176 | |||
| 177 | func (n *regexNode) insertChildren(afterIndex int, nodes []*regexNode) { | ||
| 178 | newChildren := make([]*regexNode, 0, len(n.children)+len(nodes)) | ||
| 179 | n.children = append(append(append(newChildren, n.children[:afterIndex]...), nodes...), n.children[afterIndex:]...) | ||
| 180 | } | ||
| 181 | |||
| 182 | // removes children including the start but not the end index | ||
| 183 | func (n *regexNode) removeChildren(startIndex, endIndex int) { | ||
| 184 | n.children = append(n.children[:startIndex], n.children[endIndex:]...) | ||
| 185 | } | ||
| 186 | |||
| 187 | // Pass type as OneLazy or OneLoop | ||
| 188 | func (n *regexNode) makeRep(t nodeType, min, max int) { | ||
| 189 | n.t += (t - ntOne) | ||
| 190 | n.m = min | ||
| 191 | n.n = max | ||
| 192 | } | ||
| 193 | |||
| 194 | func (n *regexNode) reduce() *regexNode { | ||
| 195 | switch n.t { | ||
| 196 | case ntAlternate: | ||
| 197 | return n.reduceAlternation() | ||
| 198 | |||
| 199 | case ntConcatenate: | ||
| 200 | return n.reduceConcatenation() | ||
| 201 | |||
| 202 | case ntLoop, ntLazyloop: | ||
| 203 | return n.reduceRep() | ||
| 204 | |||
| 205 | case ntGroup: | ||
| 206 | return n.reduceGroup() | ||
| 207 | |||
| 208 | case ntSet, ntSetloop: | ||
| 209 | return n.reduceSet() | ||
| 210 | |||
| 211 | default: | ||
| 212 | return n | ||
| 213 | } | ||
| 214 | } | ||
| 215 | |||
| 216 | // Basic optimization. Single-letter alternations can be replaced | ||
| 217 | // by faster set specifications, and nested alternations with no | ||
| 218 | // intervening operators can be flattened: | ||
| 219 | // | ||
| 220 | // a|b|c|def|g|h -> [a-c]|def|[gh] | ||
| 221 | // apple|(?:orange|pear)|grape -> apple|orange|pear|grape | ||
| 222 | func (n *regexNode) reduceAlternation() *regexNode { | ||
| 223 | if len(n.children) == 0 { | ||
| 224 | return newRegexNode(ntNothing, n.options) | ||
| 225 | } | ||
| 226 | |||
| 227 | wasLastSet := false | ||
| 228 | lastNodeCannotMerge := false | ||
| 229 | var optionsLast RegexOptions | ||
| 230 | var i, j int | ||
| 231 | |||
| 232 | for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 { | ||
| 233 | at := n.children[i] | ||
| 234 | |||
| 235 | if j < i { | ||
| 236 | n.children[j] = at | ||
| 237 | } | ||
| 238 | |||
| 239 | for { | ||
| 240 | if at.t == ntAlternate { | ||
| 241 | for k := 0; k < len(at.children); k++ { | ||
| 242 | at.children[k].next = n | ||
| 243 | } | ||
| 244 | n.insertChildren(i+1, at.children) | ||
| 245 | |||
| 246 | j-- | ||
| 247 | } else if at.t == ntSet || at.t == ntOne { | ||
| 248 | // Cannot merge sets if L or I options differ, or if either are negated. | ||
| 249 | optionsAt := at.options & (RightToLeft | IgnoreCase) | ||
| 250 | |||
| 251 | if at.t == ntSet { | ||
| 252 | if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !at.set.IsMergeable() { | ||
| 253 | wasLastSet = true | ||
| 254 | lastNodeCannotMerge = !at.set.IsMergeable() | ||
| 255 | optionsLast = optionsAt | ||
| 256 | break | ||
| 257 | } | ||
| 258 | } else if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge { | ||
| 259 | wasLastSet = true | ||
| 260 | lastNodeCannotMerge = false | ||
| 261 | optionsLast = optionsAt | ||
| 262 | break | ||
| 263 | } | ||
| 264 | |||
| 265 | // The last node was a Set or a One, we're a Set or One and our options are the same. | ||
| 266 | // Merge the two nodes. | ||
| 267 | j-- | ||
| 268 | prev := n.children[j] | ||
| 269 | |||
| 270 | var prevCharClass *CharSet | ||
| 271 | if prev.t == ntOne { | ||
| 272 | prevCharClass = &CharSet{} | ||
| 273 | prevCharClass.addChar(prev.ch) | ||
| 274 | } else { | ||
| 275 | prevCharClass = prev.set | ||
| 276 | } | ||
| 277 | |||
| 278 | if at.t == ntOne { | ||
| 279 | prevCharClass.addChar(at.ch) | ||
| 280 | } else { | ||
| 281 | prevCharClass.addSet(*at.set) | ||
| 282 | } | ||
| 283 | |||
| 284 | prev.t = ntSet | ||
| 285 | prev.set = prevCharClass | ||
| 286 | } else if at.t == ntNothing { | ||
| 287 | j-- | ||
| 288 | } else { | ||
| 289 | wasLastSet = false | ||
| 290 | lastNodeCannotMerge = false | ||
| 291 | } | ||
| 292 | break | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | if j < i { | ||
| 297 | n.removeChildren(j, i) | ||
| 298 | } | ||
| 299 | |||
| 300 | return n.stripEnation(ntNothing) | ||
| 301 | } | ||
| 302 | |||
| 303 | // Basic optimization. Adjacent strings can be concatenated. | ||
| 304 | // | ||
| 305 | // (?:abc)(?:def) -> abcdef | ||
| 306 | func (n *regexNode) reduceConcatenation() *regexNode { | ||
| 307 | // Eliminate empties and concat adjacent strings/chars | ||
| 308 | |||
| 309 | var optionsLast RegexOptions | ||
| 310 | var optionsAt RegexOptions | ||
| 311 | var i, j int | ||
| 312 | |||
| 313 | if len(n.children) == 0 { | ||
| 314 | return newRegexNode(ntEmpty, n.options) | ||
| 315 | } | ||
| 316 | |||
| 317 | wasLastString := false | ||
| 318 | |||
| 319 | for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 { | ||
| 320 | var at, prev *regexNode | ||
| 321 | |||
| 322 | at = n.children[i] | ||
| 323 | |||
| 324 | if j < i { | ||
| 325 | n.children[j] = at | ||
| 326 | } | ||
| 327 | |||
| 328 | if at.t == ntConcatenate && | ||
| 329 | ((at.options & RightToLeft) == (n.options & RightToLeft)) { | ||
| 330 | for k := 0; k < len(at.children); k++ { | ||
| 331 | at.children[k].next = n | ||
| 332 | } | ||
| 333 | |||
| 334 | //insert at.children at i+1 index in n.children | ||
| 335 | n.insertChildren(i+1, at.children) | ||
| 336 | |||
| 337 | j-- | ||
| 338 | } else if at.t == ntMulti || at.t == ntOne { | ||
| 339 | // Cannot merge strings if L or I options differ | ||
| 340 | optionsAt = at.options & (RightToLeft | IgnoreCase) | ||
| 341 | |||
| 342 | if !wasLastString || optionsLast != optionsAt { | ||
| 343 | wasLastString = true | ||
| 344 | optionsLast = optionsAt | ||
| 345 | continue | ||
| 346 | } | ||
| 347 | |||
| 348 | j-- | ||
| 349 | prev = n.children[j] | ||
| 350 | |||
| 351 | if prev.t == ntOne { | ||
| 352 | prev.t = ntMulti | ||
| 353 | prev.str = []rune{prev.ch} | ||
| 354 | } | ||
| 355 | |||
| 356 | if (optionsAt & RightToLeft) == 0 { | ||
| 357 | if at.t == ntOne { | ||
| 358 | prev.str = append(prev.str, at.ch) | ||
| 359 | } else { | ||
| 360 | prev.str = append(prev.str, at.str...) | ||
| 361 | } | ||
| 362 | } else { | ||
| 363 | if at.t == ntOne { | ||
| 364 | // insert at the front by expanding our slice, copying the data over, and then setting the value | ||
| 365 | prev.str = append(prev.str, 0) | ||
| 366 | copy(prev.str[1:], prev.str) | ||
| 367 | prev.str[0] = at.ch | ||
| 368 | } else { | ||
| 369 | //insert at the front...this one we'll make a new slice and copy both into it | ||
| 370 | merge := make([]rune, len(prev.str)+len(at.str)) | ||
| 371 | copy(merge, at.str) | ||
| 372 | copy(merge[len(at.str):], prev.str) | ||
| 373 | prev.str = merge | ||
| 374 | } | ||
| 375 | } | ||
| 376 | } else if at.t == ntEmpty { | ||
| 377 | j-- | ||
| 378 | } else { | ||
| 379 | wasLastString = false | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | if j < i { | ||
| 384 | // remove indices j through i from the children | ||
| 385 | n.removeChildren(j, i) | ||
| 386 | } | ||
| 387 | |||
| 388 | return n.stripEnation(ntEmpty) | ||
| 389 | } | ||
| 390 | |||
| 391 | // Nested repeaters just get multiplied with each other if they're not | ||
| 392 | // too lumpy | ||
| 393 | func (n *regexNode) reduceRep() *regexNode { | ||
| 394 | |||
| 395 | u := n | ||
| 396 | t := n.t | ||
| 397 | min := n.m | ||
| 398 | max := n.n | ||
| 399 | |||
| 400 | for { | ||
| 401 | if len(u.children) == 0 { | ||
| 402 | break | ||
| 403 | } | ||
| 404 | |||
| 405 | child := u.children[0] | ||
| 406 | |||
| 407 | // multiply reps of the same type only | ||
| 408 | if child.t != t { | ||
| 409 | childType := child.t | ||
| 410 | |||
| 411 | if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop || | ||
| 412 | childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop) { | ||
| 413 | break | ||
| 414 | } | ||
| 415 | } | ||
| 416 | |||
| 417 | // child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})? | ||
| 418 | // [but things like (a {2,})+ are not too lumpy...] | ||
| 419 | if u.m == 0 && child.m > 1 || child.n < child.m*2 { | ||
| 420 | break | ||
| 421 | } | ||
| 422 | |||
| 423 | u = child | ||
| 424 | if u.m > 0 { | ||
| 425 | if (math.MaxInt32-1)/u.m < min { | ||
| 426 | u.m = math.MaxInt32 | ||
| 427 | } else { | ||
| 428 | u.m = u.m * min | ||
| 429 | } | ||
| 430 | } | ||
| 431 | if u.n > 0 { | ||
| 432 | if (math.MaxInt32-1)/u.n < max { | ||
| 433 | u.n = math.MaxInt32 | ||
| 434 | } else { | ||
| 435 | u.n = u.n * max | ||
| 436 | } | ||
| 437 | } | ||
| 438 | } | ||
| 439 | |||
| 440 | if math.MaxInt32 == min { | ||
| 441 | return newRegexNode(ntNothing, n.options) | ||
| 442 | } | ||
| 443 | return u | ||
| 444 | |||
| 445 | } | ||
| 446 | |||
| 447 | // Simple optimization. If a concatenation or alternation has only | ||
| 448 | // one child strip out the intermediate node. If it has zero children, | ||
| 449 | // turn it into an empty. | ||
| 450 | func (n *regexNode) stripEnation(emptyType nodeType) *regexNode { | ||
| 451 | switch len(n.children) { | ||
| 452 | case 0: | ||
| 453 | return newRegexNode(emptyType, n.options) | ||
| 454 | case 1: | ||
| 455 | return n.children[0] | ||
| 456 | default: | ||
| 457 | return n | ||
| 458 | } | ||
| 459 | } | ||
| 460 | |||
| 461 | func (n *regexNode) reduceGroup() *regexNode { | ||
| 462 | u := n | ||
| 463 | |||
| 464 | for u.t == ntGroup { | ||
| 465 | u = u.children[0] | ||
| 466 | } | ||
| 467 | |||
| 468 | return u | ||
| 469 | } | ||
| 470 | |||
| 471 | // Simple optimization. If a set is a singleton, an inverse singleton, | ||
| 472 | // or empty, it's transformed accordingly. | ||
| 473 | func (n *regexNode) reduceSet() *regexNode { | ||
| 474 | // Extract empty-set, one and not-one case as special | ||
| 475 | |||
| 476 | if n.set == nil { | ||
| 477 | n.t = ntNothing | ||
| 478 | } else if n.set.IsSingleton() { | ||
| 479 | n.ch = n.set.SingletonChar() | ||
| 480 | n.set = nil | ||
| 481 | n.t += (ntOne - ntSet) | ||
| 482 | } else if n.set.IsSingletonInverse() { | ||
| 483 | n.ch = n.set.SingletonChar() | ||
| 484 | n.set = nil | ||
| 485 | n.t += (ntNotone - ntSet) | ||
| 486 | } | ||
| 487 | |||
| 488 | return n | ||
| 489 | } | ||
| 490 | |||
| 491 | func (n *regexNode) reverseLeft() *regexNode { | ||
| 492 | if n.options&RightToLeft != 0 && n.t == ntConcatenate && len(n.children) > 0 { | ||
| 493 | //reverse children order | ||
| 494 | for left, right := 0, len(n.children)-1; left < right; left, right = left+1, right-1 { | ||
| 495 | n.children[left], n.children[right] = n.children[right], n.children[left] | ||
| 496 | } | ||
| 497 | } | ||
| 498 | |||
| 499 | return n | ||
| 500 | } | ||
| 501 | |||
| 502 | func (n *regexNode) makeQuantifier(lazy bool, min, max int) *regexNode { | ||
| 503 | if min == 0 && max == 0 { | ||
| 504 | return newRegexNode(ntEmpty, n.options) | ||
| 505 | } | ||
| 506 | |||
| 507 | if min == 1 && max == 1 { | ||
| 508 | return n | ||
| 509 | } | ||
| 510 | |||
| 511 | switch n.t { | ||
| 512 | case ntOne, ntNotone, ntSet: | ||
| 513 | if lazy { | ||
| 514 | n.makeRep(Onelazy, min, max) | ||
| 515 | } else { | ||
| 516 | n.makeRep(Oneloop, min, max) | ||
| 517 | } | ||
| 518 | return n | ||
| 519 | |||
| 520 | default: | ||
| 521 | var t nodeType | ||
| 522 | if lazy { | ||
| 523 | t = ntLazyloop | ||
| 524 | } else { | ||
| 525 | t = ntLoop | ||
| 526 | } | ||
| 527 | result := newRegexNodeMN(t, n.options, min, max) | ||
| 528 | result.addChild(n) | ||
| 529 | return result | ||
| 530 | } | ||
| 531 | } | ||
| 532 | |||
| 533 | // debug functions | ||
| 534 | |||
| 535 | var typeStr = []string{ | ||
| 536 | "Onerep", "Notonerep", "Setrep", | ||
| 537 | "Oneloop", "Notoneloop", "Setloop", | ||
| 538 | "Onelazy", "Notonelazy", "Setlazy", | ||
| 539 | "One", "Notone", "Set", | ||
| 540 | "Multi", "Ref", | ||
| 541 | "Bol", "Eol", "Boundary", "Nonboundary", | ||
| 542 | "Beginning", "Start", "EndZ", "End", | ||
| 543 | "Nothing", "Empty", | ||
| 544 | "Alternate", "Concatenate", | ||
| 545 | "Loop", "Lazyloop", | ||
| 546 | "Capture", "Group", "Require", "Prevent", "Greedy", | ||
| 547 | "Testref", "Testgroup", | ||
| 548 | "Unknown", "Unknown", "Unknown", | ||
| 549 | "Unknown", "Unknown", "Unknown", | ||
| 550 | "ECMABoundary", "NonECMABoundary", | ||
| 551 | } | ||
| 552 | |||
| 553 | func (n *regexNode) description() string { | ||
| 554 | buf := &bytes.Buffer{} | ||
| 555 | |||
| 556 | buf.WriteString(typeStr[n.t]) | ||
| 557 | |||
| 558 | if (n.options & ExplicitCapture) != 0 { | ||
| 559 | buf.WriteString("-C") | ||
| 560 | } | ||
| 561 | if (n.options & IgnoreCase) != 0 { | ||
| 562 | buf.WriteString("-I") | ||
| 563 | } | ||
| 564 | if (n.options & RightToLeft) != 0 { | ||
| 565 | buf.WriteString("-L") | ||
| 566 | } | ||
| 567 | if (n.options & Multiline) != 0 { | ||
| 568 | buf.WriteString("-M") | ||
| 569 | } | ||
| 570 | if (n.options & Singleline) != 0 { | ||
| 571 | buf.WriteString("-S") | ||
| 572 | } | ||
| 573 | if (n.options & IgnorePatternWhitespace) != 0 { | ||
| 574 | buf.WriteString("-X") | ||
| 575 | } | ||
| 576 | if (n.options & ECMAScript) != 0 { | ||
| 577 | buf.WriteString("-E") | ||
| 578 | } | ||
| 579 | |||
| 580 | switch n.t { | ||
| 581 | case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone: | ||
| 582 | buf.WriteString("(Ch = " + CharDescription(n.ch) + ")") | ||
| 583 | break | ||
| 584 | case ntCapture: | ||
| 585 | buf.WriteString("(index = " + strconv.Itoa(n.m) + ", unindex = " + strconv.Itoa(n.n) + ")") | ||
| 586 | break | ||
| 587 | case ntRef, ntTestref: | ||
| 588 | buf.WriteString("(index = " + strconv.Itoa(n.m) + ")") | ||
| 589 | break | ||
| 590 | case ntMulti: | ||
| 591 | fmt.Fprintf(buf, "(String = %s)", string(n.str)) | ||
| 592 | break | ||
| 593 | case ntSet, ntSetloop, ntSetlazy: | ||
| 594 | buf.WriteString("(Set = " + n.set.String() + ")") | ||
| 595 | break | ||
| 596 | } | ||
| 597 | |||
| 598 | switch n.t { | ||
| 599 | case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop: | ||
| 600 | buf.WriteString("(Min = ") | ||
| 601 | buf.WriteString(strconv.Itoa(n.m)) | ||
| 602 | buf.WriteString(", Max = ") | ||
| 603 | if n.n == math.MaxInt32 { | ||
| 604 | buf.WriteString("inf") | ||
| 605 | } else { | ||
| 606 | buf.WriteString(strconv.Itoa(n.n)) | ||
| 607 | } | ||
| 608 | buf.WriteString(")") | ||
| 609 | |||
| 610 | break | ||
| 611 | } | ||
| 612 | |||
| 613 | return buf.String() | ||
| 614 | } | ||
| 615 | |||
| 616 | var padSpace = []byte(" ") | ||
| 617 | |||
| 618 | func (t *RegexTree) Dump() string { | ||
| 619 | return t.root.dump() | ||
| 620 | } | ||
| 621 | |||
| 622 | func (n *regexNode) dump() string { | ||
| 623 | var stack []int | ||
| 624 | CurNode := n | ||
| 625 | CurChild := 0 | ||
| 626 | |||
| 627 | buf := bytes.NewBufferString(CurNode.description()) | ||
| 628 | buf.WriteRune('\n') | ||
| 629 | |||
| 630 | for { | ||
| 631 | if CurNode.children != nil && CurChild < len(CurNode.children) { | ||
| 632 | stack = append(stack, CurChild+1) | ||
| 633 | CurNode = CurNode.children[CurChild] | ||
| 634 | CurChild = 0 | ||
| 635 | |||
| 636 | Depth := len(stack) | ||
| 637 | if Depth > 32 { | ||
| 638 | Depth = 32 | ||
| 639 | } | ||
| 640 | buf.Write(padSpace[:Depth]) | ||
| 641 | buf.WriteString(CurNode.description()) | ||
| 642 | buf.WriteRune('\n') | ||
| 643 | } else { | ||
| 644 | if len(stack) == 0 { | ||
| 645 | break | ||
| 646 | } | ||
| 647 | |||
| 648 | CurChild = stack[len(stack)-1] | ||
| 649 | stack = stack[:len(stack)-1] | ||
| 650 | CurNode = CurNode.next | ||
| 651 | } | ||
| 652 | } | ||
| 653 | return buf.String() | ||
| 654 | } | ||
