1package regexp2
   2
   3import (
   4	"bytes"
   5	"errors"
   6	"fmt"
   7	"math"
   8	"strconv"
   9	"strings"
  10	"time"
  11	"unicode"
  12
  13	"github.com/dlclark/regexp2/syntax"
  14)
  15
  16type runner struct {
  17	re   *Regexp
  18	code *syntax.Code
  19
  20	runtextstart int // starting point for search
  21
  22	runtext    []rune // text to search
  23	runtextpos int    // current position in text
  24	runtextend int
  25
  26	// The backtracking stack.  Opcodes use this to store data regarding
  27	// what they have matched and where to backtrack to.  Each "frame" on
  28	// the stack takes the form of [CodePosition Data1 Data2...], where
  29	// CodePosition is the position of the current opcode and
  30	// the data values are all optional.  The CodePosition can be negative, and
  31	// these values (also called "back2") are used by the BranchMark family of opcodes
  32	// to indicate whether they are backtracking after a successful or failed
  33	// match.
  34	// When we backtrack, we pop the CodePosition off the stack, set the current
  35	// instruction pointer to that code position, and mark the opcode
  36	// with a backtracking flag ("Back").  Each opcode then knows how to
  37	// handle its own data.
  38	runtrack    []int
  39	runtrackpos int
  40
  41	// This stack is used to track text positions across different opcodes.
  42	// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
  43	// pair. SetMark records the text position before we match a*b.  Then
  44	// CaptureMark uses that position to figure out where the capture starts.
  45	// Opcodes which push onto this stack are always paired with other opcodes
  46	// which will pop the value from it later.  A successful match should mean
  47	// that this stack is empty.
  48	runstack    []int
  49	runstackpos int
  50
  51	// The crawl stack is used to keep track of captures.  Every time a group
  52	// has a capture, we push its group number onto the runcrawl stack.  In
  53	// the case of a balanced match, we push BOTH groups onto the stack.
  54	runcrawl    []int
  55	runcrawlpos int
  56
  57	runtrackcount int // count of states that may do backtracking
  58
  59	runmatch *Match // result object
  60
  61	ignoreTimeout bool
  62	timeout       time.Duration // timeout in milliseconds (needed for actual)
  63	deadline      fasttime
  64
  65	operator        syntax.InstOp
  66	codepos         int
  67	rightToLeft     bool
  68	caseInsensitive bool
  69}
  70
  71// run searches for matches and can continue from the previous match
  72//
  73// quick is usually false, but can be true to not return matches, just put it in caches
  74// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
  75// input is the string to search for our regex pattern
  76func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
  77
  78	// get a cached runner
  79	runner := re.getRunner()
  80	defer re.putRunner(runner)
  81
  82	if textstart < 0 {
  83		if re.RightToLeft() {
  84			textstart = len(input)
  85		} else {
  86			textstart = 0
  87		}
  88	}
  89
  90	return runner.scan(input, textstart, quick, re.MatchTimeout)
  91}
  92
  93// Scans the string to find the first match. Uses the Match object
  94// both to feed text in and as a place to store matches that come out.
  95//
  96// All the action is in the Go() method. Our
  97// responsibility is to load up the class members before
  98// calling Go.
  99//
 100// The optimizer can compute a set of candidate starting characters,
 101// and we could use a separate method Skip() that will quickly scan past
 102// any characters that we know can't match.
 103func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
 104	r.timeout = timeout
 105	r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
 106	r.runtextstart = textstart
 107	r.runtext = rt
 108	r.runtextend = len(rt)
 109
 110	stoppos := r.runtextend
 111	bump := 1
 112
 113	if r.re.RightToLeft() {
 114		bump = -1
 115		stoppos = 0
 116	}
 117
 118	r.runtextpos = textstart
 119	initted := false
 120
 121	r.startTimeoutWatch()
 122	for {
 123		if r.re.Debug() {
 124			//fmt.Printf("\nSearch content: %v\n", string(r.runtext))
 125			fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
 126			fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
 127		}
 128
 129		if r.findFirstChar() {
 130			if err := r.checkTimeout(); err != nil {
 131				return nil, err
 132			}
 133
 134			if !initted {
 135				r.initMatch()
 136				initted = true
 137			}
 138
 139			if r.re.Debug() {
 140				fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
 141			}
 142
 143			if err := r.execute(); err != nil {
 144				return nil, err
 145			}
 146
 147			if r.runmatch.matchcount[0] > 0 {
 148				// We'll return a match even if it touches a previous empty match
 149				return r.tidyMatch(quick), nil
 150			}
 151
 152			// reset state for another go
 153			r.runtrackpos = len(r.runtrack)
 154			r.runstackpos = len(r.runstack)
 155			r.runcrawlpos = len(r.runcrawl)
 156		}
 157
 158		// failure!
 159
 160		if r.runtextpos == stoppos {
 161			r.tidyMatch(true)
 162			return nil, nil
 163		}
 164
 165		// Recognize leading []* and various anchors, and bump on failure accordingly
 166
 167		// r.bump by one and start again
 168
 169		r.runtextpos += bump
 170	}
 171	// We never get here
 172}
 173
 174func (r *runner) execute() error {
 175
 176	r.goTo(0)
 177
 178	for {
 179
 180		if r.re.Debug() {
 181			r.dumpState()
 182		}
 183
 184		if err := r.checkTimeout(); err != nil {
 185			return err
 186		}
 187
 188		switch r.operator {
 189		case syntax.Stop:
 190			return nil
 191
 192		case syntax.Nothing:
 193			break
 194
 195		case syntax.Goto:
 196			r.goTo(r.operand(0))
 197			continue
 198
 199		case syntax.Testref:
 200			if !r.runmatch.isMatched(r.operand(0)) {
 201				break
 202			}
 203			r.advance(1)
 204			continue
 205
 206		case syntax.Lazybranch:
 207			r.trackPush1(r.textPos())
 208			r.advance(1)
 209			continue
 210
 211		case syntax.Lazybranch | syntax.Back:
 212			r.trackPop()
 213			r.textto(r.trackPeek())
 214			r.goTo(r.operand(0))
 215			continue
 216
 217		case syntax.Setmark:
 218			r.stackPush(r.textPos())
 219			r.trackPush()
 220			r.advance(0)
 221			continue
 222
 223		case syntax.Nullmark:
 224			r.stackPush(-1)
 225			r.trackPush()
 226			r.advance(0)
 227			continue
 228
 229		case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
 230			r.stackPop()
 231			break
 232
 233		case syntax.Getmark:
 234			r.stackPop()
 235			r.trackPush1(r.stackPeek())
 236			r.textto(r.stackPeek())
 237			r.advance(0)
 238			continue
 239
 240		case syntax.Getmark | syntax.Back:
 241			r.trackPop()
 242			r.stackPush(r.trackPeek())
 243			break
 244
 245		case syntax.Capturemark:
 246			if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
 247				break
 248			}
 249			r.stackPop()
 250			if r.operand(1) != -1 {
 251				r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
 252			} else {
 253				r.capture(r.operand(0), r.stackPeek(), r.textPos())
 254			}
 255			r.trackPush1(r.stackPeek())
 256
 257			r.advance(2)
 258
 259			continue
 260
 261		case syntax.Capturemark | syntax.Back:
 262			r.trackPop()
 263			r.stackPush(r.trackPeek())
 264			r.uncapture()
 265			if r.operand(0) != -1 && r.operand(1) != -1 {
 266				r.uncapture()
 267			}
 268
 269			break
 270
 271		case syntax.Branchmark:
 272			r.stackPop()
 273
 274			matched := r.textPos() - r.stackPeek()
 275
 276			if matched != 0 { // Nonempty match -> loop now
 277				r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
 278				r.stackPush(r.textPos())                 // Make new mark
 279				r.goTo(r.operand(0))                     // Loop
 280			} else { // Empty match -> straight now
 281				r.trackPushNeg1(r.stackPeek()) // Save old mark
 282				r.advance(1)                   // Straight
 283			}
 284			continue
 285
 286		case syntax.Branchmark | syntax.Back:
 287			r.trackPopN(2)
 288			r.stackPop()
 289			r.textto(r.trackPeekN(1))      // Recall position
 290			r.trackPushNeg1(r.trackPeek()) // Save old mark
 291			r.advance(1)                   // Straight
 292			continue
 293
 294		case syntax.Branchmark | syntax.Back2:
 295			r.trackPop()
 296			r.stackPush(r.trackPeek()) // Recall old mark
 297			break                      // Backtrack
 298
 299		case syntax.Lazybranchmark:
 300			{
 301				// We hit this the first time through a lazy loop and after each
 302				// successful match of the inner expression.  It simply continues
 303				// on and doesn't loop.
 304				r.stackPop()
 305
 306				oldMarkPos := r.stackPeek()
 307
 308				if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
 309					if oldMarkPos != -1 {
 310						r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
 311					} else {
 312						r.trackPush2(r.textPos(), r.textPos())
 313					}
 314				} else {
 315					// The inner expression found an empty match, so we'll go directly to 'back2' if we
 316					// backtrack. Don't touch the grouping stack here; instead, record the old mark and
 317					// a flag indicating that backtracking doesn't need to pop a grouping stack frame.
 318					r.trackPushNeg2(oldMarkPos, 0)
 319				}
 320				r.advance(1)
 321				continue
 322			}
 323
 324		case syntax.Lazybranchmark | syntax.Back:
 325
 326			// After the first time, Lazybranchmark | syntax.Back occurs
 327			// with each iteration of the loop, and therefore with every attempted
 328			// match of the inner expression.  We'll try to match the inner expression,
 329			// then go back to Lazybranchmark if successful.  If the inner expression
 330			// fails, we go to Lazybranchmark | syntax.Back2
 331
 332			r.trackPopN(2)
 333			pos := r.trackPeekN(1)
 334			r.trackPushNeg2(r.trackPeek(), 1) // Save old mark, note that we pushed a new mark
 335			r.stackPush(pos)                  // Make new mark
 336			r.textto(pos)                     // Recall position
 337			r.goTo(r.operand(0))              // Loop
 338			continue
 339
 340		case syntax.Lazybranchmark | syntax.Back2:
 341			// The lazy loop has failed.  We'll do a true backtrack and
 342			// start over before the lazy loop.
 343			r.trackPopN(2)
 344			oldMark := r.trackPeek()
 345			needsPop := r.trackPeekN(1)
 346			if needsPop != 0 {
 347				r.stackPop()
 348			}
 349			r.stackPush(oldMark) // Recall old mark
 350			break
 351
 352		case syntax.Setcount:
 353			r.stackPush2(r.textPos(), r.operand(0))
 354			r.trackPush()
 355			r.advance(1)
 356			continue
 357
 358		case syntax.Nullcount:
 359			r.stackPush2(-1, r.operand(0))
 360			r.trackPush()
 361			r.advance(1)
 362			continue
 363
 364		case syntax.Setcount | syntax.Back:
 365			r.stackPopN(2)
 366			break
 367
 368		case syntax.Nullcount | syntax.Back:
 369			r.stackPopN(2)
 370			break
 371
 372		case syntax.Branchcount:
 373			// r.stackPush:
 374			//  0: Mark
 375			//  1: Count
 376
 377			r.stackPopN(2)
 378			mark := r.stackPeek()
 379			count := r.stackPeekN(1)
 380			matched := r.textPos() - mark
 381
 382			if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
 383				r.trackPushNeg2(mark, count) // Save old mark, count
 384				r.advance(2)                 // Straight
 385			} else { // Nonempty match -> count+loop now
 386				r.trackPush1(mark)                 // remember mark
 387				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
 388				r.goTo(r.operand(0))               // Loop
 389			}
 390			continue
 391
 392		case syntax.Branchcount | syntax.Back:
 393			// r.trackPush:
 394			//  0: Previous mark
 395			// r.stackPush:
 396			//  0: Mark (= current pos, discarded)
 397			//  1: Count
 398			r.trackPop()
 399			r.stackPopN(2)
 400			if r.stackPeekN(1) > 0 { // Positive -> can go straight
 401				r.textto(r.stackPeek())                           // Zap to mark
 402				r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
 403				r.advance(2)                                      // Straight
 404				continue
 405			}
 406			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
 407			break
 408
 409		case syntax.Branchcount | syntax.Back2:
 410			// r.trackPush:
 411			//  0: Previous mark
 412			//  1: Previous count
 413			r.trackPopN(2)
 414			r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
 415			break                                        // Backtrack
 416
 417		case syntax.Lazybranchcount:
 418			// r.stackPush:
 419			//  0: Mark
 420			//  1: Count
 421
 422			r.stackPopN(2)
 423			mark := r.stackPeek()
 424			count := r.stackPeekN(1)
 425
 426			if count < 0 { // Negative count -> loop now
 427				r.trackPushNeg1(mark)              // Save old mark
 428				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
 429				r.goTo(r.operand(0))               // Loop
 430			} else { // Nonneg count -> straight now
 431				r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
 432				r.advance(2)                           // Straight
 433			}
 434			continue
 435
 436		case syntax.Lazybranchcount | syntax.Back:
 437			// r.trackPush:
 438			//  0: Mark
 439			//  1: Count
 440			//  2: r.textPos
 441
 442			r.trackPopN(3)
 443			mark := r.trackPeek()
 444			textpos := r.trackPeekN(2)
 445
 446			if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
 447				r.textto(textpos)                        // Recall position
 448				r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
 449				r.trackPushNeg1(mark)                    // Save old mark
 450				r.goTo(r.operand(0))                     // Loop
 451				continue
 452			} else { // Max loops or empty match -> backtrack
 453				r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
 454				break                                        // backtrack
 455			}
 456
 457		case syntax.Lazybranchcount | syntax.Back2:
 458			// r.trackPush:
 459			//  0: Previous mark
 460			// r.stackPush:
 461			//  0: Mark (== current pos, discarded)
 462			//  1: Count
 463			r.trackPop()
 464			r.stackPopN(2)
 465			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
 466			break                                          // Backtrack
 467
 468		case syntax.Setjump:
 469			r.stackPush2(r.trackpos(), r.crawlpos())
 470			r.trackPush()
 471			r.advance(0)
 472			continue
 473
 474		case syntax.Setjump | syntax.Back:
 475			r.stackPopN(2)
 476			break
 477
 478		case syntax.Backjump:
 479			// r.stackPush:
 480			//  0: Saved trackpos
 481			//  1: r.crawlpos
 482			r.stackPopN(2)
 483			r.trackto(r.stackPeek())
 484
 485			for r.crawlpos() != r.stackPeekN(1) {
 486				r.uncapture()
 487			}
 488
 489			break
 490
 491		case syntax.Forejump:
 492			// r.stackPush:
 493			//  0: Saved trackpos
 494			//  1: r.crawlpos
 495			r.stackPopN(2)
 496			r.trackto(r.stackPeek())
 497			r.trackPush1(r.stackPeekN(1))
 498			r.advance(0)
 499			continue
 500
 501		case syntax.Forejump | syntax.Back:
 502			// r.trackPush:
 503			//  0: r.crawlpos
 504			r.trackPop()
 505
 506			for r.crawlpos() != r.trackPeek() {
 507				r.uncapture()
 508			}
 509
 510			break
 511
 512		case syntax.Bol:
 513			if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
 514				break
 515			}
 516			r.advance(0)
 517			continue
 518
 519		case syntax.Eol:
 520			if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
 521				break
 522			}
 523			r.advance(0)
 524			continue
 525
 526		case syntax.Boundary:
 527			if !r.isBoundary(r.textPos(), 0, r.runtextend) {
 528				break
 529			}
 530			r.advance(0)
 531			continue
 532
 533		case syntax.Nonboundary:
 534			if r.isBoundary(r.textPos(), 0, r.runtextend) {
 535				break
 536			}
 537			r.advance(0)
 538			continue
 539
 540		case syntax.ECMABoundary:
 541			if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
 542				break
 543			}
 544			r.advance(0)
 545			continue
 546
 547		case syntax.NonECMABoundary:
 548			if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
 549				break
 550			}
 551			r.advance(0)
 552			continue
 553
 554		case syntax.Beginning:
 555			if r.leftchars() > 0 {
 556				break
 557			}
 558			r.advance(0)
 559			continue
 560
 561		case syntax.Start:
 562			if r.textPos() != r.textstart() {
 563				break
 564			}
 565			r.advance(0)
 566			continue
 567
 568		case syntax.EndZ:
 569			rchars := r.rightchars()
 570			if rchars > 1 {
 571				break
 572			}
 573			// RE2 and EcmaScript define $ as "asserts position at the end of the string"
 574			// PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
 575			if (r.re.options & (RE2 | ECMAScript)) != 0 {
 576				// RE2/Ecmascript mode
 577				if rchars > 0 {
 578					break
 579				}
 580			} else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
 581				// "regular" mode
 582				break
 583			}
 584
 585			r.advance(0)
 586			continue
 587
 588		case syntax.End:
 589			if r.rightchars() > 0 {
 590				break
 591			}
 592			r.advance(0)
 593			continue
 594
 595		case syntax.One:
 596			if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
 597				break
 598			}
 599
 600			r.advance(1)
 601			continue
 602
 603		case syntax.Notone:
 604			if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
 605				break
 606			}
 607
 608			r.advance(1)
 609			continue
 610
 611		case syntax.Set:
 612
 613			if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
 614				break
 615			}
 616
 617			r.advance(1)
 618			continue
 619
 620		case syntax.Multi:
 621			if !r.runematch(r.code.Strings[r.operand(0)]) {
 622				break
 623			}
 624
 625			r.advance(1)
 626			continue
 627
 628		case syntax.Ref:
 629
 630			capnum := r.operand(0)
 631
 632			if r.runmatch.isMatched(capnum) {
 633				if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
 634					break
 635				}
 636			} else {
 637				if (r.re.options & ECMAScript) == 0 {
 638					break
 639				}
 640			}
 641
 642			r.advance(1)
 643			continue
 644
 645		case syntax.Onerep:
 646
 647			c := r.operand(1)
 648
 649			if r.forwardchars() < c {
 650				break
 651			}
 652
 653			ch := rune(r.operand(0))
 654
 655			for c > 0 {
 656				if r.forwardcharnext() != ch {
 657					goto BreakBackward
 658				}
 659				c--
 660			}
 661
 662			r.advance(2)
 663			continue
 664
 665		case syntax.Notonerep:
 666
 667			c := r.operand(1)
 668
 669			if r.forwardchars() < c {
 670				break
 671			}
 672			ch := rune(r.operand(0))
 673
 674			for c > 0 {
 675				if r.forwardcharnext() == ch {
 676					goto BreakBackward
 677				}
 678				c--
 679			}
 680
 681			r.advance(2)
 682			continue
 683
 684		case syntax.Setrep:
 685
 686			c := r.operand(1)
 687
 688			if r.forwardchars() < c {
 689				break
 690			}
 691
 692			set := r.code.Sets[r.operand(0)]
 693
 694			for c > 0 {
 695				if !set.CharIn(r.forwardcharnext()) {
 696					goto BreakBackward
 697				}
 698				c--
 699			}
 700
 701			r.advance(2)
 702			continue
 703
 704		case syntax.Oneloop:
 705
 706			c := r.operand(1)
 707
 708			if c > r.forwardchars() {
 709				c = r.forwardchars()
 710			}
 711
 712			ch := rune(r.operand(0))
 713			i := c
 714
 715			for ; i > 0; i-- {
 716				if r.forwardcharnext() != ch {
 717					r.backwardnext()
 718					break
 719				}
 720			}
 721
 722			if c > i {
 723				r.trackPush2(c-i-1, r.textPos()-r.bump())
 724			}
 725
 726			r.advance(2)
 727			continue
 728
 729		case syntax.Notoneloop:
 730
 731			c := r.operand(1)
 732
 733			if c > r.forwardchars() {
 734				c = r.forwardchars()
 735			}
 736
 737			ch := rune(r.operand(0))
 738			i := c
 739
 740			for ; i > 0; i-- {
 741				if r.forwardcharnext() == ch {
 742					r.backwardnext()
 743					break
 744				}
 745			}
 746
 747			if c > i {
 748				r.trackPush2(c-i-1, r.textPos()-r.bump())
 749			}
 750
 751			r.advance(2)
 752			continue
 753
 754		case syntax.Setloop:
 755
 756			c := r.operand(1)
 757
 758			if c > r.forwardchars() {
 759				c = r.forwardchars()
 760			}
 761
 762			set := r.code.Sets[r.operand(0)]
 763			i := c
 764
 765			for ; i > 0; i-- {
 766				if !set.CharIn(r.forwardcharnext()) {
 767					r.backwardnext()
 768					break
 769				}
 770			}
 771
 772			if c > i {
 773				r.trackPush2(c-i-1, r.textPos()-r.bump())
 774			}
 775
 776			r.advance(2)
 777			continue
 778
 779		case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
 780
 781			r.trackPopN(2)
 782			i := r.trackPeek()
 783			pos := r.trackPeekN(1)
 784
 785			r.textto(pos)
 786
 787			if i > 0 {
 788				r.trackPush2(i-1, pos-r.bump())
 789			}
 790
 791			r.advance(2)
 792			continue
 793
 794		case syntax.Setloop | syntax.Back:
 795
 796			r.trackPopN(2)
 797			i := r.trackPeek()
 798			pos := r.trackPeekN(1)
 799
 800			r.textto(pos)
 801
 802			if i > 0 {
 803				r.trackPush2(i-1, pos-r.bump())
 804			}
 805
 806			r.advance(2)
 807			continue
 808
 809		case syntax.Onelazy, syntax.Notonelazy:
 810
 811			c := r.operand(1)
 812
 813			if c > r.forwardchars() {
 814				c = r.forwardchars()
 815			}
 816
 817			if c > 0 {
 818				r.trackPush2(c-1, r.textPos())
 819			}
 820
 821			r.advance(2)
 822			continue
 823
 824		case syntax.Setlazy:
 825
 826			c := r.operand(1)
 827
 828			if c > r.forwardchars() {
 829				c = r.forwardchars()
 830			}
 831
 832			if c > 0 {
 833				r.trackPush2(c-1, r.textPos())
 834			}
 835
 836			r.advance(2)
 837			continue
 838
 839		case syntax.Onelazy | syntax.Back:
 840
 841			r.trackPopN(2)
 842			pos := r.trackPeekN(1)
 843			r.textto(pos)
 844
 845			if r.forwardcharnext() != rune(r.operand(0)) {
 846				break
 847			}
 848
 849			i := r.trackPeek()
 850
 851			if i > 0 {
 852				r.trackPush2(i-1, pos+r.bump())
 853			}
 854
 855			r.advance(2)
 856			continue
 857
 858		case syntax.Notonelazy | syntax.Back:
 859
 860			r.trackPopN(2)
 861			pos := r.trackPeekN(1)
 862			r.textto(pos)
 863
 864			if r.forwardcharnext() == rune(r.operand(0)) {
 865				break
 866			}
 867
 868			i := r.trackPeek()
 869
 870			if i > 0 {
 871				r.trackPush2(i-1, pos+r.bump())
 872			}
 873
 874			r.advance(2)
 875			continue
 876
 877		case syntax.Setlazy | syntax.Back:
 878
 879			r.trackPopN(2)
 880			pos := r.trackPeekN(1)
 881			r.textto(pos)
 882
 883			if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
 884				break
 885			}
 886
 887			i := r.trackPeek()
 888
 889			if i > 0 {
 890				r.trackPush2(i-1, pos+r.bump())
 891			}
 892
 893			r.advance(2)
 894			continue
 895
 896		default:
 897			return errors.New("unknown state in regex runner")
 898		}
 899
 900	BreakBackward:
 901		;
 902
 903		// "break Backward" comes here:
 904		r.backtrack()
 905	}
 906}
 907
 908// increase the size of stack and track storage
 909func (r *runner) ensureStorage() {
 910	if r.runstackpos < r.runtrackcount*4 {
 911		doubleIntSlice(&r.runstack, &r.runstackpos)
 912	}
 913	if r.runtrackpos < r.runtrackcount*4 {
 914		doubleIntSlice(&r.runtrack, &r.runtrackpos)
 915	}
 916}
 917
 918func doubleIntSlice(s *[]int, pos *int) {
 919	oldLen := len(*s)
 920	newS := make([]int, oldLen*2)
 921
 922	copy(newS[oldLen:], *s)
 923	*pos += oldLen
 924	*s = newS
 925}
 926
 927// Save a number on the longjump unrolling stack
 928func (r *runner) crawl(i int) {
 929	if r.runcrawlpos == 0 {
 930		doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
 931	}
 932	r.runcrawlpos--
 933	r.runcrawl[r.runcrawlpos] = i
 934}
 935
 936// Remove a number from the longjump unrolling stack
 937func (r *runner) popcrawl() int {
 938	val := r.runcrawl[r.runcrawlpos]
 939	r.runcrawlpos++
 940	return val
 941}
 942
 943// Get the height of the stack
 944func (r *runner) crawlpos() int {
 945	return len(r.runcrawl) - r.runcrawlpos
 946}
 947
 948func (r *runner) advance(i int) {
 949	r.codepos += (i + 1)
 950	r.setOperator(r.code.Codes[r.codepos])
 951}
 952
 953func (r *runner) goTo(newpos int) {
 954	// when branching backward or in place, ensure storage
 955	if newpos <= r.codepos {
 956		r.ensureStorage()
 957	}
 958
 959	r.setOperator(r.code.Codes[newpos])
 960	r.codepos = newpos
 961}
 962
 963func (r *runner) textto(newpos int) {
 964	r.runtextpos = newpos
 965}
 966
 967func (r *runner) trackto(newpos int) {
 968	r.runtrackpos = len(r.runtrack) - newpos
 969}
 970
 971func (r *runner) textstart() int {
 972	return r.runtextstart
 973}
 974
 975func (r *runner) textPos() int {
 976	return r.runtextpos
 977}
 978
 979// push onto the backtracking stack
 980func (r *runner) trackpos() int {
 981	return len(r.runtrack) - r.runtrackpos
 982}
 983
 984func (r *runner) trackPush() {
 985	r.runtrackpos--
 986	r.runtrack[r.runtrackpos] = r.codepos
 987}
 988
 989func (r *runner) trackPush1(I1 int) {
 990	r.runtrackpos--
 991	r.runtrack[r.runtrackpos] = I1
 992	r.runtrackpos--
 993	r.runtrack[r.runtrackpos] = r.codepos
 994}
 995
 996func (r *runner) trackPush2(I1, I2 int) {
 997	r.runtrackpos--
 998	r.runtrack[r.runtrackpos] = I1
 999	r.runtrackpos--
1000	r.runtrack[r.runtrackpos] = I2
1001	r.runtrackpos--
1002	r.runtrack[r.runtrackpos] = r.codepos
1003}
1004
1005func (r *runner) trackPush3(I1, I2, I3 int) {
1006	r.runtrackpos--
1007	r.runtrack[r.runtrackpos] = I1
1008	r.runtrackpos--
1009	r.runtrack[r.runtrackpos] = I2
1010	r.runtrackpos--
1011	r.runtrack[r.runtrackpos] = I3
1012	r.runtrackpos--
1013	r.runtrack[r.runtrackpos] = r.codepos
1014}
1015
1016func (r *runner) trackPushNeg1(I1 int) {
1017	r.runtrackpos--
1018	r.runtrack[r.runtrackpos] = I1
1019	r.runtrackpos--
1020	r.runtrack[r.runtrackpos] = -r.codepos
1021}
1022
1023func (r *runner) trackPushNeg2(I1, I2 int) {
1024	r.runtrackpos--
1025	r.runtrack[r.runtrackpos] = I1
1026	r.runtrackpos--
1027	r.runtrack[r.runtrackpos] = I2
1028	r.runtrackpos--
1029	r.runtrack[r.runtrackpos] = -r.codepos
1030}
1031
1032func (r *runner) backtrack() {
1033	newpos := r.runtrack[r.runtrackpos]
1034	r.runtrackpos++
1035
1036	if r.re.Debug() {
1037		if newpos < 0 {
1038			fmt.Printf("       Backtracking (back2) to code position %v\n", -newpos)
1039		} else {
1040			fmt.Printf("       Backtracking to code position %v\n", newpos)
1041		}
1042	}
1043
1044	if newpos < 0 {
1045		newpos = -newpos
1046		r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1047	} else {
1048		r.setOperator(r.code.Codes[newpos] | syntax.Back)
1049	}
1050
1051	// When branching backward, ensure storage
1052	if newpos < r.codepos {
1053		r.ensureStorage()
1054	}
1055
1056	r.codepos = newpos
1057}
1058
1059func (r *runner) setOperator(op int) {
1060	r.caseInsensitive = (0 != (op & syntax.Ci))
1061	r.rightToLeft = (0 != (op & syntax.Rtl))
1062	r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1063}
1064
1065func (r *runner) trackPop() {
1066	r.runtrackpos++
1067}
1068
1069// pop framesize items from the backtracking stack
1070func (r *runner) trackPopN(framesize int) {
1071	r.runtrackpos += framesize
1072}
1073
1074// Technically we are actually peeking at items already popped.  So if you want to
1075// get and pop the top item from the stack, you do
1076// r.trackPop();
1077// r.trackPeek();
1078func (r *runner) trackPeek() int {
1079	return r.runtrack[r.runtrackpos-1]
1080}
1081
1082// get the ith element down on the backtracking stack
1083func (r *runner) trackPeekN(i int) int {
1084	return r.runtrack[r.runtrackpos-i-1]
1085}
1086
1087// Push onto the grouping stack
1088func (r *runner) stackPush(I1 int) {
1089	r.runstackpos--
1090	r.runstack[r.runstackpos] = I1
1091}
1092
1093func (r *runner) stackPush2(I1, I2 int) {
1094	r.runstackpos--
1095	r.runstack[r.runstackpos] = I1
1096	r.runstackpos--
1097	r.runstack[r.runstackpos] = I2
1098}
1099
1100func (r *runner) stackPop() {
1101	r.runstackpos++
1102}
1103
1104// pop framesize items from the grouping stack
1105func (r *runner) stackPopN(framesize int) {
1106	r.runstackpos += framesize
1107}
1108
1109// Technically we are actually peeking at items already popped.  So if you want to
1110// get and pop the top item from the stack, you do
1111// r.stackPop();
1112// r.stackPeek();
1113func (r *runner) stackPeek() int {
1114	return r.runstack[r.runstackpos-1]
1115}
1116
1117// get the ith element down on the grouping stack
1118func (r *runner) stackPeekN(i int) int {
1119	return r.runstack[r.runstackpos-i-1]
1120}
1121
1122func (r *runner) operand(i int) int {
1123	return r.code.Codes[r.codepos+i+1]
1124}
1125
1126func (r *runner) leftchars() int {
1127	return r.runtextpos
1128}
1129
1130func (r *runner) rightchars() int {
1131	return r.runtextend - r.runtextpos
1132}
1133
1134func (r *runner) bump() int {
1135	if r.rightToLeft {
1136		return -1
1137	}
1138	return 1
1139}
1140
1141func (r *runner) forwardchars() int {
1142	if r.rightToLeft {
1143		return r.runtextpos
1144	}
1145	return r.runtextend - r.runtextpos
1146}
1147
1148func (r *runner) forwardcharnext() rune {
1149	var ch rune
1150	if r.rightToLeft {
1151		r.runtextpos--
1152		ch = r.runtext[r.runtextpos]
1153	} else {
1154		ch = r.runtext[r.runtextpos]
1155		r.runtextpos++
1156	}
1157
1158	if r.caseInsensitive {
1159		return unicode.ToLower(ch)
1160	}
1161	return ch
1162}
1163
1164func (r *runner) runematch(str []rune) bool {
1165	var pos int
1166
1167	c := len(str)
1168	if !r.rightToLeft {
1169		if r.runtextend-r.runtextpos < c {
1170			return false
1171		}
1172
1173		pos = r.runtextpos + c
1174	} else {
1175		if r.runtextpos-0 < c {
1176			return false
1177		}
1178
1179		pos = r.runtextpos
1180	}
1181
1182	if !r.caseInsensitive {
1183		for c != 0 {
1184			c--
1185			pos--
1186			if str[c] != r.runtext[pos] {
1187				return false
1188			}
1189		}
1190	} else {
1191		for c != 0 {
1192			c--
1193			pos--
1194			if str[c] != unicode.ToLower(r.runtext[pos]) {
1195				return false
1196			}
1197		}
1198	}
1199
1200	if !r.rightToLeft {
1201		pos += len(str)
1202	}
1203
1204	r.runtextpos = pos
1205
1206	return true
1207}
1208
1209func (r *runner) refmatch(index, len int) bool {
1210	var c, pos, cmpos int
1211
1212	if !r.rightToLeft {
1213		if r.runtextend-r.runtextpos < len {
1214			return false
1215		}
1216
1217		pos = r.runtextpos + len
1218	} else {
1219		if r.runtextpos-0 < len {
1220			return false
1221		}
1222
1223		pos = r.runtextpos
1224	}
1225	cmpos = index + len
1226
1227	c = len
1228
1229	if !r.caseInsensitive {
1230		for c != 0 {
1231			c--
1232			cmpos--
1233			pos--
1234			if r.runtext[cmpos] != r.runtext[pos] {
1235				return false
1236			}
1237
1238		}
1239	} else {
1240		for c != 0 {
1241			c--
1242			cmpos--
1243			pos--
1244
1245			if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1246				return false
1247			}
1248		}
1249	}
1250
1251	if !r.rightToLeft {
1252		pos += len
1253	}
1254
1255	r.runtextpos = pos
1256
1257	return true
1258}
1259
1260func (r *runner) backwardnext() {
1261	if r.rightToLeft {
1262		r.runtextpos++
1263	} else {
1264		r.runtextpos--
1265	}
1266}
1267
1268func (r *runner) charAt(j int) rune {
1269	return r.runtext[j]
1270}
1271
1272func (r *runner) findFirstChar() bool {
1273
1274	if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1275		if !r.code.RightToLeft {
1276			if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1277				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1278				r.runtextpos = r.runtextend
1279				return false
1280			}
1281			if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1282				r.runtextpos = r.runtextend - 1
1283			} else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1284				r.runtextpos = r.runtextend
1285			}
1286		} else {
1287			if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1288				(0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1289					(r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1290				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1291				r.runtextpos = 0
1292				return false
1293			}
1294			if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1295				r.runtextpos = 0
1296			}
1297		}
1298
1299		if r.code.BmPrefix != nil {
1300			return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1301		}
1302
1303		return true // found a valid start or end anchor
1304	} else if r.code.BmPrefix != nil {
1305		r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1306
1307		if r.runtextpos == -1 {
1308			if r.code.RightToLeft {
1309				r.runtextpos = 0
1310			} else {
1311				r.runtextpos = r.runtextend
1312			}
1313			return false
1314		}
1315
1316		return true
1317	} else if r.code.FcPrefix == nil {
1318		return true
1319	}
1320
1321	r.rightToLeft = r.code.RightToLeft
1322	r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1323
1324	set := r.code.FcPrefix.PrefixSet
1325	if set.IsSingleton() {
1326		ch := set.SingletonChar()
1327		for i := r.forwardchars(); i > 0; i-- {
1328			if ch == r.forwardcharnext() {
1329				r.backwardnext()
1330				return true
1331			}
1332		}
1333	} else {
1334		for i := r.forwardchars(); i > 0; i-- {
1335			n := r.forwardcharnext()
1336			//fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1337			if set.CharIn(n) {
1338				r.backwardnext()
1339				return true
1340			}
1341		}
1342	}
1343
1344	return false
1345}
1346
1347func (r *runner) initMatch() {
1348	// Use a hashtable'ed Match object if the capture numbers are sparse
1349
1350	if r.runmatch == nil {
1351		if r.re.caps != nil {
1352			r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1353		} else {
1354			r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1355		}
1356	} else {
1357		r.runmatch.reset(r.runtext, r.runtextstart)
1358	}
1359
1360	// note we test runcrawl, because it is the last one to be allocated
1361	// If there is an alloc failure in the middle of the three allocations,
1362	// we may still return to reuse this instance, and we want to behave
1363	// as if the allocations didn't occur. (we used to test _trackcount != 0)
1364
1365	if r.runcrawl != nil {
1366		r.runtrackpos = len(r.runtrack)
1367		r.runstackpos = len(r.runstack)
1368		r.runcrawlpos = len(r.runcrawl)
1369		return
1370	}
1371
1372	r.initTrackCount()
1373
1374	tracksize := r.runtrackcount * 8
1375	stacksize := r.runtrackcount * 8
1376
1377	if tracksize < 32 {
1378		tracksize = 32
1379	}
1380	if stacksize < 16 {
1381		stacksize = 16
1382	}
1383
1384	r.runtrack = make([]int, tracksize)
1385	r.runtrackpos = tracksize
1386
1387	r.runstack = make([]int, stacksize)
1388	r.runstackpos = stacksize
1389
1390	r.runcrawl = make([]int, 32)
1391	r.runcrawlpos = 32
1392}
1393
1394func (r *runner) tidyMatch(quick bool) *Match {
1395	if !quick {
1396		match := r.runmatch
1397
1398		r.runmatch = nil
1399
1400		match.tidy(r.runtextpos)
1401		return match
1402	} else {
1403		// send back our match -- it's not leaving the package, so it's safe to not clean it up
1404		// this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1405		return r.runmatch
1406	}
1407}
1408
1409// capture captures a subexpression. Note that the
1410// capnum used here has already been mapped to a non-sparse
1411// index (by the code generator RegexWriter).
1412func (r *runner) capture(capnum, start, end int) {
1413	if end < start {
1414		T := end
1415		end = start
1416		start = T
1417	}
1418
1419	r.crawl(capnum)
1420	r.runmatch.addMatch(capnum, start, end-start)
1421}
1422
1423// transferCapture captures a subexpression. Note that the
1424// capnum used here has already been mapped to a non-sparse
1425// index (by the code generator RegexWriter).
1426func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1427	var start2, end2 int
1428
1429	// these are the two intervals that are cancelling each other
1430
1431	if end < start {
1432		T := end
1433		end = start
1434		start = T
1435	}
1436
1437	start2 = r.runmatch.matchIndex(uncapnum)
1438	end2 = start2 + r.runmatch.matchLength(uncapnum)
1439
1440	// The new capture gets the innermost defined interval
1441
1442	if start >= end2 {
1443		end = start
1444		start = end2
1445	} else if end <= start2 {
1446		start = start2
1447	} else {
1448		if end > end2 {
1449			end = end2
1450		}
1451		if start2 > start {
1452			start = start2
1453		}
1454	}
1455
1456	r.crawl(uncapnum)
1457	r.runmatch.balanceMatch(uncapnum)
1458
1459	if capnum != -1 {
1460		r.crawl(capnum)
1461		r.runmatch.addMatch(capnum, start, end-start)
1462	}
1463}
1464
1465// revert the last capture
1466func (r *runner) uncapture() {
1467	capnum := r.popcrawl()
1468	r.runmatch.removeMatch(capnum)
1469}
1470
1471//debug
1472
1473func (r *runner) dumpState() {
1474	back := ""
1475	if r.operator&syntax.Back != 0 {
1476		back = " Back"
1477	}
1478	if r.operator&syntax.Back2 != 0 {
1479		back += " Back2"
1480	}
1481	fmt.Printf("Text:  %v\nTrack: %v\nStack: %v\n       %s%s\n\n",
1482		r.textposDescription(),
1483		r.stackDescription(r.runtrack, r.runtrackpos),
1484		r.stackDescription(r.runstack, r.runstackpos),
1485		r.code.OpcodeDescription(r.codepos),
1486		back)
1487}
1488
1489func (r *runner) stackDescription(a []int, index int) string {
1490	buf := &bytes.Buffer{}
1491
1492	fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1493	if buf.Len() < 8 {
1494		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1495	}
1496
1497	buf.WriteRune('(')
1498	for i := index; i < len(a); i++ {
1499		if i > index {
1500			buf.WriteRune(' ')
1501		}
1502
1503		buf.WriteString(strconv.Itoa(a[i]))
1504	}
1505
1506	buf.WriteRune(')')
1507
1508	return buf.String()
1509}
1510
1511func (r *runner) textposDescription() string {
1512	buf := &bytes.Buffer{}
1513
1514	buf.WriteString(strconv.Itoa(r.runtextpos))
1515
1516	if buf.Len() < 8 {
1517		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1518	}
1519
1520	if r.runtextpos > 0 {
1521		buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1522	} else {
1523		buf.WriteRune('^')
1524	}
1525
1526	buf.WriteRune('>')
1527
1528	for i := r.runtextpos; i < r.runtextend; i++ {
1529		buf.WriteString(syntax.CharDescription(r.runtext[i]))
1530	}
1531	if buf.Len() >= 64 {
1532		buf.Truncate(61)
1533		buf.WriteString("...")
1534	} else {
1535		buf.WriteRune('$')
1536	}
1537
1538	return buf.String()
1539}
1540
1541// decide whether the pos
1542// at the specified index is a boundary or not. It's just not worth
1543// emitting inline code for this logic.
1544func (r *runner) isBoundary(index, startpos, endpos int) bool {
1545	return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1546		(index < endpos && syntax.IsWordChar(r.runtext[index]))
1547}
1548
1549func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1550	return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1551		(index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1552}
1553
1554func (r *runner) startTimeoutWatch() {
1555	if r.ignoreTimeout {
1556		return
1557	}
1558	r.deadline = makeDeadline(r.timeout)
1559}
1560
1561func (r *runner) checkTimeout() error {
1562	if r.ignoreTimeout || !r.deadline.reached() {
1563		return nil
1564	}
1565
1566	if r.re.Debug() {
1567		//Debug.WriteLine("")
1568		//Debug.WriteLine("RegEx match timeout occurred!")
1569		//Debug.WriteLine("Specified timeout:       " + TimeSpan.FromMilliseconds(_timeout).ToString())
1570		//Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1571		//Debug.WriteLine("Search pattern:          " + _runregex._pattern)
1572		//Debug.WriteLine("Input:                   " + r.runtext)
1573		//Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1574	}
1575
1576	return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1577}
1578
1579func (r *runner) initTrackCount() {
1580	r.runtrackcount = r.code.TrackCount
1581}
1582
1583// getRunner returns a run to use for matching re.
1584// It uses the re's runner cache if possible, to avoid
1585// unnecessary allocation.
1586func (re *Regexp) getRunner() *runner {
1587	re.muRun.Lock()
1588	if n := len(re.runner); n > 0 {
1589		z := re.runner[n-1]
1590		re.runner = re.runner[:n-1]
1591		re.muRun.Unlock()
1592		return z
1593	}
1594	re.muRun.Unlock()
1595	z := &runner{
1596		re:   re,
1597		code: re.code,
1598	}
1599	return z
1600}
1601
1602// putRunner returns a runner to the re's cache.
1603// There is no attempt to limit the size of the cache, so it will
1604// grow to the maximum number of simultaneous matches
1605// run using re.  (The cache empties when re gets garbage collected.)
1606func (re *Regexp) putRunner(r *runner) {
1607	re.muRun.Lock()
1608	r.runtext = nil
1609	if r.runmatch != nil {
1610		r.runmatch.text = nil
1611	}
1612	re.runner = append(re.runner, r)
1613	re.muRun.Unlock()
1614}