1// Package util provides utility functions for the goldmark.
   2package util
   3
   4import (
   5	"bytes"
   6	"io"
   7	"net/url"
   8	"regexp"
   9	"slices"
  10	"sort"
  11	"strconv"
  12	"unicode"
  13	"unicode/utf8"
  14)
  15
  16// A CopyOnWriteBuffer is a byte buffer that copies buffer when
  17// it need to be changed.
  18type CopyOnWriteBuffer struct {
  19	buffer []byte
  20	copied bool
  21}
  22
  23// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer.
  24func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
  25	return CopyOnWriteBuffer{
  26		buffer: buffer,
  27		copied: false,
  28	}
  29}
  30
  31// Write writes given bytes to the buffer.
  32// Write allocate new buffer and clears it at the first time.
  33func (b *CopyOnWriteBuffer) Write(value []byte) {
  34	if !b.copied {
  35		b.buffer = make([]byte, 0, len(b.buffer)+20)
  36		b.copied = true
  37	}
  38	b.buffer = append(b.buffer, value...)
  39}
  40
  41// WriteString writes given string to the buffer.
  42// WriteString allocate new buffer and clears it at the first time.
  43func (b *CopyOnWriteBuffer) WriteString(value string) {
  44	b.Write(StringToReadOnlyBytes(value))
  45}
  46
  47// Append appends given bytes to the buffer.
  48// Append copy buffer at the first time.
  49func (b *CopyOnWriteBuffer) Append(value []byte) {
  50	if !b.copied {
  51		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
  52		copy(tmp, b.buffer)
  53		b.buffer = tmp
  54		b.copied = true
  55	}
  56	b.buffer = append(b.buffer, value...)
  57}
  58
  59// AppendString appends given string to the buffer.
  60// AppendString copy buffer at the first time.
  61func (b *CopyOnWriteBuffer) AppendString(value string) {
  62	b.Append(StringToReadOnlyBytes(value))
  63}
  64
  65// WriteByte writes the given byte to the buffer.
  66// WriteByte allocate new buffer and clears it at the first time.
  67func (b *CopyOnWriteBuffer) WriteByte(c byte) error {
  68	if !b.copied {
  69		b.buffer = make([]byte, 0, len(b.buffer)+20)
  70		b.copied = true
  71	}
  72	b.buffer = append(b.buffer, c)
  73	return nil
  74}
  75
  76// AppendByte appends given bytes to the buffer.
  77// AppendByte copy buffer at the first time.
  78func (b *CopyOnWriteBuffer) AppendByte(c byte) {
  79	if !b.copied {
  80		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
  81		copy(tmp, b.buffer)
  82		b.buffer = tmp
  83		b.copied = true
  84	}
  85	b.buffer = append(b.buffer, c)
  86}
  87
  88// Bytes returns bytes of this buffer.
  89func (b *CopyOnWriteBuffer) Bytes() []byte {
  90	return b.buffer
  91}
  92
  93// IsCopied returns true if buffer has been copied, otherwise false.
  94func (b *CopyOnWriteBuffer) IsCopied() bool {
  95	return b.copied
  96}
  97
  98// IsEscapedPunctuation returns true if character at a given index i
  99// is an escaped punctuation, otherwise false.
 100func IsEscapedPunctuation(source []byte, i int) bool {
 101	return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
 102}
 103
 104// ReadWhile read the given source while pred is true.
 105func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
 106	j := index[0]
 107	ok := false
 108	for ; j < index[1]; j++ {
 109		c1 := source[j]
 110		if pred(c1) {
 111			ok = true
 112			continue
 113		}
 114		break
 115	}
 116	return j, ok
 117}
 118
 119// IsBlank returns true if the given string is all space characters.
 120func IsBlank(bs []byte) bool {
 121	for _, b := range bs {
 122		if !IsSpace(b) {
 123			return false
 124		}
 125	}
 126	return true
 127}
 128
 129// VisualizeSpaces visualize invisible space characters.
 130func VisualizeSpaces(bs []byte) []byte {
 131	bs = bytes.ReplaceAll(bs, []byte(" "), []byte("[SPACE]"))
 132	bs = bytes.ReplaceAll(bs, []byte("\t"), []byte("[TAB]"))
 133	bs = bytes.ReplaceAll(bs, []byte("\n"), []byte("[NEWLINE]\n"))
 134	bs = bytes.ReplaceAll(bs, []byte("\r"), []byte("[CR]"))
 135	bs = bytes.ReplaceAll(bs, []byte("\v"), []byte("[VTAB]"))
 136	bs = bytes.ReplaceAll(bs, []byte("\x00"), []byte("[NUL]"))
 137	bs = bytes.ReplaceAll(bs, []byte("\ufffd"), []byte("[U+FFFD]"))
 138	return bs
 139}
 140
 141// TabWidth calculates actual width of a tab at the given position.
 142func TabWidth(currentPos int) int {
 143	return 4 - currentPos%4
 144}
 145
 146// IndentPosition searches an indent position with the given width for the given line.
 147// If the line contains tab characters, paddings may be not zero.
 148// currentPos==0 and width==2:
 149//
 150//	position: 0    1
 151//	          [TAB]aaaa
 152//	width:    1234 5678
 153//
 154// width=2 is in the tab character. In this case, IndentPosition returns
 155// (pos=1, padding=2).
 156func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
 157	return IndentPositionPadding(bs, currentPos, 0, width)
 158}
 159
 160// IndentPositionPadding searches an indent position with the given width for the given line.
 161// This function is mostly same as IndentPosition except this function
 162// takes account into additional paddings.
 163func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
 164	if width == 0 {
 165		return 0, paddingv
 166	}
 167	w := 0
 168	i := 0
 169	l := len(bs)
 170	p := paddingv
 171	for ; i < l; i++ {
 172		if p > 0 {
 173			p--
 174			w++
 175			continue
 176		}
 177		if bs[i] == '\t' && w < width {
 178			w += TabWidth(currentPos + w)
 179		} else if bs[i] == ' ' && w < width {
 180			w++
 181		} else {
 182			break
 183		}
 184	}
 185	if w >= width {
 186		return i - paddingv, w - width
 187	}
 188	return -1, -1
 189}
 190
 191// DedentPosition dedents lines by the given width.
 192//
 193// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
 194func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
 195	if width == 0 {
 196		return 0, 0
 197	}
 198	w := 0
 199	l := len(bs)
 200	i := 0
 201loop:
 202	for ; i < l; i++ {
 203		switch bs[i] {
 204		case '\t':
 205			w += TabWidth(currentPos + w)
 206		case ' ':
 207			w++
 208		default:
 209			break loop
 210		}
 211	}
 212	if w >= width {
 213		return i, w - width
 214	}
 215	return i, 0
 216}
 217
 218// DedentPositionPadding dedents lines by the given width.
 219// This function is mostly same as DedentPosition except this function
 220// takes account into additional paddings.
 221//
 222// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
 223func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
 224	if width == 0 {
 225		return 0, paddingv
 226	}
 227
 228	w := 0
 229	i := 0
 230	l := len(bs)
 231loop:
 232	for ; i < l; i++ {
 233		switch bs[i] {
 234		case '\t':
 235			w += TabWidth(currentPos + w)
 236		case ' ':
 237			w++
 238		default:
 239			break loop
 240		}
 241	}
 242	if w >= width {
 243		return i - paddingv, w - width
 244	}
 245	return i - paddingv, 0
 246}
 247
 248// IndentWidth calculate an indent width for the given line.
 249func IndentWidth(bs []byte, currentPos int) (width, pos int) {
 250	for i := range len(bs) {
 251		switch bs[i] {
 252		case ' ':
 253			width++
 254			pos++
 255		case '\t':
 256			width += TabWidth(currentPos + width)
 257			pos++
 258		default:
 259			return
 260		}
 261	}
 262	return
 263}
 264
 265// FirstNonSpacePosition returns a position line that is a first nonspace
 266// character.
 267func FirstNonSpacePosition(bs []byte) int {
 268	i := 0
 269	for ; i < len(bs); i++ {
 270		c := bs[i]
 271		if c == ' ' || c == '\t' {
 272			continue
 273		}
 274		if c == '\n' {
 275			return -1
 276		}
 277		return i
 278	}
 279	return -1
 280}
 281
 282// FindClosure returns a position that closes the given opener.
 283// If codeSpan is set true, it ignores characters in code spans.
 284// If allowNesting is set true, closures correspond to nested opener will be
 285// ignored.
 286//
 287// Deprecated: This function can not handle newlines. Many elements
 288// can be existed over multiple lines(e.g. link labels).
 289// Use text.Reader.FindClosure.
 290func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
 291	i := 0
 292	opened := 1
 293	codeSpanOpener := 0
 294	for i < len(bs) {
 295		c := bs[i]
 296		if codeSpan && codeSpanOpener != 0 && c == '`' {
 297			codeSpanCloser := 0
 298			for ; i < len(bs); i++ {
 299				if bs[i] == '`' {
 300					codeSpanCloser++
 301				} else {
 302					i--
 303					break
 304				}
 305			}
 306			if codeSpanCloser == codeSpanOpener {
 307				codeSpanOpener = 0
 308			}
 309		} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) {
 310			i += 2
 311			continue
 312		} else if codeSpan && codeSpanOpener == 0 && c == '`' {
 313			for ; i < len(bs); i++ {
 314				if bs[i] == '`' {
 315					codeSpanOpener++
 316				} else {
 317					i--
 318					break
 319				}
 320			}
 321		} else if (codeSpan && codeSpanOpener == 0) || !codeSpan {
 322			switch c {
 323			case closure:
 324				opened--
 325				if opened == 0 {
 326					return i
 327				}
 328			case opener:
 329				if !allowNesting {
 330					return -1
 331				}
 332				opened++
 333			}
 334		}
 335		i++
 336	}
 337	return -1
 338}
 339
 340// TrimLeft trims characters in the given s from head of the source.
 341// bytes.TrimLeft offers same functionalities, but bytes.TrimLeft
 342// allocates new buffer for the result.
 343func TrimLeft(source, b []byte) []byte {
 344	i := 0
 345	for ; i < len(source); i++ {
 346		c := source[i]
 347		found := false
 348		for j := range len(b) {
 349			if c == b[j] {
 350				found = true
 351				break
 352			}
 353		}
 354		if !found {
 355			break
 356		}
 357	}
 358	return source[i:]
 359}
 360
 361// TrimRight trims characters in the given s from tail of the source.
 362func TrimRight(source, b []byte) []byte {
 363	i := len(source) - 1
 364	for ; i >= 0; i-- {
 365		c := source[i]
 366		found := false
 367		for j := range len(b) {
 368			if c == b[j] {
 369				found = true
 370				break
 371			}
 372		}
 373		if !found {
 374			break
 375		}
 376	}
 377	return source[:i+1]
 378}
 379
 380// TrimLeftLength returns a length of leading specified characters.
 381func TrimLeftLength(source, s []byte) int {
 382	return len(source) - len(TrimLeft(source, s))
 383}
 384
 385// TrimRightLength returns a length of trailing specified characters.
 386func TrimRightLength(source, s []byte) int {
 387	return len(source) - len(TrimRight(source, s))
 388}
 389
 390// TrimLeftSpaceLength returns a length of leading space characters.
 391func TrimLeftSpaceLength(source []byte) int {
 392	i := 0
 393	for ; i < len(source); i++ {
 394		if !IsSpace(source[i]) {
 395			break
 396		}
 397	}
 398	return i
 399}
 400
 401// TrimRightSpaceLength returns a length of trailing space characters.
 402func TrimRightSpaceLength(source []byte) int {
 403	l := len(source)
 404	i := l - 1
 405	for ; i >= 0; i-- {
 406		if !IsSpace(source[i]) {
 407			break
 408		}
 409	}
 410	if i < 0 {
 411		return l
 412	}
 413	return l - 1 - i
 414}
 415
 416// TrimLeftSpace returns a subslice of the given string by slicing off all leading
 417// space characters.
 418func TrimLeftSpace(source []byte) []byte {
 419	return TrimLeft(source, spaces)
 420}
 421
 422// TrimRightSpace returns a subslice of the given string by slicing off all trailing
 423// space characters.
 424func TrimRightSpace(source []byte) []byte {
 425	return TrimRight(source, spaces)
 426}
 427
 428// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
 429func DoFullUnicodeCaseFolding(v []byte) []byte {
 430	var rbuf []byte
 431	cob := NewCopyOnWriteBuffer(v)
 432	n := 0
 433	for i := 0; i < len(v); i++ {
 434		c := v[i]
 435		if c < 0xb5 {
 436			if c >= 0x41 && c <= 0x5a {
 437				// A-Z to a-z
 438				cob.Write(v[n:i])
 439				_ = cob.WriteByte(c + 32)
 440				n = i + 1
 441			}
 442			continue
 443		}
 444
 445		if !utf8.RuneStart(c) {
 446			continue
 447		}
 448		r, length := utf8.DecodeRune(v[i:])
 449		if r == utf8.RuneError {
 450			continue
 451		}
 452		folded, ok := unicodeCaseFoldings[r]
 453		if !ok {
 454			continue
 455		}
 456
 457		cob.Write(v[n:i])
 458		if rbuf == nil {
 459			rbuf = make([]byte, 4)
 460		}
 461		for _, f := range folded {
 462			l := utf8.EncodeRune(rbuf, f)
 463			cob.Write(rbuf[:l])
 464		}
 465		i += length - 1
 466		n = i + 1
 467	}
 468	if cob.IsCopied() {
 469		cob.Write(v[n:])
 470	}
 471	return cob.Bytes()
 472}
 473
 474// ReplaceSpaces replaces sequence of spaces with the given repl.
 475func ReplaceSpaces(source []byte, repl byte) []byte {
 476	var ret []byte
 477	start := -1
 478	for i, c := range source {
 479		iss := IsSpace(c)
 480		if start < 0 && iss {
 481			start = i
 482			continue
 483		} else if start >= 0 && iss {
 484			continue
 485		} else if start >= 0 {
 486			if ret == nil {
 487				ret = make([]byte, 0, len(source))
 488				ret = append(ret, source[:start]...)
 489			}
 490			ret = append(ret, repl)
 491			start = -1
 492		}
 493		if ret != nil {
 494			ret = append(ret, c)
 495		}
 496	}
 497	if start >= 0 && ret != nil {
 498		ret = append(ret, repl)
 499	}
 500	if ret == nil {
 501		return source
 502	}
 503	return ret
 504}
 505
 506// ToRune decode given bytes start at pos and returns a rune.
 507func ToRune(source []byte, pos int) rune {
 508	i := pos
 509	for ; i >= 0; i-- {
 510		if utf8.RuneStart(source[i]) {
 511			break
 512		}
 513	}
 514	r, _ := utf8.DecodeRune(source[i:])
 515	return r
 516}
 517
 518// ToValidRune returns 0xFFFD if the given rune is invalid, otherwise v.
 519func ToValidRune(v rune) rune {
 520	if v == 0 || !utf8.ValidRune(v) {
 521		return rune(0xFFFD)
 522	}
 523	return v
 524}
 525
 526// ToLinkReference converts given bytes into a valid link reference string.
 527// ToLinkReference performs unicode case folding, trims leading and trailing spaces,  converts into lower
 528// case and replace spaces with a single space character.
 529func ToLinkReference(v []byte) string {
 530	v = TrimLeftSpace(v)
 531	v = TrimRightSpace(v)
 532	v = DoFullUnicodeCaseFolding(v)
 533	return string(ReplaceSpaces(v, ' '))
 534}
 535
 536var htmlQuote = []byte("&quot;")
 537var htmlAmp = []byte("&amp;")
 538var htmlLess = []byte("&lt;")
 539var htmlGreater = []byte("&gt;")
 540var htmlNull = []byte("\ufffd")
 541
 542var htmlEscapeTable = [256]*[]byte{&htmlNull, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, &htmlQuote, nil, nil, nil, &htmlAmp, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, &htmlLess, nil, &htmlGreater, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} //nolint:golint,lll
 543
 544// EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped,
 545// otherwise nil.
 546func EscapeHTMLByte(b byte) []byte {
 547	v := htmlEscapeTable[b]
 548	if v != nil {
 549		return *v
 550	}
 551	return nil
 552}
 553
 554// EscapeHTML escapes characters that should be escaped in HTML text.
 555func EscapeHTML(v []byte) []byte {
 556	cob := NewCopyOnWriteBuffer(v)
 557	n := 0
 558	for i := range len(v) {
 559		c := v[i]
 560		escaped := htmlEscapeTable[c]
 561		if escaped != nil {
 562			cob.Write(v[n:i])
 563			cob.Write(*escaped)
 564			n = i + 1
 565		}
 566	}
 567	if cob.IsCopied() {
 568		cob.Write(v[n:])
 569	}
 570	return cob.Bytes()
 571}
 572
 573// UnescapePunctuations unescapes blackslash escaped punctuations.
 574func UnescapePunctuations(source []byte) []byte {
 575	cob := NewCopyOnWriteBuffer(source)
 576	limit := len(source)
 577	n := 0
 578	for i := 0; i < limit; {
 579		c := source[i]
 580		if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
 581			cob.Write(source[n:i])
 582			_ = cob.WriteByte(source[i+1])
 583			i += 2
 584			n = i
 585			continue
 586		}
 587		i++
 588	}
 589	if cob.IsCopied() {
 590		cob.Write(source[n:])
 591	}
 592	return cob.Bytes()
 593}
 594
 595// ResolveNumericReferences resolve numeric references like '&#1234;" .
 596func ResolveNumericReferences(source []byte) []byte {
 597	cob := NewCopyOnWriteBuffer(source)
 598	buf := make([]byte, 6)
 599	limit := len(source)
 600	var ok bool
 601	n := 0
 602	for i := 0; i < limit; i++ {
 603		if source[i] == '&' {
 604			pos := i
 605			next := i + 1
 606			if next < limit && source[next] == '#' {
 607				nnext := next + 1
 608				if nnext < limit {
 609					nc := source[nnext]
 610					// code point like #x22;
 611					if nnext < limit && nc == 'x' || nc == 'X' {
 612						start := nnext + 1
 613						i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal)
 614						if ok && i < limit && source[i] == ';' {
 615							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32)
 616							cob.Write(source[n:pos])
 617							n = i + 1
 618							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
 619							cob.Write(buf[:runeSize])
 620							continue
 621						}
 622						// code point like #1234;
 623					} else if nc >= '0' && nc <= '9' {
 624						start := nnext
 625						i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric)
 626						if ok && i < limit && i-start < 8 && source[i] == ';' {
 627							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32)
 628							cob.Write(source[n:pos])
 629							n = i + 1
 630							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
 631							cob.Write(buf[:runeSize])
 632							continue
 633						}
 634					}
 635				}
 636			}
 637			i = next - 1
 638		}
 639	}
 640	if cob.IsCopied() {
 641		cob.Write(source[n:])
 642	}
 643	return cob.Bytes()
 644}
 645
 646// ResolveEntityNames resolve entity references like '&ouml;" .
 647func ResolveEntityNames(source []byte) []byte {
 648	cob := NewCopyOnWriteBuffer(source)
 649	limit := len(source)
 650	var ok bool
 651	n := 0
 652	for i := 0; i < limit; i++ {
 653		if source[i] == '&' {
 654			pos := i
 655			next := i + 1
 656			if !(next < limit && source[next] == '#') {
 657				start := next
 658				i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric)
 659				if ok && i < limit && source[i] == ';' {
 660					name := BytesToReadOnlyString(source[start:i])
 661					entity, ok := LookUpHTML5EntityByName(name)
 662					if ok {
 663						cob.Write(source[n:pos])
 664						n = i + 1
 665						cob.Write(entity.Characters)
 666						continue
 667					}
 668				}
 669			}
 670			i = next - 1
 671		}
 672	}
 673	if cob.IsCopied() {
 674		cob.Write(source[n:])
 675	}
 676	return cob.Bytes()
 677}
 678
 679var htmlSpace = []byte("%20")
 680
 681// URLEscape escape the given URL.
 682// If resolveReference is set true:
 683//  1. unescape punctuations
 684//  2. resolve numeric references
 685//  3. resolve entity references
 686//
 687// URL encoded values (%xx) are kept as is.
 688func URLEscape(v []byte, resolveReference bool) []byte {
 689	if resolveReference {
 690		v = UnescapePunctuations(v)
 691		v = ResolveNumericReferences(v)
 692		v = ResolveEntityNames(v)
 693	}
 694	cob := NewCopyOnWriteBuffer(v)
 695	limit := len(v)
 696	n := 0
 697
 698	for i := 0; i < limit; {
 699		c := v[i]
 700		if urlEscapeTable[c] == 1 {
 701			i++
 702			continue
 703		}
 704		if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) {
 705			i += 3
 706			continue
 707		}
 708		u8len := utf8lenTable[c]
 709		if u8len == 99 { // invalid utf8 leading byte, skip it
 710			i++
 711			continue
 712		}
 713		if c == ' ' {
 714			cob.Write(v[n:i])
 715			cob.Write(htmlSpace)
 716			i++
 717			n = i
 718			continue
 719		}
 720		if int(u8len) > len(v) {
 721			u8len = int8(len(v) - 1)
 722		}
 723		if u8len == 0 {
 724			i++
 725			n = i
 726			continue
 727		}
 728		cob.Write(v[n:i])
 729		stop := i + int(u8len)
 730		if stop > len(v) {
 731			i++
 732			n = i
 733			continue
 734		}
 735		cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop]))))
 736		i += int(u8len)
 737		n = i
 738	}
 739	if cob.IsCopied() && n < limit {
 740		cob.Write(v[n:])
 741	}
 742	return cob.Bytes()
 743}
 744
 745// FindURLIndex returns a stop index value if the given bytes seem an URL.
 746// This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
 747func FindURLIndex(b []byte) int {
 748	i := 0
 749	if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
 750		return -1
 751	}
 752	i++
 753	for ; i < len(b); i++ {
 754		c := b[i]
 755		if urlTable[c]&4 != 4 {
 756			break
 757		}
 758	}
 759	if i == 1 || i > 33 || i >= len(b) {
 760		return -1
 761	}
 762	if b[i] != ':' {
 763		return -1
 764	}
 765	i++
 766	for ; i < len(b); i++ {
 767		c := b[i]
 768		if urlTable[c]&1 != 1 {
 769			break
 770		}
 771	}
 772	return i
 773}
 774
 775var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`) //nolint:golint,lll
 776
 777// FindEmailIndex returns a stop index value if the given bytes seem an email address.
 778func FindEmailIndex(b []byte) int {
 779	// TODO: eliminate regexps
 780	i := 0
 781	for ; i < len(b); i++ {
 782		c := b[i]
 783		if emailTable[c]&1 != 1 {
 784			break
 785		}
 786	}
 787	if i == 0 {
 788		return -1
 789	}
 790	if i >= len(b) || b[i] != '@' {
 791		return -1
 792	}
 793	i++
 794	if i >= len(b) {
 795		return -1
 796	}
 797	match := emailDomainRegexp.FindSubmatchIndex(b[i:])
 798	if match == nil {
 799		return -1
 800	}
 801	return i + match[1]
 802}
 803
 804var spaces = []byte(" \t\n\x0b\x0c\x0d")
 805
 806var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
 807
 808var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
 809
 810// a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()#
 811
 812var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
 813
 814var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99} //nolint:golint,lll
 815
 816var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} //nolint:golint,lll
 817
 818var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
 819
 820// UTF8Len returns a byte length of the utf-8 character.
 821func UTF8Len(b byte) int8 {
 822	return utf8lenTable[b]
 823}
 824
 825// IsPunct returns true if the given character is a punctuation, otherwise false.
 826func IsPunct(c byte) bool {
 827	return punctTable[c] == 1
 828}
 829
 830// IsPunctRune returns true if the given rune is a punctuation, otherwise false.
 831func IsPunctRune(r rune) bool {
 832	return unicode.IsSymbol(r) || unicode.IsPunct(r)
 833}
 834
 835// IsSpace returns true if the given character is a space, otherwise false.
 836func IsSpace(c byte) bool {
 837	return spaceTable[c] == 1
 838}
 839
 840// IsSpaceRune returns true if the given rune is a space, otherwise false.
 841func IsSpaceRune(r rune) bool {
 842	return int32(r) <= 256 && IsSpace(byte(r)) || unicode.IsSpace(r)
 843}
 844
 845// IsNumeric returns true if the given character is a numeric, otherwise false.
 846func IsNumeric(c byte) bool {
 847	return c >= '0' && c <= '9'
 848}
 849
 850// IsHexDecimal returns true if the given character is a hexdecimal, otherwise false.
 851func IsHexDecimal(c byte) bool {
 852	return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'
 853}
 854
 855// IsAlphaNumeric returns true if the given character is a alphabet or a numeric, otherwise false.
 856func IsAlphaNumeric(c byte) bool {
 857	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
 858}
 859
 860// A BufWriter is a subset of the bufio.Writer .
 861type BufWriter interface {
 862	io.Writer
 863	Available() int
 864	Buffered() int
 865	Flush() error
 866	WriteByte(c byte) error
 867	WriteRune(r rune) (size int, err error)
 868	WriteString(s string) (int, error)
 869}
 870
 871// A PrioritizedValue struct holds pair of an arbitrary value and a priority.
 872type PrioritizedValue struct {
 873	// Value is an arbitrary value that you want to prioritize.
 874	Value any
 875	// Priority is a priority of the value.
 876	Priority int
 877}
 878
 879// PrioritizedSlice is a slice of the PrioritizedValues.
 880type PrioritizedSlice []PrioritizedValue
 881
 882// Sort sorts the PrioritizedSlice in ascending order.
 883func (s PrioritizedSlice) Sort() {
 884	sort.Slice(s, func(i, j int) bool {
 885		return s[i].Priority < s[j].Priority
 886	})
 887}
 888
 889// Remove removes the given value from this slice.
 890func (s PrioritizedSlice) Remove(v any) PrioritizedSlice {
 891	i := 0
 892	found := false
 893	for ; i < len(s); i++ {
 894		if s[i].Value == v {
 895			found = true
 896			break
 897		}
 898	}
 899	if !found {
 900		return s
 901	}
 902	return slices.Delete(s, i, i+1)
 903}
 904
 905// Prioritized returns a new PrioritizedValue.
 906func Prioritized(v any, priority int) PrioritizedValue {
 907	return PrioritizedValue{v, priority}
 908}
 909
 910func bytesHash(b []byte) uint64 {
 911	var hash uint64 = 5381
 912	for _, c := range b {
 913		hash = ((hash << 5) + hash) + uint64(c)
 914	}
 915	return hash
 916}
 917
 918// BytesFilter is a efficient data structure for checking whether bytes exist or not.
 919// BytesFilter is thread-safe.
 920type BytesFilter interface {
 921	// Add adds given bytes to this set.
 922	Add([]byte)
 923
 924	// Contains return true if this set contains given bytes, otherwise false.
 925	Contains([]byte) bool
 926
 927	// Extend copies this filter and adds given bytes to new filter.
 928	Extend(...[]byte) BytesFilter
 929
 930	// ExtendString copies this filter and adds given bytes to new filter.
 931	// Given string must be separated by a comma.
 932	ExtendString(string) BytesFilter
 933}
 934
 935type bytesFilter struct {
 936	chars     [256]uint8
 937	threshold int
 938	slots     [][][]byte
 939}
 940
 941// NewBytesFilter returns a new BytesFilter.
 942func NewBytesFilter(elements ...[]byte) BytesFilter {
 943	s := &bytesFilter{
 944		threshold: 3,
 945		slots:     make([][][]byte, 64),
 946	}
 947	for _, element := range elements {
 948		s.Add(element)
 949	}
 950	return s
 951}
 952
 953// NewBytesFilterString returns a new BytesFilter.
 954// Given string must be separated by a comma.
 955func NewBytesFilterString(elements string) BytesFilter {
 956	s := &bytesFilter{
 957		threshold: 3,
 958		slots:     make([][][]byte, 64),
 959	}
 960	start := 0
 961	for i := range len(elements) {
 962		if elements[i] == ',' {
 963			s.Add(StringToReadOnlyBytes(elements[start:i]))
 964			start = i + 1
 965		}
 966	}
 967	if start < len(elements) {
 968		s.Add(StringToReadOnlyBytes(elements[start:]))
 969	}
 970	return s
 971
 972}
 973
 974func (s *bytesFilter) Add(b []byte) {
 975	l := len(b)
 976	m := min(l, s.threshold)
 977	for i := range m {
 978		s.chars[b[i]] |= 1 << uint8(i)
 979	}
 980	h := bytesHash(b) % uint64(len(s.slots))
 981	slot := s.slots[h]
 982	if slot == nil {
 983		slot = [][]byte{}
 984	}
 985	s.slots[h] = append(slot, b)
 986}
 987
 988func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter {
 989	newFilter := NewBytesFilter().(*bytesFilter)
 990	newFilter.chars = s.chars
 991	newFilter.threshold = s.threshold
 992	for k, v := range s.slots {
 993		newSlot := make([][]byte, len(v))
 994		copy(newSlot, v)
 995		newFilter.slots[k] = v
 996	}
 997	for _, b := range bs {
 998		newFilter.Add(b)
 999	}
1000	return newFilter
1001}
1002
1003func (s *bytesFilter) ExtendString(elements string) BytesFilter {
1004	newFilter := NewBytesFilter().(*bytesFilter)
1005	newFilter.chars = s.chars
1006	newFilter.threshold = s.threshold
1007	for k, v := range s.slots {
1008		newSlot := make([][]byte, len(v))
1009		copy(newSlot, v)
1010		newFilter.slots[k] = v
1011	}
1012	start := 0
1013	for i := range len(elements) {
1014		if elements[i] == ',' {
1015			newFilter.Add(StringToReadOnlyBytes(elements[start:i]))
1016			start = i + 1
1017		}
1018	}
1019	if start < len(elements) {
1020		newFilter.Add(StringToReadOnlyBytes(elements[start:]))
1021	}
1022	return newFilter
1023}
1024
1025func (s *bytesFilter) Contains(b []byte) bool {
1026	l := len(b)
1027	m := min(l, s.threshold)
1028	for i := range m {
1029		if (s.chars[b[i]] & (1 << uint8(i))) == 0 {
1030			return false
1031		}
1032	}
1033	h := bytesHash(b) % uint64(len(s.slots))
1034	slot := s.slots[h]
1035	if len(slot) == 0 {
1036		return false
1037	}
1038	for _, element := range slot {
1039		if bytes.Equal(element, b) {
1040			return true
1041		}
1042	}
1043	return false
1044}