jbmafp - vendor/github.com/yuin/goldmark/util/util.go

Path: jbmafp / vendor / github.com / yuin / goldmark / util / util.go (raw)
  1// Package util provides utility functions for the goldmark.
  2package util
  3
  4import (
  5	"bytes"
  6	"io"
  7	"net/url"
  8	"regexp"
  9	"sort"
 10	"strconv"
 11	"unicode"
 12	"unicode/utf8"
 13)
 14
 15// A CopyOnWriteBuffer is a byte buffer that copies buffer when
 16// it need to be changed.
 17type CopyOnWriteBuffer struct {
 18	buffer []byte
 19	copied bool
 20}
 21
 22// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer.
 23func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
 24	return CopyOnWriteBuffer{
 25		buffer: buffer,
 26		copied: false,
 27	}
 28}
 29
 30// Write writes given bytes to the buffer.
 31// Write allocate new buffer and clears it at the first time.
 32func (b *CopyOnWriteBuffer) Write(value []byte) {
 33	if !b.copied {
 34		b.buffer = make([]byte, 0, len(b.buffer)+20)
 35		b.copied = true
 36	}
 37	b.buffer = append(b.buffer, value...)
 38}
 39
 40// WriteString writes given string to the buffer.
 41// WriteString allocate new buffer and clears it at the first time.
 42func (b *CopyOnWriteBuffer) WriteString(value string) {
 43	b.Write(StringToReadOnlyBytes(value))
 44}
 45
 46// Append appends given bytes to the buffer.
 47// Append copy buffer at the first time.
 48func (b *CopyOnWriteBuffer) Append(value []byte) {
 49	if !b.copied {
 50		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
 51		copy(tmp, b.buffer)
 52		b.buffer = tmp
 53		b.copied = true
 54	}
 55	b.buffer = append(b.buffer, value...)
 56}
 57
 58// AppendString appends given string to the buffer.
 59// AppendString copy buffer at the first time.
 60func (b *CopyOnWriteBuffer) AppendString(value string) {
 61	b.Append(StringToReadOnlyBytes(value))
 62}
 63
 64// WriteByte writes the given byte to the buffer.
 65// WriteByte allocate new buffer and clears it at the first time.
 66func (b *CopyOnWriteBuffer) WriteByte(c byte) {
 67	if !b.copied {
 68		b.buffer = make([]byte, 0, len(b.buffer)+20)
 69		b.copied = true
 70	}
 71	b.buffer = append(b.buffer, c)
 72}
 73
 74// AppendByte appends given bytes to the buffer.
 75// AppendByte copy buffer at the first time.
 76func (b *CopyOnWriteBuffer) AppendByte(c byte) {
 77	if !b.copied {
 78		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
 79		copy(tmp, b.buffer)
 80		b.buffer = tmp
 81		b.copied = true
 82	}
 83	b.buffer = append(b.buffer, c)
 84}
 85
 86// Bytes returns bytes of this buffer.
 87func (b *CopyOnWriteBuffer) Bytes() []byte {
 88	return b.buffer
 89}
 90
 91// IsCopied returns true if buffer has been copied, otherwise false.
 92func (b *CopyOnWriteBuffer) IsCopied() bool {
 93	return b.copied
 94}
 95
 96// IsEscapedPunctuation returns true if character at a given index i
 97// is an escaped punctuation, otherwise false.
 98func IsEscapedPunctuation(source []byte, i int) bool {
 99	return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
100}
101
102// ReadWhile read the given source while pred is true.
103func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
104	j := index[0]
105	ok := false
106	for ; j < index[1]; j++ {
107		c1 := source[j]
108		if pred(c1) {
109			ok = true
110			continue
111		}
112		break
113	}
114	return j, ok
115}
116
117// IsBlank returns true if the given string is all space characters.
118func IsBlank(bs []byte) bool {
119	for _, b := range bs {
120		if !IsSpace(b) {
121			return false
122		}
123	}
124	return true
125}
126
127// VisualizeSpaces visualize invisible space characters.
128func VisualizeSpaces(bs []byte) []byte {
129	bs = bytes.Replace(bs, []byte(" "), []byte("[SPACE]"), -1)
130	bs = bytes.Replace(bs, []byte("\t"), []byte("[TAB]"), -1)
131	bs = bytes.Replace(bs, []byte("\n"), []byte("[NEWLINE]\n"), -1)
132	bs = bytes.Replace(bs, []byte("\r"), []byte("[CR]"), -1)
133	bs = bytes.Replace(bs, []byte("\v"), []byte("[VTAB]"), -1)
134	bs = bytes.Replace(bs, []byte("\x00"), []byte("[NUL]"), -1)
135	bs = bytes.Replace(bs, []byte("\ufffd"), []byte("[U+FFFD]"), -1)
136	return bs
137}
138
139// TabWidth calculates actual width of a tab at the given position.
140func TabWidth(currentPos int) int {
141	return 4 - currentPos%4
142}
143
144// IndentPosition searches an indent position with the given width for the given line.
145// If the line contains tab characters, paddings may be not zero.
146// currentPos==0 and width==2:
147//
148//     position: 0    1
149//               [TAB]aaaa
150//     width:    1234 5678
151//
152// width=2 is in the tab character. In this case, IndentPosition returns
153// (pos=1, padding=2)
154func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
155	return IndentPositionPadding(bs, currentPos, 0, width)
156}
157
158// IndentPositionPadding searches an indent position with the given width for the given line.
159// This function is mostly same as IndentPosition except this function
160// takes account into additional paddings.
161func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
162	if width == 0 {
163		return 0, paddingv
164	}
165	w := 0
166	i := 0
167	l := len(bs)
168	for ; i < l; i++ {
169		if bs[i] == '\t' && w < width {
170			w += TabWidth(currentPos + w)
171		} else if bs[i] == ' ' && w < width {
172			w++
173		} else {
174			break
175		}
176	}
177	if w >= width {
178		return i - paddingv, w - width
179	}
180	return -1, -1
181}
182
183// DedentPosition dedents lines by the given width.
184//
185// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
186func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
187	if width == 0 {
188		return 0, 0
189	}
190	w := 0
191	l := len(bs)
192	i := 0
193	for ; i < l; i++ {
194		if bs[i] == '\t' {
195			w += TabWidth(currentPos + w)
196		} else if bs[i] == ' ' {
197			w++
198		} else {
199			break
200		}
201	}
202	if w >= width {
203		return i, w - width
204	}
205	return i, 0
206}
207
208// DedentPositionPadding dedents lines by the given width.
209// This function is mostly same as DedentPosition except this function
210// takes account into additional paddings.
211//
212// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
213func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
214	if width == 0 {
215		return 0, paddingv
216	}
217
218	w := 0
219	i := 0
220	l := len(bs)
221	for ; i < l; i++ {
222		if bs[i] == '\t' {
223			w += TabWidth(currentPos + w)
224		} else if bs[i] == ' ' {
225			w++
226		} else {
227			break
228		}
229	}
230	if w >= width {
231		return i - paddingv, w - width
232	}
233	return i - paddingv, 0
234}
235
236// IndentWidth calculate an indent width for the given line.
237func IndentWidth(bs []byte, currentPos int) (width, pos int) {
238	l := len(bs)
239	for i := 0; i < l; i++ {
240		b := bs[i]
241		if b == ' ' {
242			width++
243			pos++
244		} else if b == '\t' {
245			width += TabWidth(currentPos + width)
246			pos++
247		} else {
248			break
249		}
250	}
251	return
252}
253
254// FirstNonSpacePosition returns a position line that is a first nonspace
255// character.
256func FirstNonSpacePosition(bs []byte) int {
257	i := 0
258	for ; i < len(bs); i++ {
259		c := bs[i]
260		if c == ' ' || c == '\t' {
261			continue
262		}
263		if c == '\n' {
264			return -1
265		}
266		return i
267	}
268	return -1
269}
270
271// FindClosure returns a position that closes the given opener.
272// If codeSpan is set true, it ignores characters in code spans.
273// If allowNesting is set true, closures correspond to nested opener will be
274// ignored.
275//
276// Deprecated: This function can not handle newlines. Many elements
277// can be existed over multiple lines(e.g. link labels).
278// Use text.Reader.FindClosure.
279func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
280	i := 0
281	opened := 1
282	codeSpanOpener := 0
283	for i < len(bs) {
284		c := bs[i]
285		if codeSpan && codeSpanOpener != 0 && c == '`' {
286			codeSpanCloser := 0
287			for ; i < len(bs); i++ {
288				if bs[i] == '`' {
289					codeSpanCloser++
290				} else {
291					i--
292					break
293				}
294			}
295			if codeSpanCloser == codeSpanOpener {
296				codeSpanOpener = 0
297			}
298		} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) {
299			i += 2
300			continue
301		} else if codeSpan && codeSpanOpener == 0 && c == '`' {
302			for ; i < len(bs); i++ {
303				if bs[i] == '`' {
304					codeSpanOpener++
305				} else {
306					i--
307					break
308				}
309			}
310		} else if (codeSpan && codeSpanOpener == 0) || !codeSpan {
311			if c == closure {
312				opened--
313				if opened == 0 {
314					return i
315				}
316			} else if c == opener {
317				if !allowNesting {
318					return -1
319				}
320				opened++
321			}
322		}
323		i++
324	}
325	return -1
326}
327
328// TrimLeft trims characters in the given s from head of the source.
329// bytes.TrimLeft offers same functionalities, but bytes.TrimLeft
330// allocates new buffer for the result.
331func TrimLeft(source, b []byte) []byte {
332	i := 0
333	for ; i < len(source); i++ {
334		c := source[i]
335		found := false
336		for j := 0; j < len(b); j++ {
337			if c == b[j] {
338				found = true
339				break
340			}
341		}
342		if !found {
343			break
344		}
345	}
346	return source[i:]
347}
348
349// TrimRight trims characters in the given s from tail of the source.
350func TrimRight(source, b []byte) []byte {
351	i := len(source) - 1
352	for ; i >= 0; i-- {
353		c := source[i]
354		found := false
355		for j := 0; j < len(b); j++ {
356			if c == b[j] {
357				found = true
358				break
359			}
360		}
361		if !found {
362			break
363		}
364	}
365	return source[:i+1]
366}
367
368// TrimLeftLength returns a length of leading specified characters.
369func TrimLeftLength(source, s []byte) int {
370	return len(source) - len(TrimLeft(source, s))
371}
372
373// TrimRightLength returns a length of trailing specified characters.
374func TrimRightLength(source, s []byte) int {
375	return len(source) - len(TrimRight(source, s))
376}
377
378// TrimLeftSpaceLength returns a length of leading space characters.
379func TrimLeftSpaceLength(source []byte) int {
380	i := 0
381	for ; i < len(source); i++ {
382		if !IsSpace(source[i]) {
383			break
384		}
385	}
386	return i
387}
388
389// TrimRightSpaceLength returns a length of trailing space characters.
390func TrimRightSpaceLength(source []byte) int {
391	l := len(source)
392	i := l - 1
393	for ; i >= 0; i-- {
394		if !IsSpace(source[i]) {
395			break
396		}
397	}
398	if i < 0 {
399		return l
400	}
401	return l - 1 - i
402}
403
404// TrimLeftSpace returns a subslice of the given string by slicing off all leading
405// space characters.
406func TrimLeftSpace(source []byte) []byte {
407	return TrimLeft(source, spaces)
408}
409
410// TrimRightSpace returns a subslice of the given string by slicing off all trailing
411// space characters.
412func TrimRightSpace(source []byte) []byte {
413	return TrimRight(source, spaces)
414}
415
416// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
417func DoFullUnicodeCaseFolding(v []byte) []byte {
418	var rbuf []byte
419	cob := NewCopyOnWriteBuffer(v)
420	n := 0
421	for i := 0; i < len(v); i++ {
422		c := v[i]
423		if c < 0xb5 {
424			if c >= 0x41 && c <= 0x5a {
425				// A-Z to a-z
426				cob.Write(v[n:i])
427				cob.WriteByte(c + 32)
428				n = i + 1
429			}
430			continue
431		}
432
433		if !utf8.RuneStart(c) {
434			continue
435		}
436		r, length := utf8.DecodeRune(v[i:])
437		if r == utf8.RuneError {
438			continue
439		}
440		folded, ok := unicodeCaseFoldings[r]
441		if !ok {
442			continue
443		}
444
445		cob.Write(v[n:i])
446		if rbuf == nil {
447			rbuf = make([]byte, 4)
448		}
449		for _, f := range folded {
450			l := utf8.EncodeRune(rbuf, f)
451			cob.Write(rbuf[:l])
452		}
453		i += length - 1
454		n = i + 1
455	}
456	if cob.IsCopied() {
457		cob.Write(v[n:])
458	}
459	return cob.Bytes()
460}
461
462// ReplaceSpaces replaces sequence of spaces with the given repl.
463func ReplaceSpaces(source []byte, repl byte) []byte {
464	var ret []byte
465	start := -1
466	for i, c := range source {
467		iss := IsSpace(c)
468		if start < 0 && iss {
469			start = i
470			continue
471		} else if start >= 0 && iss {
472			continue
473		} else if start >= 0 {
474			if ret == nil {
475				ret = make([]byte, 0, len(source))
476				ret = append(ret, source[:start]...)
477			}
478			ret = append(ret, repl)
479			start = -1
480		}
481		if ret != nil {
482			ret = append(ret, c)
483		}
484	}
485	if start >= 0 && ret != nil {
486		ret = append(ret, repl)
487	}
488	if ret == nil {
489		return source
490	}
491	return ret
492}
493
494// ToRune decode given bytes start at pos and returns a rune.
495func ToRune(source []byte, pos int) rune {
496	i := pos
497	for ; i >= 0; i-- {
498		if utf8.RuneStart(source[i]) {
499			break
500		}
501	}
502	r, _ := utf8.DecodeRune(source[i:])
503	return r
504}
505
506// ToValidRune returns 0xFFFD if the given rune is invalid, otherwise v.
507func ToValidRune(v rune) rune {
508	if v == 0 || !utf8.ValidRune(v) {
509		return rune(0xFFFD)
510	}
511	return v
512}
513
514// ToLinkReference converts given bytes into a valid link reference string.
515// ToLinkReference performs unicode case folding, trims leading and trailing spaces,  converts into lower
516// case and replace spaces with a single space character.
517func ToLinkReference(v []byte) string {
518	v = TrimLeftSpace(v)
519	v = TrimRightSpace(v)
520	v = DoFullUnicodeCaseFolding(v)
521	return string(ReplaceSpaces(v, ' '))
522}
523
524var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&quot;"), nil, nil, nil, []byte("&amp;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&lt;"), nil, []byte("&gt;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil}
525
526// EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped,
527// otherwise nil.
528func EscapeHTMLByte(b byte) []byte {
529	return htmlEscapeTable[b]
530}
531
532// EscapeHTML escapes characters that should be escaped in HTML text.
533func EscapeHTML(v []byte) []byte {
534	cob := NewCopyOnWriteBuffer(v)
535	n := 0
536	for i := 0; i < len(v); i++ {
537		c := v[i]
538		escaped := htmlEscapeTable[c]
539		if escaped != nil {
540			cob.Write(v[n:i])
541			cob.Write(escaped)
542			n = i + 1
543		}
544	}
545	if cob.IsCopied() {
546		cob.Write(v[n:])
547	}
548	return cob.Bytes()
549}
550
551// UnescapePunctuations unescapes blackslash escaped punctuations.
552func UnescapePunctuations(source []byte) []byte {
553	cob := NewCopyOnWriteBuffer(source)
554	limit := len(source)
555	n := 0
556	for i := 0; i < limit; {
557		c := source[i]
558		if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
559			cob.Write(source[n:i])
560			cob.WriteByte(source[i+1])
561			i += 2
562			n = i
563			continue
564		}
565		i++
566	}
567	if cob.IsCopied() {
568		cob.Write(source[n:])
569	}
570	return cob.Bytes()
571}
572
573// ResolveNumericReferences resolve numeric references like '&#1234;" .
574func ResolveNumericReferences(source []byte) []byte {
575	cob := NewCopyOnWriteBuffer(source)
576	buf := make([]byte, 6, 6)
577	limit := len(source)
578	ok := false
579	n := 0
580	for i := 0; i < limit; i++ {
581		if source[i] == '&' {
582			pos := i
583			next := i + 1
584			if next < limit && source[next] == '#' {
585				nnext := next + 1
586				if nnext < limit {
587					nc := source[nnext]
588					// code point like #x22;
589					if nnext < limit && nc == 'x' || nc == 'X' {
590						start := nnext + 1
591						i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal)
592						if ok && i < limit && source[i] == ';' {
593							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32)
594							cob.Write(source[n:pos])
595							n = i + 1
596							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
597							cob.Write(buf[:runeSize])
598							continue
599						}
600						// code point like #1234;
601					} else if nc >= '0' && nc <= '9' {
602						start := nnext
603						i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric)
604						if ok && i < limit && i-start < 8 && source[i] == ';' {
605							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32)
606							cob.Write(source[n:pos])
607							n = i + 1
608							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
609							cob.Write(buf[:runeSize])
610							continue
611						}
612					}
613				}
614			}
615			i = next - 1
616		}
617	}
618	if cob.IsCopied() {
619		cob.Write(source[n:])
620	}
621	return cob.Bytes()
622}
623
624// ResolveEntityNames resolve entity references like '&ouml;" .
625func ResolveEntityNames(source []byte) []byte {
626	cob := NewCopyOnWriteBuffer(source)
627	limit := len(source)
628	ok := false
629	n := 0
630	for i := 0; i < limit; i++ {
631		if source[i] == '&' {
632			pos := i
633			next := i + 1
634			if !(next < limit && source[next] == '#') {
635				start := next
636				i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric)
637				if ok && i < limit && source[i] == ';' {
638					name := BytesToReadOnlyString(source[start:i])
639					entity, ok := LookUpHTML5EntityByName(name)
640					if ok {
641						cob.Write(source[n:pos])
642						n = i + 1
643						cob.Write(entity.Characters)
644						continue
645					}
646				}
647			}
648			i = next - 1
649		}
650	}
651	if cob.IsCopied() {
652		cob.Write(source[n:])
653	}
654	return cob.Bytes()
655}
656
657var htmlSpace = []byte("%20")
658
659// URLEscape escape the given URL.
660// If resolveReference is set true:
661//   1. unescape punctuations
662//   2. resolve numeric references
663//   3. resolve entity references
664//
665// URL encoded values (%xx) are kept as is.
666func URLEscape(v []byte, resolveReference bool) []byte {
667	if resolveReference {
668		v = UnescapePunctuations(v)
669		v = ResolveNumericReferences(v)
670		v = ResolveEntityNames(v)
671	}
672	cob := NewCopyOnWriteBuffer(v)
673	limit := len(v)
674	n := 0
675
676	for i := 0; i < limit; {
677		c := v[i]
678		if urlEscapeTable[c] == 1 {
679			i++
680			continue
681		}
682		if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) {
683			i += 3
684			continue
685		}
686		u8len := utf8lenTable[c]
687		if u8len == 99 { // invalid utf8 leading byte, skip it
688			i++
689			continue
690		}
691		if c == ' ' {
692			cob.Write(v[n:i])
693			cob.Write(htmlSpace)
694			i++
695			n = i
696			continue
697		}
698		if int(u8len) > len(v) {
699			u8len = int8(len(v) - 1)
700		}
701		if u8len == 0 {
702			i++
703			n = i
704			continue
705		}
706		cob.Write(v[n:i])
707		stop := i + int(u8len)
708		if stop > len(v) {
709			i++
710			n = i
711			continue
712		}
713		cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop]))))
714		i += int(u8len)
715		n = i
716	}
717	if cob.IsCopied() && n < limit {
718		cob.Write(v[n:])
719	}
720	return cob.Bytes()
721}
722
723// FindURLIndex returns a stop index value if the given bytes seem an URL.
724// This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
725func FindURLIndex(b []byte) int {
726	i := 0
727	if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
728		return -1
729	}
730	i++
731	for ; i < len(b); i++ {
732		c := b[i]
733		if urlTable[c]&4 != 4 {
734			break
735		}
736	}
737	if i == 1 || i > 33 || i >= len(b) {
738		return -1
739	}
740	if b[i] != ':' {
741		return -1
742	}
743	i++
744	for ; i < len(b); i++ {
745		c := b[i]
746		if urlTable[c]&1 != 1 {
747			break
748		}
749	}
750	return i
751}
752
753var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`)
754
755// FindEmailIndex returns a stop index value if the given bytes seem an email address.
756func FindEmailIndex(b []byte) int {
757	// TODO: eliminate regexps
758	i := 0
759	for ; i < len(b); i++ {
760		c := b[i]
761		if emailTable[c]&1 != 1 {
762			break
763		}
764	}
765	if i == 0 {
766		return -1
767	}
768	if i >= len(b) || b[i] != '@' {
769		return -1
770	}
771	i++
772	if i >= len(b) {
773		return -1
774	}
775	match := emailDomainRegexp.FindSubmatchIndex(b[i:])
776	if match == nil {
777		return -1
778	}
779	return i + match[1]
780}
781
782var spaces = []byte(" \t\n\x0b\x0c\x0d")
783
784var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
785
786var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
787
788// a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()#
789var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
790
791var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99}
792
793var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
794
795var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
796
797// UTF8Len returns a byte length of the utf-8 character.
798func UTF8Len(b byte) int8 {
799	return utf8lenTable[b]
800}
801
802// IsPunct returns true if the given character is a punctuation, otherwise false.
803func IsPunct(c byte) bool {
804	return punctTable[c] == 1
805}
806
807// IsPunctRune returns true if the given rune is a punctuation, otherwise false.
808func IsPunctRune(r rune) bool {
809	return int32(r) <= 256 && IsPunct(byte(r)) || unicode.IsPunct(r)
810}
811
812// IsSpace returns true if the given character is a space, otherwise false.
813func IsSpace(c byte) bool {
814	return spaceTable[c] == 1
815}
816
817// IsSpaceRune returns true if the given rune is a space, otherwise false.
818func IsSpaceRune(r rune) bool {
819	return int32(r) <= 256 && IsSpace(byte(r)) || unicode.IsSpace(r)
820}
821
822// IsNumeric returns true if the given character is a numeric, otherwise false.
823func IsNumeric(c byte) bool {
824	return c >= '0' && c <= '9'
825}
826
827// IsHexDecimal returns true if the given character is a hexdecimal, otherwise false.
828func IsHexDecimal(c byte) bool {
829	return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'
830}
831
832// IsAlphaNumeric returns true if the given character is a alphabet or a numeric, otherwise false.
833func IsAlphaNumeric(c byte) bool {
834	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
835}
836
837// IsEastAsianWideRune returns trhe if the given rune is an east asian wide character, otherwise false.
838func IsEastAsianWideRune(r rune) bool {
839	return unicode.Is(unicode.Hiragana, r) ||
840		unicode.Is(unicode.Katakana, r) ||
841		unicode.Is(unicode.Han, r) ||
842		unicode.Is(unicode.Lm, r) ||
843		unicode.Is(unicode.Hangul, r)
844}
845
846// A BufWriter is a subset of the bufio.Writer .
847type BufWriter interface {
848	io.Writer
849	Available() int
850	Buffered() int
851	Flush() error
852	WriteByte(c byte) error
853	WriteRune(r rune) (size int, err error)
854	WriteString(s string) (int, error)
855}
856
857// A PrioritizedValue struct holds pair of an arbitrary value and a priority.
858type PrioritizedValue struct {
859	// Value is an arbitrary value that you want to prioritize.
860	Value interface{}
861	// Priority is a priority of the value.
862	Priority int
863}
864
865// PrioritizedSlice is a slice of the PrioritizedValues
866type PrioritizedSlice []PrioritizedValue
867
868// Sort sorts the PrioritizedSlice in ascending order.
869func (s PrioritizedSlice) Sort() {
870	sort.Slice(s, func(i, j int) bool {
871		return s[i].Priority < s[j].Priority
872	})
873}
874
875// Remove removes the given value from this slice.
876func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice {
877	i := 0
878	found := false
879	for ; i < len(s); i++ {
880		if s[i].Value == v {
881			found = true
882			break
883		}
884	}
885	if !found {
886		return s
887	}
888	return append(s[:i], s[i+1:]...)
889}
890
891// Prioritized returns a new PrioritizedValue.
892func Prioritized(v interface{}, priority int) PrioritizedValue {
893	return PrioritizedValue{v, priority}
894}
895
896func bytesHash(b []byte) uint64 {
897	var hash uint64 = 5381
898	for _, c := range b {
899		hash = ((hash << 5) + hash) + uint64(c)
900	}
901	return hash
902}
903
904// BytesFilter is a efficient data structure for checking whether bytes exist or not.
905// BytesFilter is thread-safe.
906type BytesFilter interface {
907	// Add adds given bytes to this set.
908	Add([]byte)
909
910	// Contains return true if this set contains given bytes, otherwise false.
911	Contains([]byte) bool
912
913	// Extend copies this filter and adds given bytes to new filter.
914	Extend(...[]byte) BytesFilter
915}
916
917type bytesFilter struct {
918	chars     [256]uint8
919	threshold int
920	slots     [][][]byte
921}
922
923// NewBytesFilter returns a new BytesFilter.
924func NewBytesFilter(elements ...[]byte) BytesFilter {
925	s := &bytesFilter{
926		threshold: 3,
927		slots:     make([][][]byte, 64),
928	}
929	for _, element := range elements {
930		s.Add(element)
931	}
932	return s
933}
934
935func (s *bytesFilter) Add(b []byte) {
936	l := len(b)
937	m := s.threshold
938	if l < s.threshold {
939		m = l
940	}
941	for i := 0; i < m; i++ {
942		s.chars[b[i]] |= 1 << uint8(i)
943	}
944	h := bytesHash(b) % uint64(len(s.slots))
945	slot := s.slots[h]
946	if slot == nil {
947		slot = [][]byte{}
948	}
949	s.slots[h] = append(slot, b)
950}
951
952func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter {
953	newFilter := NewBytesFilter().(*bytesFilter)
954	newFilter.chars = s.chars
955	newFilter.threshold = s.threshold
956	for k, v := range s.slots {
957		newSlot := make([][]byte, len(v))
958		copy(newSlot, v)
959		newFilter.slots[k] = v
960	}
961	for _, b := range bs {
962		newFilter.Add(b)
963	}
964	return newFilter
965}
966
967func (s *bytesFilter) Contains(b []byte) bool {
968	l := len(b)
969	m := s.threshold
970	if l < s.threshold {
971		m = l
972	}
973	for i := 0; i < m; i++ {
974		if (s.chars[b[i]] & (1 << uint8(i))) == 0 {
975			return false
976		}
977	}
978	h := bytesHash(b) % uint64(len(s.slots))
979	slot := s.slots[h]
980	if slot == nil || len(slot) == 0 {
981		return false
982	}
983	for _, element := range slot {
984		if bytes.Equal(element, b) {
985			return true
986		}
987	}
988	return false
989}