// Package util provides utility functions for goldmark.
2package util
3
4import (
5 "bytes"
6 "io"
7 "net/url"
8 "regexp"
9 "sort"
10 "strconv"
11 "unicode"
12 "unicode/utf8"
13)
14
15// A CopyOnWriteBuffer is a byte buffer that copies buffer when
16// it need to be changed.
17type CopyOnWriteBuffer struct {
18 buffer []byte
19 copied bool
20}
21
22// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer.
23func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
24 return CopyOnWriteBuffer{
25 buffer: buffer,
26 copied: false,
27 }
28}
29
30// Write writes given bytes to the buffer.
31// Write allocate new buffer and clears it at the first time.
32func (b *CopyOnWriteBuffer) Write(value []byte) {
33 if !b.copied {
34 b.buffer = make([]byte, 0, len(b.buffer)+20)
35 b.copied = true
36 }
37 b.buffer = append(b.buffer, value...)
38}
39
40// WriteString writes given string to the buffer.
41// WriteString allocate new buffer and clears it at the first time.
42func (b *CopyOnWriteBuffer) WriteString(value string) {
43 b.Write(StringToReadOnlyBytes(value))
44}
45
46// Append appends given bytes to the buffer.
47// Append copy buffer at the first time.
48func (b *CopyOnWriteBuffer) Append(value []byte) {
49 if !b.copied {
50 tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
51 copy(tmp, b.buffer)
52 b.buffer = tmp
53 b.copied = true
54 }
55 b.buffer = append(b.buffer, value...)
56}
57
58// AppendString appends given string to the buffer.
59// AppendString copy buffer at the first time.
60func (b *CopyOnWriteBuffer) AppendString(value string) {
61 b.Append(StringToReadOnlyBytes(value))
62}
63
64// WriteByte writes the given byte to the buffer.
65// WriteByte allocate new buffer and clears it at the first time.
66func (b *CopyOnWriteBuffer) WriteByte(c byte) {
67 if !b.copied {
68 b.buffer = make([]byte, 0, len(b.buffer)+20)
69 b.copied = true
70 }
71 b.buffer = append(b.buffer, c)
72}
73
74// AppendByte appends given bytes to the buffer.
75// AppendByte copy buffer at the first time.
76func (b *CopyOnWriteBuffer) AppendByte(c byte) {
77 if !b.copied {
78 tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
79 copy(tmp, b.buffer)
80 b.buffer = tmp
81 b.copied = true
82 }
83 b.buffer = append(b.buffer, c)
84}
85
86// Bytes returns bytes of this buffer.
87func (b *CopyOnWriteBuffer) Bytes() []byte {
88 return b.buffer
89}
90
91// IsCopied returns true if buffer has been copied, otherwise false.
92func (b *CopyOnWriteBuffer) IsCopied() bool {
93 return b.copied
94}
95
96// IsEscapedPunctuation returns true if character at a given index i
97// is an escaped punctuation, otherwise false.
98func IsEscapedPunctuation(source []byte, i int) bool {
99 return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
100}
101
// ReadWhile scans source from index[0] up to (not including) index[1] for as
// long as pred accepts the current byte. It returns the position where the
// scan stopped and whether at least one byte was accepted.
func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
	start, end := index[0], index[1]
	pos := start
	for pos < end && pred(source[pos]) {
		pos++
	}
	return pos, pos > start
}
116
117// IsBlank returns true if the given string is all space characters.
118func IsBlank(bs []byte) bool {
119 for _, b := range bs {
120 if !IsSpace(b) {
121 return false
122 }
123 }
124 return true
125}
126
// VisualizeSpaces returns bs with invisible characters replaced by readable
// markers such as [SPACE] and [TAB]. A newline is kept after its [NEWLINE]
// marker.
func VisualizeSpaces(bs []byte) []byte {
	// Applied in order, one full pass per entry.
	markers := [...]struct{ from, to string }{
		{" ", "[SPACE]"},
		{"\t", "[TAB]"},
		{"\n", "[NEWLINE]\n"},
		{"\r", "[CR]"},
		{"\v", "[VTAB]"},
		{"\x00", "[NUL]"},
		{"\ufffd", "[U+FFFD]"},
	}
	for _, m := range markers {
		bs = bytes.Replace(bs, []byte(m.from), []byte(m.to), -1)
	}
	return bs
}
138
// TabWidth returns how many columns a tab occupies when it starts at column
// currentPos, with tab stops every 4 columns.
func TabWidth(currentPos int) int {
	const tabStop = 4
	return tabStop - currentPos%tabStop
}
143
144// IndentPosition searches an indent position with the given width for the given line.
145// If the line contains tab characters, paddings may be not zero.
146// currentPos==0 and width==2:
147//
148// position: 0 1
149// [TAB]aaaa
150// width: 1234 5678
151//
152// width=2 is in the tab character. In this case, IndentPosition returns
153// (pos=1, padding=2)
154func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
155 return IndentPositionPadding(bs, currentPos, 0, width)
156}
157
158// IndentPositionPadding searches an indent position with the given width for the given line.
159// This function is mostly same as IndentPosition except this function
160// takes account into additional paddings.
161func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
162 if width == 0 {
163 return 0, paddingv
164 }
165 w := 0
166 i := 0
167 l := len(bs)
168 for ; i < l; i++ {
169 if bs[i] == '\t' && w < width {
170 w += TabWidth(currentPos + w)
171 } else if bs[i] == ' ' && w < width {
172 w++
173 } else {
174 break
175 }
176 }
177 if w >= width {
178 return i - paddingv, w - width
179 }
180 return -1, -1
181}
182
183// DedentPosition dedents lines by the given width.
184//
185// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
186func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
187 if width == 0 {
188 return 0, 0
189 }
190 w := 0
191 l := len(bs)
192 i := 0
193 for ; i < l; i++ {
194 if bs[i] == '\t' {
195 w += TabWidth(currentPos + w)
196 } else if bs[i] == ' ' {
197 w++
198 } else {
199 break
200 }
201 }
202 if w >= width {
203 return i, w - width
204 }
205 return i, 0
206}
207
208// DedentPositionPadding dedents lines by the given width.
209// This function is mostly same as DedentPosition except this function
210// takes account into additional paddings.
211//
212// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
213func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
214 if width == 0 {
215 return 0, paddingv
216 }
217
218 w := 0
219 i := 0
220 l := len(bs)
221 for ; i < l; i++ {
222 if bs[i] == '\t' {
223 w += TabWidth(currentPos + w)
224 } else if bs[i] == ' ' {
225 w++
226 } else {
227 break
228 }
229 }
230 if w >= width {
231 return i - paddingv, w - width
232 }
233 return i - paddingv, 0
234}
235
236// IndentWidth calculate an indent width for the given line.
237func IndentWidth(bs []byte, currentPos int) (width, pos int) {
238 l := len(bs)
239 for i := 0; i < l; i++ {
240 b := bs[i]
241 if b == ' ' {
242 width++
243 pos++
244 } else if b == '\t' {
245 width += TabWidth(currentPos + width)
246 pos++
247 } else {
248 break
249 }
250 }
251 return
252}
253
// FirstNonSpacePosition returns the index of the first byte of bs that is
// neither a space nor a tab. It returns -1 if that byte is a newline or if
// the line contains only spaces and tabs.
func FirstNonSpacePosition(bs []byte) int {
	for i, c := range bs {
		switch c {
		case ' ', '\t':
			continue
		case '\n':
			return -1
		default:
			return i
		}
	}
	return -1
}
270
271// FindClosure returns a position that closes the given opener.
272// If codeSpan is set true, it ignores characters in code spans.
273// If allowNesting is set true, closures correspond to nested opener will be
274// ignored.
275//
276// Deprecated: This function can not handle newlines. Many elements
277// can be existed over multiple lines(e.g. link labels).
278// Use text.Reader.FindClosure.
279func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
280 i := 0
281 opened := 1
282 codeSpanOpener := 0
283 for i < len(bs) {
284 c := bs[i]
285 if codeSpan && codeSpanOpener != 0 && c == '`' {
286 codeSpanCloser := 0
287 for ; i < len(bs); i++ {
288 if bs[i] == '`' {
289 codeSpanCloser++
290 } else {
291 i--
292 break
293 }
294 }
295 if codeSpanCloser == codeSpanOpener {
296 codeSpanOpener = 0
297 }
298 } else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) {
299 i += 2
300 continue
301 } else if codeSpan && codeSpanOpener == 0 && c == '`' {
302 for ; i < len(bs); i++ {
303 if bs[i] == '`' {
304 codeSpanOpener++
305 } else {
306 i--
307 break
308 }
309 }
310 } else if (codeSpan && codeSpanOpener == 0) || !codeSpan {
311 if c == closure {
312 opened--
313 if opened == 0 {
314 return i
315 }
316 } else if c == opener {
317 if !allowNesting {
318 return -1
319 }
320 opened++
321 }
322 }
323 i++
324 }
325 return -1
326}
327
// TrimLeft returns the subslice of source that remains after removing every
// leading byte contained in b. The result shares memory with source.
func TrimLeft(source, b []byte) []byte {
	start := 0
scan:
	for start < len(source) {
		for _, candidate := range b {
			if candidate == source[start] {
				start++
				continue scan
			}
		}
		break
	}
	return source[start:]
}
348
// TrimRight returns the subslice of source that remains after removing every
// trailing byte contained in b. The result shares memory with source.
func TrimRight(source, b []byte) []byte {
	end := len(source)
scan:
	for end > 0 {
		for _, candidate := range b {
			if candidate == source[end-1] {
				end--
				continue scan
			}
		}
		break
	}
	return source[:end]
}
367
368// TrimLeftLength returns a length of leading specified characters.
369func TrimLeftLength(source, s []byte) int {
370 return len(source) - len(TrimLeft(source, s))
371}
372
373// TrimRightLength returns a length of trailing specified characters.
374func TrimRightLength(source, s []byte) int {
375 return len(source) - len(TrimRight(source, s))
376}
377
378// TrimLeftSpaceLength returns a length of leading space characters.
379func TrimLeftSpaceLength(source []byte) int {
380 i := 0
381 for ; i < len(source); i++ {
382 if !IsSpace(source[i]) {
383 break
384 }
385 }
386 return i
387}
388
389// TrimRightSpaceLength returns a length of trailing space characters.
390func TrimRightSpaceLength(source []byte) int {
391 l := len(source)
392 i := l - 1
393 for ; i >= 0; i-- {
394 if !IsSpace(source[i]) {
395 break
396 }
397 }
398 if i < 0 {
399 return l
400 }
401 return l - 1 - i
402}
403
404// TrimLeftSpace returns a subslice of the given string by slicing off all leading
405// space characters.
406func TrimLeftSpace(source []byte) []byte {
407 return TrimLeft(source, spaces)
408}
409
410// TrimRightSpace returns a subslice of the given string by slicing off all trailing
411// space characters.
412func TrimRightSpace(source []byte) []byte {
413 return TrimRight(source, spaces)
414}
415
416// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
417func DoFullUnicodeCaseFolding(v []byte) []byte {
418 var rbuf []byte
419 cob := NewCopyOnWriteBuffer(v)
420 n := 0
421 for i := 0; i < len(v); i++ {
422 c := v[i]
423 if c < 0xb5 {
424 if c >= 0x41 && c <= 0x5a {
425 // A-Z to a-z
426 cob.Write(v[n:i])
427 cob.WriteByte(c + 32)
428 n = i + 1
429 }
430 continue
431 }
432
433 if !utf8.RuneStart(c) {
434 continue
435 }
436 r, length := utf8.DecodeRune(v[i:])
437 if r == utf8.RuneError {
438 continue
439 }
440 folded, ok := unicodeCaseFoldings[r]
441 if !ok {
442 continue
443 }
444
445 cob.Write(v[n:i])
446 if rbuf == nil {
447 rbuf = make([]byte, 4)
448 }
449 for _, f := range folded {
450 l := utf8.EncodeRune(rbuf, f)
451 cob.Write(rbuf[:l])
452 }
453 i += length - 1
454 n = i + 1
455 }
456 if cob.IsCopied() {
457 cob.Write(v[n:])
458 }
459 return cob.Bytes()
460}
461
462// ReplaceSpaces replaces sequence of spaces with the given repl.
463func ReplaceSpaces(source []byte, repl byte) []byte {
464 var ret []byte
465 start := -1
466 for i, c := range source {
467 iss := IsSpace(c)
468 if start < 0 && iss {
469 start = i
470 continue
471 } else if start >= 0 && iss {
472 continue
473 } else if start >= 0 {
474 if ret == nil {
475 ret = make([]byte, 0, len(source))
476 ret = append(ret, source[:start]...)
477 }
478 ret = append(ret, repl)
479 start = -1
480 }
481 if ret != nil {
482 ret = append(ret, c)
483 }
484 }
485 if start >= 0 && ret != nil {
486 ret = append(ret, repl)
487 }
488 if ret == nil {
489 return source
490 }
491 return ret
492}
493
// ToRune decodes the rune that contains the byte at pos. If pos points into
// the middle of a multi-byte sequence, decoding starts at the nearest
// preceding rune-start byte.
//
// It returns utf8.RuneError if pos is outside source or if the bytes do not
// form a valid encoding.
func ToRune(source []byte, pos int) rune {
	if pos < 0 || pos >= len(source) {
		return utf8.RuneError
	}
	i := pos
	// Stop at index 0 even if it is a continuation byte, so the slice below
	// never starts at a negative index.
	for i > 0 && !utf8.RuneStart(source[i]) {
		i--
	}
	r, _ := utf8.DecodeRune(source[i:])
	return r
}
505
// ToValidRune returns v unless it is 0 or not a valid rune for UTF-8
// encoding, in which case it returns U+FFFD.
func ToValidRune(v rune) rune {
	if v != 0 && utf8.ValidRune(v) {
		return v
	}
	return utf8.RuneError
}
513
514// ToLinkReference converts given bytes into a valid link reference string.
515// ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower
516// case and replace spaces with a single space character.
517func ToLinkReference(v []byte) string {
518 v = TrimLeftSpace(v)
519 v = TrimRightSpace(v)
520 v = DoFullUnicodeCaseFolding(v)
521 return string(ReplaceSpaces(v, ' '))
522}
523
// htmlEscapeTable maps a byte to the HTML entity that must replace it in
// text output. Entries are nil for bytes that need no escaping.
var htmlEscapeTable = [256][]byte{
	'"': []byte("&quot;"),
	'&': []byte("&amp;"),
	'<': []byte("&lt;"),
	'>': []byte("&gt;"),
}

// EscapeHTMLByte returns the HTML entity for b if b must be escaped
// (one of `"`, `&`, `<`, `>`), otherwise nil. The returned slice is shared
// and must not be modified.
func EscapeHTMLByte(b byte) []byte {
	return htmlEscapeTable[b]
}
531
532// EscapeHTML escapes characters that should be escaped in HTML text.
533func EscapeHTML(v []byte) []byte {
534 cob := NewCopyOnWriteBuffer(v)
535 n := 0
536 for i := 0; i < len(v); i++ {
537 c := v[i]
538 escaped := htmlEscapeTable[c]
539 if escaped != nil {
540 cob.Write(v[n:i])
541 cob.Write(escaped)
542 n = i + 1
543 }
544 }
545 if cob.IsCopied() {
546 cob.Write(v[n:])
547 }
548 return cob.Bytes()
549}
550
551// UnescapePunctuations unescapes blackslash escaped punctuations.
552func UnescapePunctuations(source []byte) []byte {
553 cob := NewCopyOnWriteBuffer(source)
554 limit := len(source)
555 n := 0
556 for i := 0; i < limit; {
557 c := source[i]
558 if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
559 cob.Write(source[n:i])
560 cob.WriteByte(source[i+1])
561 i += 2
562 n = i
563 continue
564 }
565 i++
566 }
567 if cob.IsCopied() {
568 cob.Write(source[n:])
569 }
570 return cob.Bytes()
571}
572
573// ResolveNumericReferences resolve numeric references like 'Ӓ" .
574func ResolveNumericReferences(source []byte) []byte {
575 cob := NewCopyOnWriteBuffer(source)
576 buf := make([]byte, 6, 6)
577 limit := len(source)
578 ok := false
579 n := 0
580 for i := 0; i < limit; i++ {
581 if source[i] == '&' {
582 pos := i
583 next := i + 1
584 if next < limit && source[next] == '#' {
585 nnext := next + 1
586 if nnext < limit {
587 nc := source[nnext]
588 // code point like #x22;
589 if nnext < limit && nc == 'x' || nc == 'X' {
590 start := nnext + 1
591 i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal)
592 if ok && i < limit && source[i] == ';' {
593 v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32)
594 cob.Write(source[n:pos])
595 n = i + 1
596 runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
597 cob.Write(buf[:runeSize])
598 continue
599 }
600 // code point like #1234;
601 } else if nc >= '0' && nc <= '9' {
602 start := nnext
603 i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric)
604 if ok && i < limit && i-start < 8 && source[i] == ';' {
605 v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32)
606 cob.Write(source[n:pos])
607 n = i + 1
608 runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
609 cob.Write(buf[:runeSize])
610 continue
611 }
612 }
613 }
614 }
615 i = next - 1
616 }
617 }
618 if cob.IsCopied() {
619 cob.Write(source[n:])
620 }
621 return cob.Bytes()
622}
623
624// ResolveEntityNames resolve entity references like 'ö" .
625func ResolveEntityNames(source []byte) []byte {
626 cob := NewCopyOnWriteBuffer(source)
627 limit := len(source)
628 ok := false
629 n := 0
630 for i := 0; i < limit; i++ {
631 if source[i] == '&' {
632 pos := i
633 next := i + 1
634 if !(next < limit && source[next] == '#') {
635 start := next
636 i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric)
637 if ok && i < limit && source[i] == ';' {
638 name := BytesToReadOnlyString(source[start:i])
639 entity, ok := LookUpHTML5EntityByName(name)
640 if ok {
641 cob.Write(source[n:pos])
642 n = i + 1
643 cob.Write(entity.Characters)
644 continue
645 }
646 }
647 }
648 i = next - 1
649 }
650 }
651 if cob.IsCopied() {
652 cob.Write(source[n:])
653 }
654 return cob.Bytes()
655}
656
657var htmlSpace = []byte("%20")
658
659// URLEscape escape the given URL.
660// If resolveReference is set true:
661// 1. unescape punctuations
662// 2. resolve numeric references
663// 3. resolve entity references
664//
665// URL encoded values (%xx) are kept as is.
666func URLEscape(v []byte, resolveReference bool) []byte {
667 if resolveReference {
668 v = UnescapePunctuations(v)
669 v = ResolveNumericReferences(v)
670 v = ResolveEntityNames(v)
671 }
672 cob := NewCopyOnWriteBuffer(v)
673 limit := len(v)
674 n := 0
675
676 for i := 0; i < limit; {
677 c := v[i]
678 if urlEscapeTable[c] == 1 {
679 i++
680 continue
681 }
682 if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) {
683 i += 3
684 continue
685 }
686 u8len := utf8lenTable[c]
687 if u8len == 99 { // invalid utf8 leading byte, skip it
688 i++
689 continue
690 }
691 if c == ' ' {
692 cob.Write(v[n:i])
693 cob.Write(htmlSpace)
694 i++
695 n = i
696 continue
697 }
698 if int(u8len) > len(v) {
699 u8len = int8(len(v) - 1)
700 }
701 if u8len == 0 {
702 i++
703 n = i
704 continue
705 }
706 cob.Write(v[n:i])
707 stop := i + int(u8len)
708 if stop > len(v) {
709 i++
710 n = i
711 continue
712 }
713 cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop]))))
714 i += int(u8len)
715 n = i
716 }
717 if cob.IsCopied() && n < limit {
718 cob.Write(v[n:])
719 }
720 return cob.Bytes()
721}
722
723// FindURLIndex returns a stop index value if the given bytes seem an URL.
724// This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
725func FindURLIndex(b []byte) int {
726 i := 0
727 if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
728 return -1
729 }
730 i++
731 for ; i < len(b); i++ {
732 c := b[i]
733 if urlTable[c]&4 != 4 {
734 break
735 }
736 }
737 if i == 1 || i > 33 || i >= len(b) {
738 return -1
739 }
740 if b[i] != ':' {
741 return -1
742 }
743 i++
744 for ; i < len(b); i++ {
745 c := b[i]
746 if urlTable[c]&1 != 1 {
747 break
748 }
749 }
750 return i
751}
752
753var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`)
754
755// FindEmailIndex returns a stop index value if the given bytes seem an email address.
756func FindEmailIndex(b []byte) int {
757 // TODO: eliminate regexps
758 i := 0
759 for ; i < len(b); i++ {
760 c := b[i]
761 if emailTable[c]&1 != 1 {
762 break
763 }
764 }
765 if i == 0 {
766 return -1
767 }
768 if i >= len(b) || b[i] != '@' {
769 return -1
770 }
771 i++
772 if i >= len(b) {
773 return -1
774 }
775 match := emailDomainRegexp.FindSubmatchIndex(b[i:])
776 if match == nil {
777 return -1
778 }
779 return i + match[1]
780}
781
// spaces is the cutset used by TrimLeftSpace and TrimRightSpace: space, tab,
// newline, vertical tab, form feed and carriage return.
var spaces = []byte(" \t\n\x0b\x0c\x0d")
783
784var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
785
786var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
787
788// a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()#
789var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
790
791var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99}
792
793var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
794
795var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
796
797// UTF8Len returns a byte length of the utf-8 character.
798func UTF8Len(b byte) int8 {
799 return utf8lenTable[b]
800}
801
802// IsPunct returns true if the given character is a punctuation, otherwise false.
803func IsPunct(c byte) bool {
804 return punctTable[c] == 1
805}
806
807// IsPunctRune returns true if the given rune is a punctuation, otherwise false.
808func IsPunctRune(r rune) bool {
809 return int32(r) <= 256 && IsPunct(byte(r)) || unicode.IsPunct(r)
810}
811
812// IsSpace returns true if the given character is a space, otherwise false.
813func IsSpace(c byte) bool {
814 return spaceTable[c] == 1
815}
816
817// IsSpaceRune returns true if the given rune is a space, otherwise false.
818func IsSpaceRune(r rune) bool {
819 return int32(r) <= 256 && IsSpace(byte(r)) || unicode.IsSpace(r)
820}
821
// IsNumeric reports whether c is an ASCII decimal digit.
func IsNumeric(c byte) bool {
	return '0' <= c && c <= '9'
}
826
// IsHexDecimal reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func IsHexDecimal(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
831
// IsAlphaNumeric reports whether c is an ASCII letter or decimal digit.
func IsAlphaNumeric(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	return false
}
836
// IsEastAsianWideRune reports whether r is treated as an East Asian wide
// character, i.e. whether it belongs to the Hiragana, Katakana, Han or
// Hangul scripts or to the Lm (modifier letter) category.
func IsEastAsianWideRune(r rune) bool {
	return unicode.In(r,
		unicode.Hiragana,
		unicode.Katakana,
		unicode.Han,
		unicode.Lm,
		unicode.Hangul,
	)
}
845
// A BufWriter is a subset of the bufio.Writer .
// It embeds io.Writer and adds the buffer-inspection, flush and
// byte/rune/string write methods listed below.
type BufWriter interface {
	io.Writer
	Available() int
	Buffered() int
	Flush() error
	WriteByte(c byte) error
	WriteRune(r rune) (size int, err error)
	WriteString(s string) (int, error)
}
856
// PrioritizedValue pairs an arbitrary value with an integer priority.
type PrioritizedValue struct {
	// Value is the payload being prioritized.
	Value interface{}
	// Priority is the sort key of Value.
	Priority int
}

// PrioritizedSlice is a slice of PrioritizedValue.
type PrioritizedSlice []PrioritizedValue

// Sort orders the slice in place by ascending Priority.
func (s PrioritizedSlice) Sort() {
	byPriority := func(a, b int) bool {
		return s[a].Priority < s[b].Priority
	}
	sort.Slice(s, byPriority)
}

// Remove returns the slice without the first element whose Value equals v.
// The removal reuses the receiver's backing array. If no element matches,
// the receiver is returned unchanged.
func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice {
	for idx := range s {
		if s[idx].Value == v {
			return append(s[:idx], s[idx+1:]...)
		}
	}
	return s
}

// Prioritized returns a PrioritizedValue holding v with the given priority.
func Prioritized(v interface{}, priority int) PrioritizedValue {
	return PrioritizedValue{Value: v, Priority: priority}
}
895
// bytesHash returns a hash of b, starting from 5381 and computing
// hash*33 + c for each byte c.
func bytesHash(b []byte) uint64 {
	var hash uint64 = 5381
	for _, c := range b {
		hash = ((hash << 5) + hash) + uint64(c)
	}
	return hash
}

// BytesFilter is a set of byte strings optimized for membership tests.
//
// The implementation in this file uses no locking: concurrent Contains
// calls only read, but Add mutates the filter and must not run concurrently
// with other calls on the same filter.
type BytesFilter interface {
	// Add adds given bytes to this set.
	Add([]byte)

	// Contains return true if this set contains given bytes, otherwise false.
	Contains([]byte) bool

	// Extend copies this filter and adds given bytes to new filter.
	Extend(...[]byte) BytesFilter
}

// bytesFilter implements BytesFilter with a per-position byte prefilter
// backed by hash slots.
type bytesFilter struct {
	// chars[c] has bit i set if some element has byte c at position i
	// (only positions below threshold are recorded).
	chars [256]uint8
	// threshold is the number of leading positions tracked in chars.
	threshold int
	// slots holds the elements, bucketed by bytesHash modulo len(slots).
	slots [][][]byte
}

// NewBytesFilter returns a new BytesFilter containing the given elements.
func NewBytesFilter(elements ...[]byte) BytesFilter {
	s := &bytesFilter{
		threshold: 3,
		slots:     make([][][]byte, 64),
	}
	for _, element := range elements {
		s.Add(element)
	}
	return s
}

// prefixLen returns how many leading bytes of b take part in the chars
// prefilter.
func (s *bytesFilter) prefixLen(b []byte) int {
	if len(b) < s.threshold {
		return len(b)
	}
	return s.threshold
}

// Add adds b to the filter. The slice is stored as is, not copied.
func (s *bytesFilter) Add(b []byte) {
	m := s.prefixLen(b)
	for i := 0; i < m; i++ {
		s.chars[b[i]] |= 1 << uint8(i)
	}
	h := bytesHash(b) % uint64(len(s.slots))
	s.slots[h] = append(s.slots[h], b)
}

// Extend returns a new filter containing every element of s plus bs.
// The receiver is left unmodified, and the new filter gets its own slot
// slices so that later additions to either filter cannot affect the other.
func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter {
	newFilter := &bytesFilter{
		chars:     s.chars,
		threshold: s.threshold,
		slots:     make([][][]byte, len(s.slots)),
	}
	for k, v := range s.slots {
		if len(v) == 0 {
			continue
		}
		// Copy with cap == len so that an append in the new filter never
		// writes into a backing array shared with s.
		newSlot := make([][]byte, len(v))
		copy(newSlot, v)
		newFilter.slots[k] = newSlot
	}
	for _, b := range bs {
		newFilter.Add(b)
	}
	return newFilter
}

// Contains reports whether b has been added to the filter.
func (s *bytesFilter) Contains(b []byte) bool {
	// Cheap rejection: every leading byte must have been seen at its position.
	m := s.prefixLen(b)
	for i := 0; i < m; i++ {
		if (s.chars[b[i]] & (1 << uint8(i))) == 0 {
			return false
		}
	}
	h := bytesHash(b) % uint64(len(s.slots))
	for _, element := range s.slots[h] {
		if bytes.Equal(element, b) {
			return true
		}
	}
	return false
}