1 files changed, 494 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/v2/html/lex.go b/vendor/github.com/tdewolff/parse/v2/html/lex.go
new file mode 100644
index 0000000..4325024
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/v2/html/lex.go
@@ -0,0 +1,494 @@
+// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
+package html
+import (
+        "strconv"
+        "github.com/tdewolff/parse/v2"
+)
+// TokenType determines the type of token, e.g. a start tag, an attribute or a comment.
+type TokenType uint32
+// TokenType values.
+const (
+        ErrorToken TokenType = iota // extra token when errors occur
+        CommentToken                // comment, including bogus comments such as `<?...>` and unrecognized `<!...>`
+        DoctypeToken                // `<!doctype ...>` declaration
+        StartTagToken               // opening of a start tag (`<name`); attributes follow as separate tokens
+        StartTagCloseToken          // the `>` that ends a start tag
+        StartTagVoidToken           // the `/>` that ends a start tag
+        EndTagToken                 // end tag (`</name>`)
+        AttributeToken              // a single attribute inside a start tag
+        TextToken                   // text, raw text element content, or a CDATA section
+        SvgToken                    // an entire svg element, from `<svg` through its end tag, as one token
+        MathToken                   // an entire math element, from `<math` through its end tag, as one token
+)
+// String returns the name of a TokenType. Values outside the declared range
+// are rendered as "Invalid(n)" where n is the numeric value.
+func (tt TokenType) String() string {
+        // Name table indexed by the token constants themselves.
+        names := [...]string{
+                ErrorToken:         "Error",
+                CommentToken:       "Comment",
+                DoctypeToken:       "Doctype",
+                StartTagToken:      "StartTag",
+                StartTagCloseToken: "StartTagClose",
+                StartTagVoidToken:  "StartTagVoid",
+                EndTagToken:        "EndTag",
+                AttributeToken:     "Attribute",
+                TextToken:          "Text",
+                SvgToken:           "Svg",
+                MathToken:          "Math",
+        }
+        if tt <= MathToken {
+                return names[tt]
+        }
+        return "Invalid(" + strconv.Itoa(int(tt)) + ")"
+}
+////////////////////////////////////////////////////////////////
+// Lexer is the state for the lexer.
+type Lexer struct {
+        // r is the input being tokenized.
+        r   *parse.Input
+        // err is an error raised by the lexer itself; when set, Err returns it instead of the input's error.
+        err error
+        // rawTag is the hash of the raw text element (textarea, title, style, xmp, iframe, script or plaintext)
+        // whose start tag was just lexed; Next reads that element's content as raw text and then resets it to zero.
+        rawTag Hash
+        // inTag is true while inside a start tag, so that Next yields attributes or the tag's closing token.
+        inTag  bool
+        // text is the value returned by Text for the most recent token.
+        text    []byte
+        // attrVal is the value returned by AttrVal for the most recent attribute.
+        attrVal []byte
+}
+// NewLexer returns a new Lexer that tokenizes the given parse.Input.
+func NewLexer(r *parse.Input) *Lexer {
+        l := new(Lexer)
+        l.r = r
+        return l
+}
+// Err returns the error encountered during lexing. An error recorded by the lexer itself takes precedence;
+// otherwise the error of the underlying input is returned, which is often io.EOF.
+func (l *Lexer) Err() error {
+        if l.err == nil {
+                return l.r.Err()
+        }
+        return l.err
+}
+// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
+// The value is reset to nil at the start of every call to Next, so it is nil for tokens that do not set it
+// (such as StartTagCloseToken and StartTagVoidToken). Start tag and attribute names are lowercased.
+func (l *Lexer) Text() []byte {
+        return l.text
+}
+// AttrVal returns the attribute value when an AttributeToken was returned from Next.
+// The value includes the surrounding quotes when the attribute was quoted, and is nil when the attribute has no value.
+func (l *Lexer) AttrVal() []byte {
+        return l.attrVal
+}
+// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
+//
+// The returned slice holds the bytes consumed for the token and is nil for ErrorToken. Text and AttrVal give
+// access to the parsed parts of the token.
+func (l *Lexer) Next() (TokenType, []byte) {
+        l.text = nil
+        var c byte
+        if l.inTag {
+                // Inside a start tag: emit either the next attribute or the token that closes the tag.
+                l.attrVal = nil
+                for { // before attribute name state
+                        if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+                                l.r.Move(1)
+                                continue
+                        }
+                        break
+                }
+                if c == 0 && l.r.Err() != nil {
+                        return ErrorToken, nil
+                } else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
+                        // anything other than `>` or `/>` starts an attribute
+                        return AttributeToken, l.shiftAttribute()
+                }
+                // NOTE(review): Skip presumably drops the whitespace consumed above so that it is not part of the returned token — confirm against parse.Input
+                l.r.Skip()
+                l.inTag = false
+                if c == '/' {
+                        l.r.Move(2)
+                        return StartTagVoidToken, l.r.Shift()
+                }
+                l.r.Move(1)
+                return StartTagCloseToken, l.r.Shift()
+        }
+        if l.rawTag != 0 {
+                // The previous start tag opened a raw text element: its content is returned as one text token.
+                // rawTag is cleared either way, so an empty content simply falls through to regular lexing.
+                if rawText := l.shiftRawText(); len(rawText) > 0 {
+                        l.text = rawText
+                        l.rawTag = 0
+                        return TextToken, rawText
+                }
+                l.rawTag = 0
+        }
+        for {
+                c = l.r.Peek(0)
+                if c == '<' {
+                        c = l.r.Peek(1)
+                        // `</` counts as an end tag only when it is not directly followed by `>` or the end of the input
+                        isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)
+                        if l.r.Pos() > 0 {
+                                if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
+                                        // return currently buffered texttoken so that we can return tag next iteration
+                                        l.text = l.r.Shift()
+                                        return TextToken, l.text
+                                }
+                        } else if isEndTag {
+                                l.r.Move(2)
+                                // only endtags that are not followed by > or EOF arrive here
+                                if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+                                        // `</` followed by a non-letter is a bogus comment
+                                        return CommentToken, l.shiftBogusComment()
+                                }
+                                return EndTagToken, l.shiftEndTag()
+                        } else if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
+                                l.r.Move(1)
+                                l.inTag = true
+                                return l.shiftStartTag()
+                        } else if c == '!' {
+                                // comment, CDATA section, doctype or bogus comment
+                                l.r.Move(2)
+                                return l.readMarkup()
+                        } else if c == '?' {
+                                l.r.Move(1)
+                                return CommentToken, l.shiftBogusComment()
+                        }
+                        // any other `<` does not start markup and is consumed below as part of the text
+                } else if c == 0 && l.r.Err() != nil {
+                        // end of input: flush buffered text first, the error is reported on the following call
+                        if l.r.Pos() > 0 {
+                                l.text = l.r.Shift()
+                                return TextToken, l.text
+                        }
+                        return ErrorToken, nil
+                }
+                l.r.Move(1)
+        }
+}
+////////////////////////////////////////////////////////////////
+// The following functions follow the specifications at https://html.spec.whatwg.org/multipage/parsing.html
+
+// shiftRawText consumes and returns the content of the raw text element identified by l.rawTag.
+// For plaintext everything up to the end of the input is returned. For all other elements the content runs up to,
+// but does not include, the `</` of the matching end tag, or up to the end of the input when there is none.
+func (l *Lexer) shiftRawText() []byte {
+        if l.rawTag == Plaintext {
+                for {
+                        if l.r.Peek(0) == 0 && l.r.Err() != nil {
+                                return l.r.Shift()
+                        }
+                        l.r.Move(1)
+                }
+        } else { // RCDATA, RAWTEXT and SCRIPT
+                for {
+                        c := l.r.Peek(0)
+                        if c == '<' {
+                                if l.r.Peek(1) == '/' {
+                                        // possible end tag: read the letters after `</` and compare them to the current raw tag
+                                        mark := l.r.Pos()
+                                        l.r.Move(2)
+                                        for {
+                                                if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+                                                        break
+                                                }
+                                                l.r.Move(1)
+                                        }
+                                        if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice
+                                                // matching end tag: rewind to just before `</` so that the end tag is lexed by the next call to Next
+                                                l.r.Rewind(mark)
+                                                return l.r.Shift()
+                                        }
+                                } else if l.rawTag == Script && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' {
+                                        // `<!--` inside a script: scan until `-->`, tracking nested `<script` ... `</script` pairs
+                                        l.r.Move(4)
+                                        inScript := false
+                                        for {
+                                                c := l.r.Peek(0)
+                                                if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' {
+                                                        l.r.Move(3)
+                                                        break
+                                                } else if c == '<' {
+                                                        isEnd := l.r.Peek(1) == '/'
+                                                        if isEnd {
+                                                                l.r.Move(2)
+                                                        } else {
+                                                                l.r.Move(1)
+                                                        }
+                                                        // mark is the position of the first letter of the tag name
+                                                        mark := l.r.Pos()
+                                                        for {
+                                                                if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+                                                                        break
+                                                                }
+                                                                l.r.Move(1)
+                                                        }
+                                                        if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice
+                                                                if !isEnd {
+                                                                        inScript = true
+                                                                } else {
+                                                                        if !inScript {
+                                                                                // `</script` without a preceding nested `<script` ends the element; rewind to before `</`
+                                                                                l.r.Rewind(mark - 2)
+                                                                                return l.r.Shift()
+                                                                        }
+                                                                        inScript = false
+                                                                }
+                                                        }
+                                                } else if c == 0 && l.r.Err() != nil {
+                                                        return l.r.Shift()
+                                                } else {
+                                                        l.r.Move(1)
+                                                }
+                                        }
+                                } else {
+                                        l.r.Move(1)
+                                }
+                        } else if c == 0 && l.r.Err() != nil {
+                                return l.r.Shift()
+                        } else {
+                                l.r.Move(1)
+                        }
+                }
+        }
+}
+// readMarkup lexes the construct that follows `<!`, which the caller has already consumed.
+// It returns a CommentToken for `<!-- ... -->`, a TextToken for `<![CDATA[ ... ]]>`, a DoctypeToken for
+// `<!doctype ...>` (matched case-insensitively) and otherwise a bogus comment as CommentToken.
+// l.text is set to the content between the opening and closing delimiters.
+func (l *Lexer) readMarkup() (TokenType, []byte) {
+        if l.at('-', '-') {
+                l.r.Move(2)
+                for {
+                        // the content starts after the four bytes of `<!--`
+                        if l.r.Peek(0) == 0 && l.r.Err() != nil {
+                                l.text = l.r.Lexeme()[4:]
+                                return CommentToken, l.r.Shift()
+                        } else if l.at('-', '-', '>') {
+                                l.text = l.r.Lexeme()[4:]
+                                l.r.Move(3)
+                                return CommentToken, l.r.Shift()
+                        } else if l.at('-', '-', '!', '>') {
+                                // `--!>` also closes a comment
+                                l.text = l.r.Lexeme()[4:]
+                                l.r.Move(4)
+                                return CommentToken, l.r.Shift()
+                        }
+                        l.r.Move(1)
+                }
+        } else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
+                l.r.Move(7)
+                for {
+                        // the content starts after the nine bytes of `<![CDATA[`
+                        if l.r.Peek(0) == 0 && l.r.Err() != nil {
+                                l.text = l.r.Lexeme()[9:]
+                                return TextToken, l.r.Shift()
+                        } else if l.at(']', ']', '>') {
+                                l.text = l.r.Lexeme()[9:]
+                                l.r.Move(3)
+                                return TextToken, l.r.Shift()
+                        }
+                        l.r.Move(1)
+                }
+        } else {
+                if l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e') {
+                        l.r.Move(7)
+                        // NOTE(review): the text offset below is a fixed 9 (`<!doctype`), so a space consumed here remains part of l.text — confirm this is intended
+                        if l.r.Peek(0) == ' ' {
+                                l.r.Move(1)
+                        }
+                        for {
+                                if c := l.r.Peek(0); c == '>' || c == 0 && l.r.Err() != nil {
+                                        l.text = l.r.Lexeme()[9:]
+                                        if c == '>' {
+                                                l.r.Move(1)
+                                        }
+                                        return DoctypeToken, l.r.Shift()
+                                }
+                                l.r.Move(1)
+                        }
+                }
+        }
+        return CommentToken, l.shiftBogusComment()
+}
+// shiftBogusComment consumes a bogus comment up to and including the next `>`, or up to the end of the input.
+// l.text is set to everything after the first two bytes of the token, excluding the closing `>`.
+// The complete token is returned.
+func (l *Lexer) shiftBogusComment() []byte {
+        for {
+                ch := l.r.Peek(0)
+                closed := ch == '>'
+                if closed || ch == 0 && l.r.Err() != nil {
+                        l.text = l.r.Lexeme()[2:]
+                        if closed {
+                                // the closing `>` belongs to the token but not to its text
+                                l.r.Move(1)
+                        }
+                        return l.r.Shift()
+                }
+                l.r.Move(1)
+        }
+}
+// shiftStartTag consumes the name of a start tag; the caller has already consumed the `<`.
+// l.text is set to the lowercased tag name. For svg and math the whole element is consumed and returned
+// as SvgToken or MathToken (ErrorToken when lexing it failed). For raw text elements l.rawTag is set so
+// that Next reads their content as raw text. In all other cases a StartTagToken is returned.
+func (l *Lexer) shiftStartTag() (TokenType, []byte) {
+scan:
+        for {
+                // the tag name ends at whitespace, `>`, `/>` or the end of the input
+                switch l.r.Peek(0) {
+                case ' ', '>', '\t', '\n', '\r', '\f':
+                        break scan
+                case '/':
+                        if l.r.Peek(1) == '>' {
+                                break scan
+                        }
+                case 0:
+                        if l.r.Err() != nil {
+                                break scan
+                        }
+                }
+                l.r.Move(1)
+        }
+        l.text = parse.ToLower(l.r.Lexeme()[1:])
+        switch h := ToHash(l.text); h {
+        case Svg, Math:
+                data := l.shiftXML(h)
+                if l.err != nil {
+                        return ErrorToken, nil
+                }
+                // the whole element was consumed, so there are no attributes to lex
+                l.inTag = false
+                if h == Svg {
+                        return SvgToken, data
+                }
+                return MathToken, data
+        case Textarea, Title, Style, Xmp, Iframe, Script, Plaintext:
+                l.rawTag = h
+        }
+        return StartTagToken, l.r.Shift()
+}
+// shiftAttribute consumes a single attribute and returns its bytes.
+// l.text is set to the lowercased attribute name. l.attrVal is set to the attribute value, including the
+// surrounding quotes if present, or to nil when the attribute has no `=`.
+func (l *Lexer) shiftAttribute() []byte {
+        nameStart := l.r.Pos()
+        var c byte
+        for { // attribute name state
+                if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
+                        break
+                }
+                l.r.Move(1)
+        }
+        nameEnd := l.r.Pos()
+        for { // after attribute name state
+                if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+                        l.r.Move(1)
+                        continue
+                }
+                break
+        }
+        if c == '=' {
+                l.r.Move(1)
+                for { // before attribute value state
+                        if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+                                l.r.Move(1)
+                                continue
+                        }
+                        break
+                }
+                // attrPos points at the opening quote, or at the first byte of an unquoted value
+                attrPos := l.r.Pos()
+                delim := c
+                if delim == '"' || delim == '\'' { // attribute value single- and double-quoted state
+                        l.r.Move(1)
+                        for {
+                                c := l.r.Peek(0)
+                                if c == delim {
+                                        l.r.Move(1)
+                                        break
+                                } else if c == 0 && l.r.Err() != nil {
+                                        // unterminated quoted value: the value runs to the end of the input
+                                        break
+                                }
+                                l.r.Move(1)
+                        }
+                } else { // attribute value unquoted state
+                        for {
+                                if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
+                                        break
+                                }
+                                l.r.Move(1)
+                        }
+                }
+                l.attrVal = l.r.Lexeme()[attrPos:]
+        } else {
+                // no value: give back the whitespace consumed after the name
+                l.r.Rewind(nameEnd)
+                l.attrVal = nil
+        }
+        l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
+        return l.r.Shift()
+}
+// shiftEndTag consumes the remainder of an end tag, up to and including the closing `>` or up to the end of
+// the input. The caller has already consumed `</`. l.text is set to the tag name with trailing whitespace
+// removed, and the complete lowercased token is returned.
+func (l *Lexer) shiftEndTag() []byte {
+        for {
+                c := l.r.Peek(0)
+                if c == '>' {
+                        l.text = l.r.Lexeme()[2:]
+                        l.r.Move(1)
+                        break
+                } else if c == 0 && l.r.Err() != nil {
+                        l.text = l.r.Lexeme()[2:]
+                        break
+                }
+                l.r.Move(1)
+        }
+        // Trim trailing whitespace from the tag name. Form feed is included so that the set matches the
+        // whitespace recognized everywhere else in this lexer (space, tab, LF, CR and FF).
+        end := len(l.text)
+        for end > 0 {
+                if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+                        end--
+                        continue
+                }
+                break
+        }
+        l.text = l.text[:end]
+        return parse.ToLower(l.r.Shift())
+}
+// shiftXML parses the content of a svg or math tag according to the XML 1.1 specifications, including the tag itself.
+// So far we have already parsed `<svg` or `<math`.
+//
+// rawTag is the hash of the element name (Svg or Math) whose end tag terminates the scan. The consumed bytes are
+// returned. When a NULL byte is encountered that is not the end of the input, l.err is set.
+func (l *Lexer) shiftXML(rawTag Hash) []byte {
+        // First pass: scan up to and including the name of the matching end tag (`</svg` or `</math`).
+        // Only double quotes are tracked; a `</` inside a double-quoted string is not treated as an end tag.
+        inQuote := false
+        for {
+                c := l.r.Peek(0)
+                if c == '"' {
+                        inQuote = !inQuote
+                        l.r.Move(1)
+                } else if c == '<' && !inQuote && l.r.Peek(1) == '/' {
+                        mark := l.r.Pos()
+                        l.r.Move(2)
+                        for {
+                                if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+                                        break
+                                }
+                                l.r.Move(1)
+                        }
+                        if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
+                                break
+                        }
+                } else if c == 0 {
+                        // a zero byte is either the end of the input (Err is set) or a literal NULL character, which is an error
+                        if l.r.Err() == nil {
+                                l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
+                        }
+                        return l.r.Shift()
+                } else {
+                        l.r.Move(1)
+                }
+        }
+        // Second pass: consume the rest of the end tag up to and including its `>`.
+        for {
+                c := l.r.Peek(0)
+                if c == '>' {
+                        l.r.Move(1)
+                        break
+                } else if c == 0 {
+                        if l.r.Err() == nil {
+                                l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
+                        }
+                        return l.r.Shift()
+                }
+                l.r.Move(1)
+        }
+        return l.r.Shift()
+}
+////////////////////////////////////////////////////////////////
+// at reports whether the upcoming input bytes are equal to b. No input is consumed.
+func (l *Lexer) at(b ...byte) bool {
+        matched := 0
+        for matched < len(b) && l.r.Peek(matched) == b[matched] {
+                matched++
+        }
+        return matched == len(b)
+}
+// atCaseInsensitive reports whether the upcoming input bytes match b, where each input byte may either be
+// equal to the byte in b or lie 'a'-'A' below it. b is expected to hold lowercase ASCII letters, so that the
+// second case matches the uppercase form. No input is consumed.
+func (l *Lexer) atCaseInsensitive(b ...byte) bool {
+        for idx := 0; idx < len(b); idx++ {
+                want := b[idx]
+                got := l.r.Peek(idx)
+                if got == want || got+('a'-'A') == want {
+                        continue
+                }
+                return false
+        }
+        return true
+}

diff --git a/vendor/github.com/tdewolff/parse/v2/html/lex.go b/vendor/github.com/tdewolff/parse/v2/html/lex.go new file mode 100644 index 0000000..4325024 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/html/lex.go
@@ -0,0 +1,494 @@
	1	// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
	2	package html
	3
	4	import (
	5	"strconv"
	6
	7	"github.com/tdewolff/parse/v2"
	8	)
	9
	10	// TokenType determines the type of token, eg. a number or a semicolon.
	11	type TokenType uint32
	12
	13	// TokenType values.
	14	const (
	15	ErrorToken TokenType = iota // extra token when errors occur
	16	CommentToken
	17	DoctypeToken
	18	StartTagToken
	19	StartTagCloseToken
	20	StartTagVoidToken
	21	EndTagToken
	22	AttributeToken
	23	TextToken
	24	SvgToken
	25	MathToken
	26	)
	27
	28	// String returns the string representation of a TokenType.
	29	func (tt TokenType) String() string {
	30	switch tt {
	31	case ErrorToken:
	32	return "Error"
	33	case CommentToken:
	34	return "Comment"
	35	case DoctypeToken:
	36	return "Doctype"
	37	case StartTagToken:
	38	return "StartTag"
	39	case StartTagCloseToken:
	40	return "StartTagClose"
	41	case StartTagVoidToken:
	42	return "StartTagVoid"
	43	case EndTagToken:
	44	return "EndTag"
	45	case AttributeToken:
	46	return "Attribute"
	47	case TextToken:
	48	return "Text"
	49	case SvgToken:
	50	return "Svg"
	51	case MathToken:
	52	return "Math"
	53	}
	54	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
	55	}
	56
	57	////////////////////////////////////////////////////////////////
	58
	59	// Lexer is the state for the lexer.
	60	type Lexer struct {
	61	r *parse.Input
	62	err error
	63
	64	rawTag Hash
	65	inTag bool
	66
	67	text []byte
	68	attrVal []byte
	69	}
	70
	71	// NewLexer returns a new Lexer for a given io.Reader.
	72	func NewLexer(r *parse.Input) *Lexer {
	73	return &Lexer{
	74	r: r,
	75	}
	76	}
	77
	78	// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned.
	79	func (l *Lexer) Err() error {
	80	if l.err != nil {
	81	return l.err
	82	}
	83	return l.r.Err()
	84	}
	85
	86	// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
	87	func (l *Lexer) Text() []byte {
	88	return l.text
	89	}
	90
	91	// AttrVal returns the attribute value when an AttributeToken was returned from Next.
	92	func (l *Lexer) AttrVal() []byte {
	93	return l.attrVal
	94	}
	95
	96	// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
	97	func (l *Lexer) Next() (TokenType, []byte) {
	98	l.text = nil
	99	var c byte
	100	if l.inTag {
	101	l.attrVal = nil
	102	for { // before attribute name state
	103	if c = l.r.Peek(0); c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' \|\| c == '\f' {
	104	l.r.Move(1)
	105	continue
	106	}
	107	break
	108	}
	109	if c == 0 && l.r.Err() != nil {
	110	return ErrorToken, nil
	111	} else if c != '>' && (c != '/' \|\| l.r.Peek(1) != '>') {
	112	return AttributeToken, l.shiftAttribute()
	113	}
	114	l.r.Skip()
	115	l.inTag = false
	116	if c == '/' {
	117	l.r.Move(2)
	118	return StartTagVoidToken, l.r.Shift()
	119	}
	120	l.r.Move(1)
	121	return StartTagCloseToken, l.r.Shift()
	122	}
	123
	124	if l.rawTag != 0 {
	125	if rawText := l.shiftRawText(); len(rawText) > 0 {
	126	l.text = rawText
	127	l.rawTag = 0
	128	return TextToken, rawText
	129	}
	130	l.rawTag = 0
	131	}
	132
	133	for {
	134	c = l.r.Peek(0)
	135	if c == '<' {
	136	c = l.r.Peek(1)
	137	isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 \|\| l.r.PeekErr(2) == nil)
	138	if l.r.Pos() > 0 {
	139	if isEndTag \|\| 'a' <= c && c <= 'z' \|\| 'A' <= c && c <= 'Z' \|\| c == '!' \|\| c == '?' {
	140	// return currently buffered texttoken so that we can return tag next iteration
	141	l.text = l.r.Shift()
	142	return TextToken, l.text
	143	}
	144	} else if isEndTag {
	145	l.r.Move(2)
	146	// only endtags that are not followed by > or EOF arrive here
	147	if c = l.r.Peek(0); !('a' <= c && c <= 'z' \|\| 'A' <= c && c <= 'Z') {
	148	return CommentToken, l.shiftBogusComment()
	149	}
	150	return EndTagToken, l.shiftEndTag()
	151	} else if 'a' <= c && c <= 'z' \|\| 'A' <= c && c <= 'Z' {
	152	l.r.Move(1)
	153	l.inTag = true
	154	return l.shiftStartTag()
	155	} else if c == '!' {
	156	l.r.Move(2)
	157	return l.readMarkup()
	158	} else if c == '?' {
	159	l.r.Move(1)
	160	return CommentToken, l.shiftBogusComment()
	161	}
	162	} else if c == 0 && l.r.Err() != nil {
	163	if l.r.Pos() > 0 {
	164	l.text = l.r.Shift()
	165	return TextToken, l.text
	166	}
	167	return ErrorToken, nil
	168	}
	169	l.r.Move(1)
	170	}
	171	}
	172
	173	////////////////////////////////////////////////////////////////
	174
	175	// The following functions follow the specifications at https://html.spec.whatwg.org/multipage/parsing.html
	176
	177	func (l *Lexer) shiftRawText() []byte {
	178	if l.rawTag == Plaintext {
	179	for {
	180	if l.r.Peek(0) == 0 && l.r.Err() != nil {
	181	return l.r.Shift()
	182	}
	183	l.r.Move(1)
	184	}
	185	} else { // RCDATA, RAWTEXT and SCRIPT
	186	for {
	187	c := l.r.Peek(0)
	188	if c == '<' {
	189	if l.r.Peek(1) == '/' {
	190	mark := l.r.Pos()
	191	l.r.Move(2)
	192	for {
	193	if c = l.r.Peek(0); !('a' <= c && c <= 'z' \|\| 'A' <= c && c <= 'Z') {
	194	break
	195	}
	196	l.r.Move(1)
	197	}
	198	if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice
	199	l.r.Rewind(mark)
	200	return l.r.Shift()
	201	}
	202	} else if l.rawTag == Script && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' {
	203	l.r.Move(4)
	204	inScript := false
	205	for {
	206	c := l.r.Peek(0)
	207	if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' {
	208	l.r.Move(3)
	209	break
	210	} else if c == '<' {
	211	isEnd := l.r.Peek(1) == '/'
	212	if isEnd {
	213	l.r.Move(2)
	214	} else {
	215	l.r.Move(1)
	216	}
	217	mark := l.r.Pos()
	218	for {
	219	if c = l.r.Peek(0); !('a' <= c && c <= 'z' \|\| 'A' <= c && c <= 'Z') {
	220	break
	221	}
	222	l.r.Move(1)
	223	}
	224	if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice
	225	if !isEnd {
	226	inScript = true
	227	} else {
	228	if !inScript {
	229	l.r.Rewind(mark - 2)
	230	return l.r.Shift()
	231	}
	232	inScript = false
	233	}
	234	}
	235	} else if c == 0 && l.r.Err() != nil {
	236	return l.r.Shift()
	237	} else {
	238	l.r.Move(1)
	239	}
	240	}
	241	} else {
	242	l.r.Move(1)
	243	}
	244	} else if c == 0 && l.r.Err() != nil {
	245	return l.r.Shift()
	246	} else {
	247	l.r.Move(1)
	248	}
	249	}
	250	}
	251	}
	252
	253	func (l *Lexer) readMarkup() (TokenType, []byte) {
	254	if l.at('-', '-') {
	255	l.r.Move(2)
	256	for {
	257	if l.r.Peek(0) == 0 && l.r.Err() != nil {
	258	l.text = l.r.Lexeme()[4:]
	259	return CommentToken, l.r.Shift()
	260	} else if l.at('-', '-', '>') {
	261	l.text = l.r.Lexeme()[4:]
	262	l.r.Move(3)
	263	return CommentToken, l.r.Shift()
	264	} else if l.at('-', '-', '!', '>') {
	265	l.text = l.r.Lexeme()[4:]
	266	l.r.Move(4)
	267	return CommentToken, l.r.Shift()
	268	}
	269	l.r.Move(1)
	270	}
	271	} else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
	272	l.r.Move(7)
	273	for {
	274	if l.r.Peek(0) == 0 && l.r.Err() != nil {
	275	l.text = l.r.Lexeme()[9:]
	276	return TextToken, l.r.Shift()
	277	} else if l.at(']', ']', '>') {
	278	l.text = l.r.Lexeme()[9:]
	279	l.r.Move(3)
	280	return TextToken, l.r.Shift()
	281	}
	282	l.r.Move(1)
	283	}
	284	} else {
	285	if l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e') {
	286	l.r.Move(7)
	287	if l.r.Peek(0) == ' ' {
	288	l.r.Move(1)
	289	}
	290	for {
	291	if c := l.r.Peek(0); c == '>' \|\| c == 0 && l.r.Err() != nil {
	292	l.text = l.r.Lexeme()[9:]
	293	if c == '>' {
	294	l.r.Move(1)
	295	}
	296	return DoctypeToken, l.r.Shift()
	297	}
	298	l.r.Move(1)
	299	}
	300	}
	301	}
	302	return CommentToken, l.shiftBogusComment()
	303	}
	304
	305	func (l *Lexer) shiftBogusComment() []byte {
	306	for {
	307	c := l.r.Peek(0)
	308	if c == '>' {
	309	l.text = l.r.Lexeme()[2:]
	310	l.r.Move(1)
	311	return l.r.Shift()
	312	} else if c == 0 && l.r.Err() != nil {
	313	l.text = l.r.Lexeme()[2:]
	314	return l.r.Shift()
	315	}
	316	l.r.Move(1)
	317	}
	318	}
	319
	320	func (l *Lexer) shiftStartTag() (TokenType, []byte) {
	321	for {
	322	if c := l.r.Peek(0); c == ' ' \|\| c == '>' \|\| c == '/' && l.r.Peek(1) == '>' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' \|\| c == '\f' \|\| c == 0 && l.r.Err() != nil {
	323	break
	324	}
	325	l.r.Move(1)
	326	}
	327	l.text = parse.ToLower(l.r.Lexeme()[1:])
	328	if h := ToHash(l.text); h == Textarea \|\| h == Title \|\| h == Style \|\| h == Xmp \|\| h == Iframe \|\| h == Script \|\| h == Plaintext \|\| h == Svg \|\| h == Math {
	329	if h == Svg \|\| h == Math {
	330	data := l.shiftXML(h)
	331	if l.err != nil {
	332	return ErrorToken, nil
	333	}
	334
	335	l.inTag = false
	336	if h == Svg {
	337	return SvgToken, data
	338	}
	339	return MathToken, data
	340	}
	341	l.rawTag = h
	342	}
	343	return StartTagToken, l.r.Shift()
	344	}
	345
	346	func (l *Lexer) shiftAttribute() []byte {
	347	nameStart := l.r.Pos()
	348	var c byte
	349	for { // attribute name state
	350	if c = l.r.Peek(0); c == ' ' \|\| c == '=' \|\| c == '>' \|\| c == '/' && l.r.Peek(1) == '>' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' \|\| c == '\f' \|\| c == 0 && l.r.Err() != nil {
	351	break
	352	}
	353	l.r.Move(1)
	354	}
	355	nameEnd := l.r.Pos()
	356	for { // after attribute name state
	357	if c = l.r.Peek(0); c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' \|\| c == '\f' {
	358	l.r.Move(1)
	359	continue
	360	}
	361	break
	362	}
	363	if c == '=' {
	364	l.r.Move(1)
	365	for { // before attribute value state
	366	if c = l.r.Peek(0); c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' \|\| c == '\f' {
	367	l.r.Move(1)
	368	continue
	369	}
	370	break
	371	}
	372	attrPos := l.r.Pos()
	373	delim := c
	374	if delim == '"' \|\| delim == '\'' { // attribute value single- and double-quoted state
	375	l.r.Move(1)
	376	for {
	377	c := l.r.Peek(0)
	378	if c == delim {
	379	l.r.Move(1)
	380	break
	381	} else if c == 0 && l.r.Err() != nil {
	382	break
	383	}
	384	l.r.Move(1)
	385	}
	386	} else { // attribute value unquoted state
	387	for {
	388	if c := l.r.Peek(0); c == ' ' \|\| c == '>' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' \|\| c == '\f' \|\| c == 0 && l.r.Err() != nil {
	389	break
	390	}
	391	l.r.Move(1)
	392	}
	393	}
	394	l.attrVal = l.r.Lexeme()[attrPos:]
	395	} else {
	396	l.r.Rewind(nameEnd)
	397	l.attrVal = nil
	398	}
	399	l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
	400	return l.r.Shift()
	401	}
	402
	403	func (l *Lexer) shiftEndTag() []byte {
	404	for {
	405	c := l.r.Peek(0)
	406	if c == '>' {
	407	l.text = l.r.Lexeme()[2:]
	408	l.r.Move(1)
	409	break
	410	} else if c == 0 && l.r.Err() != nil {
	411	l.text = l.r.Lexeme()[2:]
	412	break
	413	}
	414	l.r.Move(1)
	415	}
	416
	417	end := len(l.text)
	418	for end > 0 {
	419	if c := l.text[end-1]; c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' {
	420	end--
	421	continue
	422	}
	423	break
	424	}
	425	l.text = l.text[:end]
	426	return parse.ToLower(l.r.Shift())
	427	}
	428
	429	// shiftXML parses the content of a svg or math tag according to the XML 1.1 specifications, including the tag itself.
	430	// So far we have already parsed `<svg` or `<math`.
	431	func (l *Lexer) shiftXML(rawTag Hash) []byte {
	432	inQuote := false
	433	for {
	434	c := l.r.Peek(0)
	435	if c == '"' {
	436	inQuote = !inQuote
	437	l.r.Move(1)
	438	} else if c == '<' && !inQuote && l.r.Peek(1) == '/' {
	439	mark := l.r.Pos()
	440	l.r.Move(2)
	441	for {
	442	if c = l.r.Peek(0); !('a' <= c && c <= 'z' \|\| 'A' <= c && c <= 'Z') {
	443	break
	444	}
	445	l.r.Move(1)
	446	}
	447	if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
	448	break
	449	}
	450	} else if c == 0 {
	451	if l.r.Err() == nil {
	452	l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
	453	}
	454	return l.r.Shift()
	455	} else {
	456	l.r.Move(1)
	457	}
	458	}
	459
	460	for {
	461	c := l.r.Peek(0)
	462	if c == '>' {
	463	l.r.Move(1)
	464	break
	465	} else if c == 0 {
	466	if l.r.Err() == nil {
	467	l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
	468	}
	469	return l.r.Shift()
	470	}
	471	l.r.Move(1)
	472	}
	473	return l.r.Shift()
	474	}
	475
	476	////////////////////////////////////////////////////////////////
	477
	478	func (l *Lexer) at(b ...byte) bool {
	479	for i, c := range b {
	480	if l.r.Peek(i) != c {
	481	return false
	482	}
	483	}
	484	return true
	485	}
	486
	487	func (l *Lexer) atCaseInsensitive(b ...byte) bool {
	488	for i, c := range b {
	489	if l.r.Peek(i) != c && (l.r.Peek(i)+('a'-'A')) != c {
	490	return false
	491	}
	492	}
	493	return true
	494	}