1package parser
  2
  3import (
  4	"bytes"
  5	"regexp"
  6
  7	"github.com/yuin/goldmark/ast"
  8	"github.com/yuin/goldmark/text"
  9	"github.com/yuin/goldmark/util"
 10)
 11
 12type rawHTMLParser struct {
 13}
 14
 15var defaultRawHTMLParser = &rawHTMLParser{}
 16
// NewRawHTMLParser returns an InlineParser that parses raw inline HTML.
// Every call returns the same package-level instance, defaultRawHTMLParser.
func NewRawHTMLParser() InlineParser {
	return defaultRawHTMLParser
}
 22
 23func (s *rawHTMLParser) Trigger() []byte {
 24	return []byte{'<'}
 25}
 26
 27func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
 28	line, _ := block.PeekLine()
 29	if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
 30		return s.parseMultiLineRegexp(openTagRegexp, block, pc)
 31	}
 32	if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
 33		return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
 34	}
 35	if bytes.HasPrefix(line, openComment) {
 36		return s.parseComment(block, pc)
 37	}
 38	if bytes.HasPrefix(line, openProcessingInstruction) {
 39		return s.parseUntil(block, closeProcessingInstruction, pc)
 40	}
 41	if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
 42		return s.parseUntil(block, closeDecl, pc)
 43	}
 44	if bytes.HasPrefix(line, openCDATA) {
 45		return s.parseUntil(block, closeCDATA, pc)
 46	}
 47	return nil
 48}
 49
 50var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`
 51
 52var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
 53var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*[ \t]*/?>`)
 54var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + `\s*>`)
 55
 56var openProcessingInstruction = []byte("<?")
 57var closeProcessingInstruction = []byte("?>")
 58var openCDATA = []byte("<![CDATA[")
 59var closeCDATA = []byte("]]>")
 60var closeDecl = []byte(">")
 61var emptyComment = []byte("<!---->")
 62var invalidComment1 = []byte("<!-->")
 63var invalidComment2 = []byte("<!--->")
 64var openComment = []byte("<!--")
 65var closeComment = []byte("-->")
 66var doubleHyphen = []byte("--")
 67
 68func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
 69	savedLine, savedSegment := block.Position()
 70	node := ast.NewRawHTML()
 71	line, segment := block.PeekLine()
 72	if bytes.HasPrefix(line, emptyComment) {
 73		node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment)))
 74		block.Advance(len(emptyComment))
 75		return node
 76	}
 77	if bytes.HasPrefix(line, invalidComment1) || bytes.HasPrefix(line, invalidComment2) {
 78		return nil
 79	}
 80	offset := len(openComment)
 81	line = line[offset:]
 82	for {
 83		hindex := bytes.Index(line, doubleHyphen)
 84		if hindex > -1 {
 85			hindex += offset
 86		}
 87		index := bytes.Index(line, closeComment) + offset
 88		if index > -1 && hindex == index {
 89			if index == 0 || len(line) < 2 || line[index-offset-1] != '-' {
 90				node.Segments.Append(segment.WithStop(segment.Start + index + len(closeComment)))
 91				block.Advance(index + len(closeComment))
 92				return node
 93			}
 94		}
 95		if hindex > 0 {
 96			break
 97		}
 98		node.Segments.Append(segment)
 99		block.AdvanceLine()
100		line, segment = block.PeekLine()
101		offset = 0
102		if line == nil {
103			break
104		}
105	}
106	block.SetPosition(savedLine, savedSegment)
107	return nil
108}
109
110func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node {
111	savedLine, savedSegment := block.Position()
112	node := ast.NewRawHTML()
113	for {
114		line, segment := block.PeekLine()
115		if line == nil {
116			break
117		}
118		index := bytes.Index(line, closer)
119		if index > -1 {
120			node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
121			block.Advance(index + len(closer))
122			return node
123		}
124		node.Segments.Append(segment)
125		block.AdvanceLine()
126	}
127	block.SetPosition(savedLine, savedSegment)
128	return nil
129}
130
131func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
132	sline, ssegment := block.Position()
133	if block.Match(reg) {
134		node := ast.NewRawHTML()
135		eline, esegment := block.Position()
136		block.SetPosition(sline, ssegment)
137		for {
138			line, segment := block.PeekLine()
139			if line == nil {
140				break
141			}
142			l, _ := block.Position()
143			start := segment.Start
144			if l == sline {
145				start = ssegment.Start
146			}
147			end := segment.Stop
148			if l == eline {
149				end = esegment.Start
150			}
151
152			node.Segments.Append(text.NewSegment(start, end))
153			if l == eline {
154				block.Advance(end - start)
155				break
156			} else {
157				block.AdvanceLine()
158			}
159		}
160		return node
161	}
162	return nil
163}