1package parser
  2
  3import (
  4	"bytes"
  5	"regexp"
  6	"strings"
  7
  8	"github.com/yuin/goldmark/ast"
  9	"github.com/yuin/goldmark/text"
 10	"github.com/yuin/goldmark/util"
 11)
 12
 13var allowedBlockTags = map[string]bool{
 14	"address":    true,
 15	"article":    true,
 16	"aside":      true,
 17	"base":       true,
 18	"basefont":   true,
 19	"blockquote": true,
 20	"body":       true,
 21	"caption":    true,
 22	"center":     true,
 23	"col":        true,
 24	"colgroup":   true,
 25	"dd":         true,
 26	"details":    true,
 27	"dialog":     true,
 28	"dir":        true,
 29	"div":        true,
 30	"dl":         true,
 31	"dt":         true,
 32	"fieldset":   true,
 33	"figcaption": true,
 34	"figure":     true,
 35	"footer":     true,
 36	"form":       true,
 37	"frame":      true,
 38	"frameset":   true,
 39	"h1":         true,
 40	"h2":         true,
 41	"h3":         true,
 42	"h4":         true,
 43	"h5":         true,
 44	"h6":         true,
 45	"head":       true,
 46	"header":     true,
 47	"hr":         true,
 48	"html":       true,
 49	"iframe":     true,
 50	"legend":     true,
 51	"li":         true,
 52	"link":       true,
 53	"main":       true,
 54	"menu":       true,
 55	"menuitem":   true,
 56	"meta":       true,
 57	"nav":        true,
 58	"noframes":   true,
 59	"ol":         true,
 60	"optgroup":   true,
 61	"option":     true,
 62	"p":          true,
 63	"param":      true,
 64	"search":     true,
 65	"section":    true,
 66	"summary":    true,
 67	"table":      true,
 68	"tbody":      true,
 69	"td":         true,
 70	"tfoot":      true,
 71	"th":         true,
 72	"thead":      true,
 73	"title":      true,
 74	"tr":         true,
 75	"track":      true,
 76	"ul":         true,
 77}
 78
 79var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style|textarea)(?:\s.*|>.*|/>.*|)(?:\r\n|\n)?$`) //nolint:golint,lll
 80var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style|textarea)>.*`)
 81
 82var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
 83var htmlBlockType2Close = []byte{'-', '-', '>'}
 84
 85var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
 86var htmlBlockType3Close = []byte{'?', '>'}
 87
 88var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*(?:\r\n|\n)?$`)
 89var htmlBlockType4Close = []byte{'>'}
 90
 91var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
 92var htmlBlockType5Close = []byte{']', ']', '>'}
 93
 94var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}<(?:/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(?:[ ].*|>.*|/>.*|)(?:\r\n|\n)?$`) //nolint:golint,lll
 95
 96var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(` + attributePattern + `*)[ ]*(?:>|/>)[ ]*(?:\r\n|\n)?$`) //nolint:golint,lll
 97
 98type htmlBlockParser struct {
 99}
100
101var defaultHTMLBlockParser = &htmlBlockParser{}
102
103// NewHTMLBlockParser return a new BlockParser that can parse html
104// blocks.
105func NewHTMLBlockParser() BlockParser {
106	return defaultHTMLBlockParser
107}
108
109func (b *htmlBlockParser) Trigger() []byte {
110	return []byte{'<'}
111}
112
113func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
114	var node *ast.HTMLBlock
115	line, segment := reader.PeekLine()
116	last := pc.LastOpenedBlock().Node
117
118	if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil {
119		node = ast.NewHTMLBlock(ast.HTMLBlockType1)
120	} else if htmlBlockType2OpenRegexp.Match(line) {
121		node = ast.NewHTMLBlock(ast.HTMLBlockType2)
122	} else if htmlBlockType3OpenRegexp.Match(line) {
123		node = ast.NewHTMLBlock(ast.HTMLBlockType3)
124	} else if htmlBlockType4OpenRegexp.Match(line) {
125		node = ast.NewHTMLBlock(ast.HTMLBlockType4)
126	} else if htmlBlockType5OpenRegexp.Match(line) {
127		node = ast.NewHTMLBlock(ast.HTMLBlockType5)
128	} else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil {
129		isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/"))
130		hasAttr := match[6] != match[7]
131		tagName := strings.ToLower(string(line[match[4]:match[5]]))
132		_, ok := allowedBlockTags[tagName]
133		if ok {
134			node = ast.NewHTMLBlock(ast.HTMLBlockType6)
135		} else if tagName != "script" && tagName != "style" &&
136			tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph
137			node = ast.NewHTMLBlock(ast.HTMLBlockType7)
138		}
139	}
140	if node == nil {
141		if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil {
142			tagName := string(line[match[2]:match[3]])
143			_, ok := allowedBlockTags[strings.ToLower(tagName)]
144			if ok {
145				node = ast.NewHTMLBlock(ast.HTMLBlockType6)
146			}
147		}
148	}
149	if node != nil {
150		reader.AdvanceToEOL()
151		node.Lines().Append(segment)
152		return node, NoChildren
153	}
154	return nil, NoChildren
155}
156
157func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
158	htmlBlock := node.(*ast.HTMLBlock)
159	lines := htmlBlock.Lines()
160	line, segment := reader.PeekLine()
161	var closurePattern []byte
162
163	switch htmlBlock.HTMLBlockType {
164	case ast.HTMLBlockType1:
165		if lines.Len() == 1 {
166			firstLine := lines.At(0)
167			if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) {
168				return Close
169			}
170		}
171		if htmlBlockType1CloseRegexp.Match(line) {
172			htmlBlock.ClosureLine = segment
173			reader.AdvanceToEOL()
174			return Close
175		}
176	case ast.HTMLBlockType2:
177		closurePattern = htmlBlockType2Close
178		fallthrough
179	case ast.HTMLBlockType3:
180		if closurePattern == nil {
181			closurePattern = htmlBlockType3Close
182		}
183		fallthrough
184	case ast.HTMLBlockType4:
185		if closurePattern == nil {
186			closurePattern = htmlBlockType4Close
187		}
188		fallthrough
189	case ast.HTMLBlockType5:
190		if closurePattern == nil {
191			closurePattern = htmlBlockType5Close
192		}
193
194		if lines.Len() == 1 {
195			firstLine := lines.At(0)
196			if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) {
197				return Close
198			}
199		}
200		if bytes.Contains(line, closurePattern) {
201			htmlBlock.ClosureLine = segment
202			reader.AdvanceToEOL()
203			return Close
204		}
205
206	case ast.HTMLBlockType6, ast.HTMLBlockType7:
207		if util.IsBlank(line) {
208			return Close
209		}
210	}
211	node.Lines().Append(segment)
212	reader.AdvanceToEOL()
213	return Continue | NoChildren
214}
215
216func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
217	// nothing to do
218}
219
220func (b *htmlBlockParser) CanInterruptParagraph() bool {
221	return true
222}
223
224func (b *htmlBlockParser) CanAcceptIndentedLine() bool {
225	return false
226}