1package parser
  2
  3import (
  4	"bytes"
  5	"regexp"
  6	"strings"
  7
  8	"github.com/yuin/goldmark/ast"
  9	"github.com/yuin/goldmark/text"
 10	"github.com/yuin/goldmark/util"
 11)
 12
 13var allowedBlockTags = map[string]bool{
 14	"address":    true,
 15	"article":    true,
 16	"aside":      true,
 17	"base":       true,
 18	"basefont":   true,
 19	"blockquote": true,
 20	"body":       true,
 21	"caption":    true,
 22	"center":     true,
 23	"col":        true,
 24	"colgroup":   true,
 25	"dd":         true,
 26	"details":    true,
 27	"dialog":     true,
 28	"dir":        true,
 29	"div":        true,
 30	"dl":         true,
 31	"dt":         true,
 32	"fieldset":   true,
 33	"figcaption": true,
 34	"figure":     true,
 35	"footer":     true,
 36	"form":       true,
 37	"frame":      true,
 38	"frameset":   true,
 39	"h1":         true,
 40	"h2":         true,
 41	"h3":         true,
 42	"h4":         true,
 43	"h5":         true,
 44	"h6":         true,
 45	"head":       true,
 46	"header":     true,
 47	"hr":         true,
 48	"html":       true,
 49	"iframe":     true,
 50	"legend":     true,
 51	"li":         true,
 52	"link":       true,
 53	"main":       true,
 54	"menu":       true,
 55	"menuitem":   true,
 56	"meta":       true,
 57	"nav":        true,
 58	"noframes":   true,
 59	"ol":         true,
 60	"optgroup":   true,
 61	"option":     true,
 62	"p":          true,
 63	"param":      true,
 64	"section":    true,
 65	"source":     true,
 66	"summary":    true,
 67	"table":      true,
 68	"tbody":      true,
 69	"td":         true,
 70	"tfoot":      true,
 71	"th":         true,
 72	"thead":      true,
 73	"title":      true,
 74	"tr":         true,
 75	"track":      true,
 76	"ul":         true,
 77}
 78
 79var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style|textarea)(?:\s.*|>.*|/>.*|)(?:\r\n|\n)?$`)
 80var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style|textarea)>.*`)
 81
 82var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
 83var htmlBlockType2Close = []byte{'-', '-', '>'}
 84
 85var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
 86var htmlBlockType3Close = []byte{'?', '>'}
 87
 88var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*(?:\r\n|\n)?$`)
 89var htmlBlockType4Close = []byte{'>'}
 90
 91var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
 92var htmlBlockType5Close = []byte{']', ']', '>'}
 93
 94var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}<(?:/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(?:[ ].*|>.*|/>.*|)(?:\r\n|\n)?$`)
 95
 96var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(` + attributePattern + `*)[ ]*(?:>|/>)[ ]*(?:\r\n|\n)?$`)
 97
 98type htmlBlockParser struct {
 99}
100
101var defaultHTMLBlockParser = &htmlBlockParser{}
102
103// NewHTMLBlockParser return a new BlockParser that can parse html
104// blocks.
105func NewHTMLBlockParser() BlockParser {
106	return defaultHTMLBlockParser
107}
108
109func (b *htmlBlockParser) Trigger() []byte {
110	return []byte{'<'}
111}
112
113func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
114	var node *ast.HTMLBlock
115	line, segment := reader.PeekLine()
116	last := pc.LastOpenedBlock().Node
117	if pos := pc.BlockOffset(); pos < 0 || line[pos] != '<' {
118		return nil, NoChildren
119	}
120
121	if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil {
122		node = ast.NewHTMLBlock(ast.HTMLBlockType1)
123	} else if htmlBlockType2OpenRegexp.Match(line) {
124		node = ast.NewHTMLBlock(ast.HTMLBlockType2)
125	} else if htmlBlockType3OpenRegexp.Match(line) {
126		node = ast.NewHTMLBlock(ast.HTMLBlockType3)
127	} else if htmlBlockType4OpenRegexp.Match(line) {
128		node = ast.NewHTMLBlock(ast.HTMLBlockType4)
129	} else if htmlBlockType5OpenRegexp.Match(line) {
130		node = ast.NewHTMLBlock(ast.HTMLBlockType5)
131	} else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil {
132		isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/"))
133		hasAttr := match[6] != match[7]
134		tagName := strings.ToLower(string(line[match[4]:match[5]]))
135		_, ok := allowedBlockTags[tagName]
136		if ok {
137			node = ast.NewHTMLBlock(ast.HTMLBlockType6)
138		} else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph
139			node = ast.NewHTMLBlock(ast.HTMLBlockType7)
140		}
141	}
142	if node == nil {
143		if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil {
144			tagName := string(line[match[2]:match[3]])
145			_, ok := allowedBlockTags[strings.ToLower(tagName)]
146			if ok {
147				node = ast.NewHTMLBlock(ast.HTMLBlockType6)
148			}
149		}
150	}
151	if node != nil {
152		reader.Advance(segment.Len() - util.TrimRightSpaceLength(line))
153		node.Lines().Append(segment)
154		return node, NoChildren
155	}
156	return nil, NoChildren
157}
158
159func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
160	htmlBlock := node.(*ast.HTMLBlock)
161	lines := htmlBlock.Lines()
162	line, segment := reader.PeekLine()
163	var closurePattern []byte
164
165	switch htmlBlock.HTMLBlockType {
166	case ast.HTMLBlockType1:
167		if lines.Len() == 1 {
168			firstLine := lines.At(0)
169			if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) {
170				return Close
171			}
172		}
173		if htmlBlockType1CloseRegexp.Match(line) {
174			htmlBlock.ClosureLine = segment
175			reader.Advance(segment.Len() - util.TrimRightSpaceLength(line))
176			return Close
177		}
178	case ast.HTMLBlockType2:
179		closurePattern = htmlBlockType2Close
180		fallthrough
181	case ast.HTMLBlockType3:
182		if closurePattern == nil {
183			closurePattern = htmlBlockType3Close
184		}
185		fallthrough
186	case ast.HTMLBlockType4:
187		if closurePattern == nil {
188			closurePattern = htmlBlockType4Close
189		}
190		fallthrough
191	case ast.HTMLBlockType5:
192		if closurePattern == nil {
193			closurePattern = htmlBlockType5Close
194		}
195
196		if lines.Len() == 1 {
197			firstLine := lines.At(0)
198			if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) {
199				return Close
200			}
201		}
202		if bytes.Contains(line, closurePattern) {
203			htmlBlock.ClosureLine = segment
204			reader.Advance(segment.Len())
205			return Close
206		}
207
208	case ast.HTMLBlockType6, ast.HTMLBlockType7:
209		if util.IsBlank(line) {
210			return Close
211		}
212	}
213	node.Lines().Append(segment)
214	reader.Advance(segment.Len() - util.TrimRightSpaceLength(line))
215	return Continue | NoChildren
216}
217
218func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
219	// nothing to do
220}
221
222func (b *htmlBlockParser) CanInterruptParagraph() bool {
223	return true
224}
225
226func (b *htmlBlockParser) CanAcceptIndentedLine() bool {
227	return false
228}