// Package html implements a renderer that outputs HTML.
  2package html
  3
  4import (
  5	"bytes"
  6	"fmt"
  7	"strconv"
  8	"unicode"
  9	"unicode/utf8"
 10
 11	"github.com/yuin/goldmark/ast"
 12	"github.com/yuin/goldmark/renderer"
 13	"github.com/yuin/goldmark/util"
 14)
 15
// A Config struct has configurations for the HTML based renderers.
type Config struct {
	// Writer is the Writer used to emit textual content.
	Writer              Writer
	// HardWraps makes soft line breaks render as <br> elements.
	HardWraps           bool
	// EastAsianLineBreaks selects the rule that decides whether a soft line
	// break between two text nodes is written out.
	EastAsianLineBreaks EastAsianLineBreaks
	// XHTML makes void elements render in the self-closing " />" form.
	XHTML               bool
	// Unsafe allows raw HTML and potentially dangerous URLs to be rendered.
	Unsafe              bool
}
 24
 25// NewConfig returns a new Config with defaults.
 26func NewConfig() Config {
 27	return Config{
 28		Writer:              DefaultWriter,
 29		HardWraps:           false,
 30		EastAsianLineBreaks: EastAsianLineBreaksNone,
 31		XHTML:               false,
 32		Unsafe:              false,
 33	}
 34}
 35
 36// SetOption implements renderer.NodeRenderer.SetOption.
 37func (c *Config) SetOption(name renderer.OptionName, value any) {
 38	switch name {
 39	case optHardWraps:
 40		c.HardWraps = value.(bool)
 41	case optEastAsianLineBreaks:
 42		c.EastAsianLineBreaks = value.(EastAsianLineBreaks)
 43	case optXHTML:
 44		c.XHTML = value.(bool)
 45	case optUnsafe:
 46		c.Unsafe = value.(bool)
 47	case optTextWriter:
 48		c.Writer = value.(Writer)
 49	}
 50}
 51
// An Option interface sets options for HTML based renderers.
type Option interface {
	// SetHTMLOption applies this option to the given Config.
	SetHTMLOption(*Config)
}
 56
// optTextWriter is the option name used by WithWriter.
const optTextWriter renderer.OptionName = "Writer"
 59
 60type withWriter struct {
 61	value Writer
 62}
 63
 64func (o *withWriter) SetConfig(c *renderer.Config) {
 65	c.Options[optTextWriter] = o.value
 66}
 67
 68func (o *withWriter) SetHTMLOption(c *Config) {
 69	c.Writer = o.value
 70}
 71
 72// WithWriter is a functional option that allow you to set the given writer to
 73// the renderer.
 74func WithWriter(writer Writer) interface {
 75	renderer.Option
 76	Option
 77} {
 78	return &withWriter{writer}
 79}
 80
// optHardWraps is the option name used by WithHardWraps.
const optHardWraps renderer.OptionName = "HardWraps"
 83
 84type withHardWraps struct {
 85}
 86
 87func (o *withHardWraps) SetConfig(c *renderer.Config) {
 88	c.Options[optHardWraps] = true
 89}
 90
 91func (o *withHardWraps) SetHTMLOption(c *Config) {
 92	c.HardWraps = true
 93}
 94
 95// WithHardWraps is a functional option that indicates whether softline breaks
 96// should be rendered as '<br>'.
 97func WithHardWraps() interface {
 98	renderer.Option
 99	Option
100} {
101	return &withHardWraps{}
102}
103
// optEastAsianLineBreaks is the option name used by WithEastAsianLineBreaks.
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"
106
// An EastAsianLineBreaks is a style of East Asian line breaks.
type EastAsianLineBreaks int

const (
	// EastAsianLineBreaksNone renders line breaks as they are.
	EastAsianLineBreaksNone EastAsianLineBreaks = iota
	// EastAsianLineBreaksSimple follows east_asian_line_breaks in Pandoc.
	EastAsianLineBreaksSimple
	// EastAsianLineBreaksCSS3Draft follows the CSS Text Level 3 "Segment Break
	// Transformation Rules" with some enhancements.
	EastAsianLineBreaksCSS3Draft
)
118
119func (b EastAsianLineBreaks) softLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
120	switch b {
121	case EastAsianLineBreaksNone:
122		return false
123	case EastAsianLineBreaksSimple:
124		return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
125	case EastAsianLineBreaksCSS3Draft:
126		return eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
127	}
128	return false
129}
130
131func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
132	// Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
133	// References:
134	//   - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
135	//   - https://github.com/w3c/csswg-drafts/issues/5086
136
137	// Rule1:
138	//   If the character immediately before or immediately after the segment break is
139	//   the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
140	if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
141		return false
142	}
143
144	// Rule2:
145	//   Otherwise, if the East Asian Width property of both the character before and after the segment break is
146	//   F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
147	thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
148	siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
149	if (thisLastRuneEastAsianWidth == "F" ||
150		thisLastRuneEastAsianWidth == "W" ||
151		thisLastRuneEastAsianWidth == "H") &&
152		(siblingFirstRuneEastAsianWidth == "F" ||
153			siblingFirstRuneEastAsianWidth == "W" ||
154			siblingFirstRuneEastAsianWidth == "H") {
155		return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
156	}
157
158	// Rule3:
159	//   Otherwise, if either the character before or after the segment break belongs to
160	//   the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
161	//   then the segment break is removed.
162	if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
163		unicode.IsPunct(thisLastRune) ||
164		thisLastRune == '\u3000' ||
165		util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
166		unicode.IsPunct(siblingFirstRune) ||
167		siblingFirstRune == '\u3000' {
168		return false
169	}
170
171	// Rule4:
172	//   Otherwise, the segment break is converted to a space (U+0020).
173	return true
174}
175
176type withEastAsianLineBreaks struct {
177	eastAsianLineBreaksStyle EastAsianLineBreaks
178}
179
180func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
181	c.Options[optEastAsianLineBreaks] = o.eastAsianLineBreaksStyle
182}
183
184func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
185	c.EastAsianLineBreaks = o.eastAsianLineBreaksStyle
186}
187
188// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
189// between east asian wide characters should be ignored.
190func WithEastAsianLineBreaks(e EastAsianLineBreaks) interface {
191	renderer.Option
192	Option
193} {
194	return &withEastAsianLineBreaks{e}
195}
196
// optXHTML is the option name used by WithXHTML.
const optXHTML renderer.OptionName = "XHTML"
199
200type withXHTML struct {
201}
202
203func (o *withXHTML) SetConfig(c *renderer.Config) {
204	c.Options[optXHTML] = true
205}
206
207func (o *withXHTML) SetHTMLOption(c *Config) {
208	c.XHTML = true
209}
210
211// WithXHTML is a functional option indicates that nodes should be rendered in
212// xhtml instead of HTML5.
213func WithXHTML() interface {
214	Option
215	renderer.Option
216} {
217	return &withXHTML{}
218}
219
// optUnsafe is the option name used by WithUnsafe.
const optUnsafe renderer.OptionName = "Unsafe"
222
223type withUnsafe struct {
224}
225
226func (o *withUnsafe) SetConfig(c *renderer.Config) {
227	c.Options[optUnsafe] = true
228}
229
230func (o *withUnsafe) SetHTMLOption(c *Config) {
231	c.Unsafe = true
232}
233
234// WithUnsafe is a functional option that renders dangerous contents
235// (raw htmls and potentially dangerous links) as it is.
236func WithUnsafe() interface {
237	renderer.Option
238	Option
239} {
240	return &withUnsafe{}
241}
242
// A Renderer struct is an implementation of renderer.NodeRenderer that renders
// nodes as (X)HTML.
type Renderer struct {
	// Config is embedded so the rendering options are reachable directly on
	// the Renderer (r.XHTML, r.Unsafe, r.Writer, ...).
	Config
}
248
249// NewRenderer returns a new Renderer with given options.
250func NewRenderer(opts ...Option) renderer.NodeRenderer {
251	r := &Renderer{
252		Config: NewConfig(),
253	}
254
255	for _, opt := range opts {
256		opt.SetHTMLOption(&r.Config)
257	}
258	return r
259}
260
261// RegisterFuncs implements NodeRenderer.RegisterFuncs .
262func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
263	// blocks
264
265	reg.Register(ast.KindDocument, r.renderDocument)
266	reg.Register(ast.KindHeading, r.renderHeading)
267	reg.Register(ast.KindBlockquote, r.renderBlockquote)
268	reg.Register(ast.KindCodeBlock, r.renderCodeBlock)
269	reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock)
270	reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock)
271	reg.Register(ast.KindList, r.renderList)
272	reg.Register(ast.KindListItem, r.renderListItem)
273	reg.Register(ast.KindParagraph, r.renderParagraph)
274	reg.Register(ast.KindTextBlock, r.renderTextBlock)
275	reg.Register(ast.KindThematicBreak, r.renderThematicBreak)
276	reg.Register(ast.KindLinkReferenceDefinition, func(
277		_ util.BufWriter, _ []byte, _ ast.Node, _ bool) (ast.WalkStatus, error) {
278		return ast.WalkSkipChildren, nil
279	})
280
281	// inlines
282
283	reg.Register(ast.KindAutoLink, r.renderAutoLink)
284	reg.Register(ast.KindCodeSpan, r.renderCodeSpan)
285	reg.Register(ast.KindEmphasis, r.renderEmphasis)
286	reg.Register(ast.KindImage, r.renderImage)
287	reg.Register(ast.KindLink, r.renderLink)
288	reg.Register(ast.KindRawHTML, r.renderRawHTML)
289	reg.Register(ast.KindText, r.renderText)
290	reg.Register(ast.KindString, r.renderString)
291}
292
293func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
294	l := n.Lines().Len()
295	for i := range l {
296		line := n.Lines().At(i)
297		r.Writer.RawWrite(w, line.Value(source))
298	}
299}
300
// GlobalAttributeFilter defines attribute names which any element can have.
// The element-specific filters in this file are this filter, either as is or
// extended with additional names.
var GlobalAttributeFilter = util.NewBytesFilterString(`accesskey,autocapitalize,autofocus,class,contenteditable,dir,draggable,enterkeyhint,hidden,id,inert,inputmode,is,itemid,itemprop,itemref,itemscope,itemtype,lang,part,role,slot,spellcheck,style,tabindex,title,translate`) // nolint:lll
303
304func (r *Renderer) renderDocument(
305	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
306	// nothing to do
307	return ast.WalkContinue, nil
308}
309
310// HeadingAttributeFilter defines attribute names which heading elements can have.
311var HeadingAttributeFilter = GlobalAttributeFilter
312
313func (r *Renderer) renderHeading(
314	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
315	n := node.(*ast.Heading)
316	if entering {
317		_, _ = w.WriteString("<h")
318		_ = w.WriteByte("0123456"[n.Level])
319		if n.Attributes() != nil {
320			RenderAttributes(w, node, HeadingAttributeFilter)
321		}
322		_ = w.WriteByte('>')
323	} else {
324		_, _ = w.WriteString("</h")
325		_ = w.WriteByte("0123456"[n.Level])
326		_, _ = w.WriteString(">\n")
327	}
328	return ast.WalkContinue, nil
329}
330
331// BlockquoteAttributeFilter defines attribute names which blockquote elements can have.
332var BlockquoteAttributeFilter = GlobalAttributeFilter.ExtendString(`cite`)
333
334func (r *Renderer) renderBlockquote(
335	w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
336	if entering {
337		if n.Attributes() != nil {
338			_, _ = w.WriteString("<blockquote")
339			RenderAttributes(w, n, BlockquoteAttributeFilter)
340			_ = w.WriteByte('>')
341		} else {
342			_, _ = w.WriteString("<blockquote>\n")
343		}
344	} else {
345		_, _ = w.WriteString("</blockquote>\n")
346	}
347	return ast.WalkContinue, nil
348}
349
350func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
351	if entering {
352		_, _ = w.WriteString("<pre><code>")
353		r.writeLines(w, source, n)
354	} else {
355		_, _ = w.WriteString("</code></pre>\n")
356	}
357	return ast.WalkContinue, nil
358}
359
360func (r *Renderer) renderFencedCodeBlock(
361	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
362	n := node.(*ast.FencedCodeBlock)
363	if entering {
364		_, _ = w.WriteString("<pre><code")
365		language := n.Language(source)
366		if language != nil {
367			_, _ = w.WriteString(" class=\"language-")
368			r.Writer.Write(w, language)
369			_, _ = w.WriteString("\"")
370		}
371		_ = w.WriteByte('>')
372		r.writeLines(w, source, n)
373	} else {
374		_, _ = w.WriteString("</code></pre>\n")
375	}
376	return ast.WalkContinue, nil
377}
378
379func (r *Renderer) renderHTMLBlock(
380	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
381	n := node.(*ast.HTMLBlock)
382	if entering {
383		if r.Unsafe {
384			l := n.Lines().Len()
385			for i := range l {
386				line := n.Lines().At(i)
387				r.Writer.SecureWrite(w, line.Value(source))
388			}
389		} else {
390			_, _ = w.WriteString("<!-- raw HTML omitted -->\n")
391		}
392	} else {
393		if n.HasClosure() {
394			if r.Unsafe {
395				closure := n.ClosureLine
396				r.Writer.SecureWrite(w, closure.Value(source))
397			} else {
398				_, _ = w.WriteString("<!-- raw HTML omitted -->\n")
399			}
400		}
401	}
402	return ast.WalkContinue, nil
403}
404
405// ListAttributeFilter defines attribute names which list elements can have.
406var ListAttributeFilter = GlobalAttributeFilter.ExtendString(`start,reversed,type`)
407
408func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
409	n := node.(*ast.List)
410	tag := "ul"
411	if n.IsOrdered() {
412		tag = "ol"
413	}
414	if entering {
415		_ = w.WriteByte('<')
416		_, _ = w.WriteString(tag)
417		if n.IsOrdered() && n.Start != 1 {
418			_, _ = fmt.Fprintf(w, " start=\"%d\"", n.Start)
419		}
420		if n.Attributes() != nil {
421			RenderAttributes(w, n, ListAttributeFilter)
422		}
423		_, _ = w.WriteString(">\n")
424	} else {
425		_, _ = w.WriteString("</")
426		_, _ = w.WriteString(tag)
427		_, _ = w.WriteString(">\n")
428	}
429	return ast.WalkContinue, nil
430}
431
432// ListItemAttributeFilter defines attribute names which list item elements can have.
433var ListItemAttributeFilter = GlobalAttributeFilter.ExtendString(`value`)
434
435func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
436	if entering {
437		if n.Attributes() != nil {
438			_, _ = w.WriteString("<li")
439			RenderAttributes(w, n, ListItemAttributeFilter)
440			_ = w.WriteByte('>')
441		} else {
442			_, _ = w.WriteString("<li>")
443		}
444		fc := n.FirstChild()
445		if fc != nil {
446			if _, ok := fc.(*ast.TextBlock); !ok {
447				_ = w.WriteByte('\n')
448			}
449		}
450	} else {
451		_, _ = w.WriteString("</li>\n")
452	}
453	return ast.WalkContinue, nil
454}
455
456// ParagraphAttributeFilter defines attribute names which paragraph elements can have.
457var ParagraphAttributeFilter = GlobalAttributeFilter
458
459func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
460	if entering {
461		if n.Attributes() != nil {
462			_, _ = w.WriteString("<p")
463			RenderAttributes(w, n, ParagraphAttributeFilter)
464			_ = w.WriteByte('>')
465		} else {
466			_, _ = w.WriteString("<p>")
467		}
468	} else {
469		_, _ = w.WriteString("</p>\n")
470	}
471	return ast.WalkContinue, nil
472}
473
474func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
475	if !entering {
476		if n.NextSibling() != nil && n.FirstChild() != nil {
477			_ = w.WriteByte('\n')
478		}
479	}
480	return ast.WalkContinue, nil
481}
482
483// ThematicAttributeFilter defines attribute names which hr elements can have.
484var ThematicAttributeFilter = GlobalAttributeFilter.ExtendString(`align,color,noshade,size,width`)
485
486func (r *Renderer) renderThematicBreak(
487	w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
488	if !entering {
489		return ast.WalkContinue, nil
490	}
491	_, _ = w.WriteString("<hr")
492	if n.Attributes() != nil {
493		RenderAttributes(w, n, ThematicAttributeFilter)
494	}
495	if r.XHTML {
496		_, _ = w.WriteString(" />\n")
497	} else {
498		_, _ = w.WriteString(">\n")
499	}
500	return ast.WalkContinue, nil
501}
502
503// LinkAttributeFilter defines attribute names which link elements can have.
504var LinkAttributeFilter = GlobalAttributeFilter.ExtendString(`download,href,lang,media,ping,referrerpolicy,rel,shape,target`) // nolint:lll
505
506func (r *Renderer) renderAutoLink(
507	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
508	n := node.(*ast.AutoLink)
509	if !entering {
510		return ast.WalkContinue, nil
511	}
512	_, _ = w.WriteString(`<a href="`)
513	url := util.URLEscape(n.URL(source), false)
514	label := n.Label(source)
515	if n.AutoLinkType == ast.AutoLinkEmail && !bytes.HasPrefix(bytes.ToLower(url), []byte("mailto:")) {
516		_, _ = w.WriteString("mailto:")
517	}
518	if r.Unsafe || !IsDangerousURL(url) {
519		_, _ = w.Write(util.EscapeHTML(url))
520	}
521	if n.Attributes() != nil {
522		_ = w.WriteByte('"')
523		RenderAttributes(w, n, LinkAttributeFilter)
524		_ = w.WriteByte('>')
525	} else {
526		_, _ = w.WriteString(`">`)
527	}
528	_, _ = w.Write(util.EscapeHTML(label))
529	_, _ = w.WriteString(`</a>`)
530	return ast.WalkContinue, nil
531}
532
533// CodeAttributeFilter defines attribute names which code elements can have.
534var CodeAttributeFilter = GlobalAttributeFilter
535
536func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
537	if entering {
538		if n.Attributes() != nil {
539			_, _ = w.WriteString("<code")
540			RenderAttributes(w, n, CodeAttributeFilter)
541			_ = w.WriteByte('>')
542		} else {
543			_, _ = w.WriteString("<code>")
544		}
545		for c := n.FirstChild(); c != nil; c = c.NextSibling() {
546			segment := c.(*ast.Text).Segment
547			value := segment.Value(source)
548			if bytes.HasSuffix(value, []byte("\n")) {
549				r.Writer.RawWrite(w, value[:len(value)-1])
550				r.Writer.RawWrite(w, []byte(" "))
551			} else {
552				r.Writer.RawWrite(w, value)
553			}
554		}
555		return ast.WalkSkipChildren, nil
556	}
557	_, _ = w.WriteString("</code>")
558	return ast.WalkContinue, nil
559}
560
561// EmphasisAttributeFilter defines attribute names which emphasis elements can have.
562var EmphasisAttributeFilter = GlobalAttributeFilter
563
564func (r *Renderer) renderEmphasis(
565	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
566	n := node.(*ast.Emphasis)
567	tag := "em"
568	if n.Level == 2 {
569		tag = "strong"
570	}
571	if entering {
572		_ = w.WriteByte('<')
573		_, _ = w.WriteString(tag)
574		if n.Attributes() != nil {
575			RenderAttributes(w, n, EmphasisAttributeFilter)
576		}
577		_ = w.WriteByte('>')
578	} else {
579		_, _ = w.WriteString("</")
580		_, _ = w.WriteString(tag)
581		_ = w.WriteByte('>')
582	}
583	return ast.WalkContinue, nil
584}
585
586func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
587	n := node.(*ast.Link)
588	if entering {
589		_, _ = w.WriteString("<a href=\"")
590		dest := util.URLEscape(n.Destination, true)
591		if r.Unsafe || !IsDangerousURL(dest) {
592			_, _ = w.Write(util.EscapeHTML(dest))
593		}
594		_ = w.WriteByte('"')
595		if n.Title != nil {
596			_, _ = w.WriteString(` title="`)
597			r.Writer.Write(w, n.Title)
598			_ = w.WriteByte('"')
599		}
600		if n.Attributes() != nil {
601			RenderAttributes(w, n, LinkAttributeFilter)
602		}
603		_ = w.WriteByte('>')
604	} else {
605		_, _ = w.WriteString("</a>")
606	}
607	return ast.WalkContinue, nil
608}
609
610// ImageAttributeFilter defines attribute names which image elements can have.
611var ImageAttributeFilter = GlobalAttributeFilter.ExtendString(`align,border,crossorigin,decoding,height,importance,intrinsicsize,ismap,loading,referrerpolicy,sizes,srcset,usemap,width`) // nolint: lll
612
613func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
614	if !entering {
615		return ast.WalkContinue, nil
616	}
617	n := node.(*ast.Image)
618	_, _ = w.WriteString("<img src=\"")
619	dest := util.URLEscape(n.Destination, true)
620	if r.Unsafe || !IsDangerousURL(dest) {
621		_, _ = w.Write(util.EscapeHTML(dest))
622	}
623	_, _ = w.WriteString(`" alt="`)
624	r.renderTexts(w, source, n)
625	_ = w.WriteByte('"')
626	if n.Title != nil {
627		_, _ = w.WriteString(` title="`)
628		r.Writer.Write(w, n.Title)
629		_ = w.WriteByte('"')
630	}
631	if n.Attributes() != nil {
632		RenderAttributes(w, n, ImageAttributeFilter)
633	}
634	if r.XHTML {
635		_, _ = w.WriteString(" />")
636	} else {
637		_, _ = w.WriteString(">")
638	}
639	return ast.WalkSkipChildren, nil
640}
641
642func (r *Renderer) renderRawHTML(
643	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
644	if !entering {
645		return ast.WalkSkipChildren, nil
646	}
647	if r.Unsafe {
648		n := node.(*ast.RawHTML)
649		l := n.Segments.Len()
650		for i := range l {
651			segment := n.Segments.At(i)
652			_, _ = w.Write(segment.Value(source))
653		}
654		return ast.WalkSkipChildren, nil
655	}
656	_, _ = w.WriteString("<!-- raw HTML omitted -->")
657	return ast.WalkSkipChildren, nil
658}
659
660func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
661	if !entering {
662		return ast.WalkContinue, nil
663	}
664	n := node.(*ast.Text)
665	segment := n.Segment
666	if n.IsRaw() {
667		r.Writer.RawWrite(w, segment.Value(source))
668	} else {
669		value := segment.Value(source)
670		r.Writer.Write(w, value)
671		if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
672			if r.XHTML {
673				_, _ = w.WriteString("<br />\n")
674			} else {
675				_, _ = w.WriteString("<br>\n")
676			}
677		} else if n.SoftLineBreak() {
678			if r.EastAsianLineBreaks != EastAsianLineBreaksNone && len(value) != 0 {
679				sibling := node.NextSibling()
680				if sibling != nil && sibling.Kind() == ast.KindText {
681					if siblingText := sibling.(*ast.Text).Value(source); len(siblingText) != 0 {
682						thisLastRune := util.ToRune(value, len(value)-1)
683						siblingFirstRune, _ := utf8.DecodeRune(siblingText)
684						if r.EastAsianLineBreaks.softLineBreak(thisLastRune, siblingFirstRune) {
685							_ = w.WriteByte('\n')
686						}
687					}
688				}
689			} else {
690				_ = w.WriteByte('\n')
691			}
692		}
693	}
694	return ast.WalkContinue, nil
695}
696
697func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
698	if !entering {
699		return ast.WalkContinue, nil
700	}
701	n := node.(*ast.String)
702	if n.IsCode() {
703		_, _ = w.Write(n.Value)
704	} else {
705		if n.IsRaw() {
706			r.Writer.RawWrite(w, n.Value)
707		} else {
708			r.Writer.Write(w, n.Value)
709		}
710	}
711	return ast.WalkContinue, nil
712}
713
714func (r *Renderer) renderTexts(w util.BufWriter, source []byte, n ast.Node) {
715	for c := n.FirstChild(); c != nil; c = c.NextSibling() {
716		if s, ok := c.(*ast.String); ok {
717			_, _ = r.renderString(w, source, s, true)
718		} else if t, ok := c.(*ast.Text); ok {
719			_, _ = r.renderText(w, source, t, true)
720		} else {
721			r.renderTexts(w, source, c)
722		}
723	}
724}
725
726var dataPrefix = []byte("data-")
727
728// RenderAttributes renders given node's attributes.
729// You can specify attribute names to render by the filter.
730// If filter is nil, RenderAttributes renders all attributes.
731func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) {
732	for _, attr := range node.Attributes() {
733		if filter != nil && !filter.Contains(attr.Name) {
734			if !bytes.HasPrefix(attr.Name, dataPrefix) {
735				continue
736			}
737		}
738		_, _ = w.WriteString(" ")
739		_, _ = w.Write(attr.Name)
740		_, _ = w.WriteString(`="`)
741		// TODO: convert numeric values to strings
742		var value []byte
743		switch typed := attr.Value.(type) {
744		case []byte:
745			value = typed
746		case string:
747			value = util.StringToReadOnlyBytes(typed)
748		}
749		_, _ = w.Write(util.EscapeHTML(value))
750		_ = w.WriteByte('"')
751	}
752}
753
// A Writer interface writes textual contents to a writer.
// The renderer in this file sends text, titles and code content through it.
type Writer interface {
	// Write writes the given source to writer with resolving references and unescaping
	// backslash escaped characters.
	Write(writer util.BufWriter, source []byte)

	// RawWrite writes the given source to writer without resolving references and
	// unescaping backslash escaped characters.
	RawWrite(writer util.BufWriter, source []byte)

	// SecureWrite writes the given source to writer with replacing insecure characters.
	SecureWrite(writer util.BufWriter, source []byte)
}
767
// replacementCharacter is the UTF-8 encoding of U+FFFD, which the default
// writer emits in place of NUL bytes.
var replacementCharacter = []byte("\ufffd")
769
// A WriterConfig struct has configurations for the HTML based writers.
type WriterConfig struct {
	// EscapedSpace indicates that a '\' escaped half-space (0x20) should not
	// be rendered.
	EscapedSpace bool
}

// A WriterOption is a function that sets an option on a WriterConfig.
type WriterOption func(*WriterConfig)

// WithEscapedSpace is a WriterOption that indicates that a '\' escaped
// half-space (0x20) should not be rendered.
func WithEscapedSpace() WriterOption {
	var enable WriterOption = func(cfg *WriterConfig) {
		cfg.EscapedSpace = true
	}
	return enable
}
785
786type defaultWriter struct {
787	WriterConfig
788}
789
790// NewWriter returns a new Writer.
791func NewWriter(opts ...WriterOption) Writer {
792	w := &defaultWriter{}
793	for _, opt := range opts {
794		opt(&w.WriterConfig)
795	}
796	return w
797}
798
799func escapeRune(writer util.BufWriter, r rune) {
800	if r < 256 {
801		v := util.EscapeHTMLByte(byte(r))
802		if v != nil {
803			_, _ = writer.Write(v)
804			return
805		}
806	}
807	_, _ = writer.WriteRune(util.ToValidRune(r))
808}
809
810func (d *defaultWriter) SecureWrite(writer util.BufWriter, source []byte) {
811	n := 0
812	l := len(source)
813	for i := range l {
814		if source[i] == '\u0000' {
815			_, _ = writer.Write(source[i-n : i])
816			n = 0
817			_, _ = writer.Write(replacementCharacter)
818			continue
819		}
820		n++
821	}
822	if n != 0 {
823		_, _ = writer.Write(source[l-n:])
824	}
825}
826
827func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
828	n := 0
829	l := len(source)
830	for i := range l {
831		v := util.EscapeHTMLByte(source[i])
832		if v != nil {
833			_, _ = writer.Write(source[i-n : i])
834			n = 0
835			_, _ = writer.Write(v)
836			continue
837		}
838		n++
839	}
840	if n != 0 {
841		_, _ = writer.Write(source[l-n:])
842	}
843}
844
845func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
846	escaped := false
847	var ok bool
848	limit := len(source)
849	n := 0
850	for i := 0; i < limit; i++ {
851		c := source[i]
852		if escaped {
853			if util.IsPunct(c) {
854				d.RawWrite(writer, source[n:i-1])
855				n = i
856				escaped = false
857				continue
858			}
859			if d.EscapedSpace && c == ' ' {
860				d.RawWrite(writer, source[n:i-1])
861				n = i + 1
862				escaped = false
863				continue
864			}
865		}
866		if c == '\x00' {
867			d.RawWrite(writer, source[n:i])
868			d.RawWrite(writer, replacementCharacter)
869			n = i + 1
870			escaped = false
871			continue
872		}
873		if c == '&' {
874			pos := i
875			next := i + 1
876			if next < limit && source[next] == '#' {
877				nnext := next + 1
878				if nnext < limit {
879					nc := source[nnext]
880					// code point like #x22;
881					if nnext < limit && nc == 'x' || nc == 'X' {
882						start := nnext + 1
883						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
884						if ok && i < limit && source[i] == ';' && i-start < 7 {
885							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
886							d.RawWrite(writer, source[n:pos])
887							n = i + 1
888							escapeRune(writer, rune(v))
889							continue
890						}
891						// code point like #1234;
892					} else if nc >= '0' && nc <= '9' {
893						start := nnext
894						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
895						if ok && i < limit && i-start < 8 && source[i] == ';' {
896							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 10, 32)
897							d.RawWrite(writer, source[n:pos])
898							n = i + 1
899							escapeRune(writer, rune(v))
900							continue
901						}
902					}
903				}
904			} else {
905				start := next
906				i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric)
907				// entity reference
908				if ok && i < limit && source[i] == ';' {
909					name := util.BytesToReadOnlyString(source[start:i])
910					entity, ok := util.LookUpHTML5EntityByName(name)
911					if ok {
912						d.RawWrite(writer, source[n:pos])
913						n = i + 1
914						d.RawWrite(writer, entity.Characters)
915						continue
916					}
917				}
918			}
919			i = next - 1
920		}
921		if c == '\\' {
922			escaped = true
923			continue
924		}
925		escaped = false
926	}
927	d.RawWrite(writer, source[n:])
928}
929
// DefaultWriter is a default instance of the Writer, created by NewWriter
// without options. NewConfig uses it as the default Config.Writer.
var DefaultWriter = NewWriter()
932
// URL prefixes used by IsDangerousURL. bDataImage and the media type prefixes
// that follow it (bPng ... bSvg) identify the data:image URLs that are
// accepted; bJs, bVb, bFile and bData are the prefixes that are rejected.
var bDataImage = []byte("data:image/")
var bPng = []byte("png;")
var bGif = []byte("gif;")
var bJpeg = []byte("jpeg;")
var bWebp = []byte("webp;")
var bSvg = []byte("svg+xml;")
var bJs = []byte("javascript:")
var bVb = []byte("vbscript:")
var bFile = []byte("file:")
var bData = []byte("data:")
943
// hasPrefix reports whether s begins with prefix, ignoring letter case.
// It compares in place with bytes.EqualFold, so no lowercase copies of s or
// prefix are allocated.
func hasPrefix(s, prefix []byte) bool {
	return len(s) >= len(prefix) && bytes.EqualFold(s[:len(prefix)], prefix)
}
947
948// IsDangerousURL returns true if the given url seems a potentially dangerous url,
949// otherwise false.
950func IsDangerousURL(url []byte) bool {
951	if hasPrefix(url, bDataImage) && len(url) >= 11 {
952		v := url[11:]
953		if hasPrefix(v, bPng) || hasPrefix(v, bGif) ||
954			hasPrefix(v, bJpeg) || hasPrefix(v, bWebp) ||
955			hasPrefix(v, bSvg) {
956			return false
957		}
958		return true
959	}
960	return hasPrefix(url, bJs) || hasPrefix(url, bVb) ||
961		hasPrefix(url, bFile) || hasPrefix(url, bData)
962}