// Package html implements a renderer that outputs HTML.
2package html
3
4import (
5 "bytes"
6 "fmt"
7 "strconv"
8 "unicode"
9 "unicode/utf8"
10
11 "github.com/yuin/goldmark/ast"
12 "github.com/yuin/goldmark/renderer"
13 "github.com/yuin/goldmark/util"
14)
15
// A Config struct has configurations for the HTML based renderers.
type Config struct {
	// Writer is the Writer the renderer uses to emit textual content.
	Writer Writer
	// HardWraps, when true, makes soft line breaks in text render as a
	// <br> element instead of a newline.
	HardWraps bool
	// EastAsianLineBreaks selects how soft line breaks between adjacent text
	// nodes are treated; EastAsianLineBreaksNone keeps them as they are.
	EastAsianLineBreaks EastAsianLineBreaks
	// XHTML, when true, makes void elements render in XHTML form
	// (for example "<br />" instead of "<br>").
	XHTML bool
	// Unsafe, when true, makes raw HTML and potentially dangerous URLs
	// render as they are instead of being omitted.
	Unsafe bool
}
24
25// NewConfig returns a new Config with defaults.
26func NewConfig() Config {
27 return Config{
28 Writer: DefaultWriter,
29 HardWraps: false,
30 EastAsianLineBreaks: EastAsianLineBreaksNone,
31 XHTML: false,
32 Unsafe: false,
33 }
34}
35
36// SetOption implements renderer.NodeRenderer.SetOption.
37func (c *Config) SetOption(name renderer.OptionName, value any) {
38 switch name {
39 case optHardWraps:
40 c.HardWraps = value.(bool)
41 case optEastAsianLineBreaks:
42 c.EastAsianLineBreaks = value.(EastAsianLineBreaks)
43 case optXHTML:
44 c.XHTML = value.(bool)
45 case optUnsafe:
46 c.Unsafe = value.(bool)
47 case optTextWriter:
48 c.Writer = value.(Writer)
49 }
50}
51
// An Option interface sets options for HTML based renderers.
type Option interface {
	// SetHTMLOption applies this option to the given Config.
	SetHTMLOption(*Config)
}
56
57// TextWriter is an option name used in WithWriter.
58const optTextWriter renderer.OptionName = "Writer"
59
60type withWriter struct {
61 value Writer
62}
63
64func (o *withWriter) SetConfig(c *renderer.Config) {
65 c.Options[optTextWriter] = o.value
66}
67
68func (o *withWriter) SetHTMLOption(c *Config) {
69 c.Writer = o.value
70}
71
72// WithWriter is a functional option that allow you to set the given writer to
73// the renderer.
74func WithWriter(writer Writer) interface {
75 renderer.Option
76 Option
77} {
78 return &withWriter{writer}
79}
80
81// HardWraps is an option name used in WithHardWraps.
82const optHardWraps renderer.OptionName = "HardWraps"
83
84type withHardWraps struct {
85}
86
87func (o *withHardWraps) SetConfig(c *renderer.Config) {
88 c.Options[optHardWraps] = true
89}
90
91func (o *withHardWraps) SetHTMLOption(c *Config) {
92 c.HardWraps = true
93}
94
95// WithHardWraps is a functional option that indicates whether softline breaks
96// should be rendered as '<br>'.
97func WithHardWraps() interface {
98 renderer.Option
99 Option
100} {
101 return &withHardWraps{}
102}
103
// optEastAsianLineBreaks is the option name used by WithEastAsianLineBreaks.
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"

// An EastAsianLineBreaks is a style of east asian line breaks.
type EastAsianLineBreaks int

const (
	// EastAsianLineBreaksNone renders line breaks as they are.
	EastAsianLineBreaksNone EastAsianLineBreaks = iota
	// EastAsianLineBreaksSimple follows east_asian_line_breaks in Pandoc.
	EastAsianLineBreaksSimple
	// EastAsianLineBreaksCSS3Draft follows CSS text level3 "Segment Break Transformation Rules" with some enhancements.
	EastAsianLineBreaksCSS3Draft
)
118
119func (b EastAsianLineBreaks) softLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
120 switch b {
121 case EastAsianLineBreaksNone:
122 return false
123 case EastAsianLineBreaksSimple:
124 return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
125 case EastAsianLineBreaksCSS3Draft:
126 return eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
127 }
128 return false
129}
130
131func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
132 // Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
133 // References:
134 // - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
135 // - https://github.com/w3c/csswg-drafts/issues/5086
136
137 // Rule1:
138 // If the character immediately before or immediately after the segment break is
139 // the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
140 if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
141 return false
142 }
143
144 // Rule2:
145 // Otherwise, if the East Asian Width property of both the character before and after the segment break is
146 // F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
147 thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
148 siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
149 if (thisLastRuneEastAsianWidth == "F" ||
150 thisLastRuneEastAsianWidth == "W" ||
151 thisLastRuneEastAsianWidth == "H") &&
152 (siblingFirstRuneEastAsianWidth == "F" ||
153 siblingFirstRuneEastAsianWidth == "W" ||
154 siblingFirstRuneEastAsianWidth == "H") {
155 return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
156 }
157
158 // Rule3:
159 // Otherwise, if either the character before or after the segment break belongs to
160 // the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
161 // then the segment break is removed.
162 if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
163 unicode.IsPunct(thisLastRune) ||
164 thisLastRune == '\u3000' ||
165 util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
166 unicode.IsPunct(siblingFirstRune) ||
167 siblingFirstRune == '\u3000' {
168 return false
169 }
170
171 // Rule4:
172 // Otherwise, the segment break is converted to a space (U+0020).
173 return true
174}
175
176type withEastAsianLineBreaks struct {
177 eastAsianLineBreaksStyle EastAsianLineBreaks
178}
179
180func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
181 c.Options[optEastAsianLineBreaks] = o.eastAsianLineBreaksStyle
182}
183
184func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
185 c.EastAsianLineBreaks = o.eastAsianLineBreaksStyle
186}
187
188// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
189// between east asian wide characters should be ignored.
190func WithEastAsianLineBreaks(e EastAsianLineBreaks) interface {
191 renderer.Option
192 Option
193} {
194 return &withEastAsianLineBreaks{e}
195}
196
197// XHTML is an option name used in WithXHTML.
198const optXHTML renderer.OptionName = "XHTML"
199
200type withXHTML struct {
201}
202
203func (o *withXHTML) SetConfig(c *renderer.Config) {
204 c.Options[optXHTML] = true
205}
206
207func (o *withXHTML) SetHTMLOption(c *Config) {
208 c.XHTML = true
209}
210
211// WithXHTML is a functional option indicates that nodes should be rendered in
212// xhtml instead of HTML5.
213func WithXHTML() interface {
214 Option
215 renderer.Option
216} {
217 return &withXHTML{}
218}
219
220// Unsafe is an option name used in WithUnsafe.
221const optUnsafe renderer.OptionName = "Unsafe"
222
223type withUnsafe struct {
224}
225
226func (o *withUnsafe) SetConfig(c *renderer.Config) {
227 c.Options[optUnsafe] = true
228}
229
230func (o *withUnsafe) SetHTMLOption(c *Config) {
231 c.Unsafe = true
232}
233
234// WithUnsafe is a functional option that renders dangerous contents
235// (raw htmls and potentially dangerous links) as it is.
236func WithUnsafe() interface {
237 renderer.Option
238 Option
239} {
240 return &withUnsafe{}
241}
242
// A Renderer struct is an implementation of renderer.NodeRenderer that renders
// nodes as (X)HTML.
type Renderer struct {
	// Config is embedded, so its fields (Writer, HardWraps, XHTML, ...) and
	// its SetOption method are available directly on the Renderer.
	Config
}
248
249// NewRenderer returns a new Renderer with given options.
250func NewRenderer(opts ...Option) renderer.NodeRenderer {
251 r := &Renderer{
252 Config: NewConfig(),
253 }
254
255 for _, opt := range opts {
256 opt.SetHTMLOption(&r.Config)
257 }
258 return r
259}
260
261// RegisterFuncs implements NodeRenderer.RegisterFuncs .
262func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
263 // blocks
264
265 reg.Register(ast.KindDocument, r.renderDocument)
266 reg.Register(ast.KindHeading, r.renderHeading)
267 reg.Register(ast.KindBlockquote, r.renderBlockquote)
268 reg.Register(ast.KindCodeBlock, r.renderCodeBlock)
269 reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock)
270 reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock)
271 reg.Register(ast.KindList, r.renderList)
272 reg.Register(ast.KindListItem, r.renderListItem)
273 reg.Register(ast.KindParagraph, r.renderParagraph)
274 reg.Register(ast.KindTextBlock, r.renderTextBlock)
275 reg.Register(ast.KindThematicBreak, r.renderThematicBreak)
276 reg.Register(ast.KindLinkReferenceDefinition, func(
277 _ util.BufWriter, _ []byte, _ ast.Node, _ bool) (ast.WalkStatus, error) {
278 return ast.WalkSkipChildren, nil
279 })
280
281 // inlines
282
283 reg.Register(ast.KindAutoLink, r.renderAutoLink)
284 reg.Register(ast.KindCodeSpan, r.renderCodeSpan)
285 reg.Register(ast.KindEmphasis, r.renderEmphasis)
286 reg.Register(ast.KindImage, r.renderImage)
287 reg.Register(ast.KindLink, r.renderLink)
288 reg.Register(ast.KindRawHTML, r.renderRawHTML)
289 reg.Register(ast.KindText, r.renderText)
290 reg.Register(ast.KindString, r.renderString)
291}
292
293func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
294 l := n.Lines().Len()
295 for i := range l {
296 line := n.Lines().At(i)
297 r.Writer.RawWrite(w, line.Value(source))
298 }
299}
300
// GlobalAttributeFilter defines attribute names which any elements can have.
// The element-specific filters in this file are built from it.
var GlobalAttributeFilter = util.NewBytesFilterString(`accesskey,autocapitalize,autofocus,class,contenteditable,dir,draggable,enterkeyhint,hidden,id,inert,inputmode,is,itemid,itemprop,itemref,itemscope,itemtype,lang,part,role,slot,spellcheck,style,tabindex,title,translate`) // nolint:lll
303
304func (r *Renderer) renderDocument(
305 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
306 // nothing to do
307 return ast.WalkContinue, nil
308}
309
310// HeadingAttributeFilter defines attribute names which heading elements can have.
311var HeadingAttributeFilter = GlobalAttributeFilter
312
313func (r *Renderer) renderHeading(
314 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
315 n := node.(*ast.Heading)
316 if entering {
317 _, _ = w.WriteString("<h")
318 _ = w.WriteByte("0123456"[n.Level])
319 if n.Attributes() != nil {
320 RenderAttributes(w, node, HeadingAttributeFilter)
321 }
322 _ = w.WriteByte('>')
323 } else {
324 _, _ = w.WriteString("</h")
325 _ = w.WriteByte("0123456"[n.Level])
326 _, _ = w.WriteString(">\n")
327 }
328 return ast.WalkContinue, nil
329}
330
331// BlockquoteAttributeFilter defines attribute names which blockquote elements can have.
332var BlockquoteAttributeFilter = GlobalAttributeFilter.ExtendString(`cite`)
333
334func (r *Renderer) renderBlockquote(
335 w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
336 if entering {
337 if n.Attributes() != nil {
338 _, _ = w.WriteString("<blockquote")
339 RenderAttributes(w, n, BlockquoteAttributeFilter)
340 _ = w.WriteByte('>')
341 } else {
342 _, _ = w.WriteString("<blockquote>\n")
343 }
344 } else {
345 _, _ = w.WriteString("</blockquote>\n")
346 }
347 return ast.WalkContinue, nil
348}
349
350func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
351 if entering {
352 _, _ = w.WriteString("<pre><code>")
353 r.writeLines(w, source, n)
354 } else {
355 _, _ = w.WriteString("</code></pre>\n")
356 }
357 return ast.WalkContinue, nil
358}
359
360func (r *Renderer) renderFencedCodeBlock(
361 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
362 n := node.(*ast.FencedCodeBlock)
363 if entering {
364 _, _ = w.WriteString("<pre><code")
365 language := n.Language(source)
366 if language != nil {
367 _, _ = w.WriteString(" class=\"language-")
368 r.Writer.Write(w, language)
369 _, _ = w.WriteString("\"")
370 }
371 _ = w.WriteByte('>')
372 r.writeLines(w, source, n)
373 } else {
374 _, _ = w.WriteString("</code></pre>\n")
375 }
376 return ast.WalkContinue, nil
377}
378
379func (r *Renderer) renderHTMLBlock(
380 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
381 n := node.(*ast.HTMLBlock)
382 if entering {
383 if r.Unsafe {
384 l := n.Lines().Len()
385 for i := range l {
386 line := n.Lines().At(i)
387 r.Writer.SecureWrite(w, line.Value(source))
388 }
389 } else {
390 _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
391 }
392 } else {
393 if n.HasClosure() {
394 if r.Unsafe {
395 closure := n.ClosureLine
396 r.Writer.SecureWrite(w, closure.Value(source))
397 } else {
398 _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
399 }
400 }
401 }
402 return ast.WalkContinue, nil
403}
404
405// ListAttributeFilter defines attribute names which list elements can have.
406var ListAttributeFilter = GlobalAttributeFilter.ExtendString(`start,reversed,type`)
407
408func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
409 n := node.(*ast.List)
410 tag := "ul"
411 if n.IsOrdered() {
412 tag = "ol"
413 }
414 if entering {
415 _ = w.WriteByte('<')
416 _, _ = w.WriteString(tag)
417 if n.IsOrdered() && n.Start != 1 {
418 _, _ = fmt.Fprintf(w, " start=\"%d\"", n.Start)
419 }
420 if n.Attributes() != nil {
421 RenderAttributes(w, n, ListAttributeFilter)
422 }
423 _, _ = w.WriteString(">\n")
424 } else {
425 _, _ = w.WriteString("</")
426 _, _ = w.WriteString(tag)
427 _, _ = w.WriteString(">\n")
428 }
429 return ast.WalkContinue, nil
430}
431
432// ListItemAttributeFilter defines attribute names which list item elements can have.
433var ListItemAttributeFilter = GlobalAttributeFilter.ExtendString(`value`)
434
435func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
436 if entering {
437 if n.Attributes() != nil {
438 _, _ = w.WriteString("<li")
439 RenderAttributes(w, n, ListItemAttributeFilter)
440 _ = w.WriteByte('>')
441 } else {
442 _, _ = w.WriteString("<li>")
443 }
444 fc := n.FirstChild()
445 if fc != nil {
446 if _, ok := fc.(*ast.TextBlock); !ok {
447 _ = w.WriteByte('\n')
448 }
449 }
450 } else {
451 _, _ = w.WriteString("</li>\n")
452 }
453 return ast.WalkContinue, nil
454}
455
456// ParagraphAttributeFilter defines attribute names which paragraph elements can have.
457var ParagraphAttributeFilter = GlobalAttributeFilter
458
459func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
460 if entering {
461 if n.Attributes() != nil {
462 _, _ = w.WriteString("<p")
463 RenderAttributes(w, n, ParagraphAttributeFilter)
464 _ = w.WriteByte('>')
465 } else {
466 _, _ = w.WriteString("<p>")
467 }
468 } else {
469 _, _ = w.WriteString("</p>\n")
470 }
471 return ast.WalkContinue, nil
472}
473
474func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
475 if !entering {
476 if n.NextSibling() != nil && n.FirstChild() != nil {
477 _ = w.WriteByte('\n')
478 }
479 }
480 return ast.WalkContinue, nil
481}
482
483// ThematicAttributeFilter defines attribute names which hr elements can have.
484var ThematicAttributeFilter = GlobalAttributeFilter.ExtendString(`align,color,noshade,size,width`)
485
486func (r *Renderer) renderThematicBreak(
487 w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
488 if !entering {
489 return ast.WalkContinue, nil
490 }
491 _, _ = w.WriteString("<hr")
492 if n.Attributes() != nil {
493 RenderAttributes(w, n, ThematicAttributeFilter)
494 }
495 if r.XHTML {
496 _, _ = w.WriteString(" />\n")
497 } else {
498 _, _ = w.WriteString(">\n")
499 }
500 return ast.WalkContinue, nil
501}
502
503// LinkAttributeFilter defines attribute names which link elements can have.
504var LinkAttributeFilter = GlobalAttributeFilter.ExtendString(`download,href,lang,media,ping,referrerpolicy,rel,shape,target`) // nolint:lll
505
506func (r *Renderer) renderAutoLink(
507 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
508 n := node.(*ast.AutoLink)
509 if !entering {
510 return ast.WalkContinue, nil
511 }
512 _, _ = w.WriteString(`<a href="`)
513 url := util.URLEscape(n.URL(source), false)
514 label := n.Label(source)
515 if n.AutoLinkType == ast.AutoLinkEmail && !bytes.HasPrefix(bytes.ToLower(url), []byte("mailto:")) {
516 _, _ = w.WriteString("mailto:")
517 }
518 if r.Unsafe || !IsDangerousURL(url) {
519 _, _ = w.Write(util.EscapeHTML(url))
520 }
521 if n.Attributes() != nil {
522 _ = w.WriteByte('"')
523 RenderAttributes(w, n, LinkAttributeFilter)
524 _ = w.WriteByte('>')
525 } else {
526 _, _ = w.WriteString(`">`)
527 }
528 _, _ = w.Write(util.EscapeHTML(label))
529 _, _ = w.WriteString(`</a>`)
530 return ast.WalkContinue, nil
531}
532
533// CodeAttributeFilter defines attribute names which code elements can have.
534var CodeAttributeFilter = GlobalAttributeFilter
535
536func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
537 if entering {
538 if n.Attributes() != nil {
539 _, _ = w.WriteString("<code")
540 RenderAttributes(w, n, CodeAttributeFilter)
541 _ = w.WriteByte('>')
542 } else {
543 _, _ = w.WriteString("<code>")
544 }
545 for c := n.FirstChild(); c != nil; c = c.NextSibling() {
546 segment := c.(*ast.Text).Segment
547 value := segment.Value(source)
548 if bytes.HasSuffix(value, []byte("\n")) {
549 r.Writer.RawWrite(w, value[:len(value)-1])
550 r.Writer.RawWrite(w, []byte(" "))
551 } else {
552 r.Writer.RawWrite(w, value)
553 }
554 }
555 return ast.WalkSkipChildren, nil
556 }
557 _, _ = w.WriteString("</code>")
558 return ast.WalkContinue, nil
559}
560
561// EmphasisAttributeFilter defines attribute names which emphasis elements can have.
562var EmphasisAttributeFilter = GlobalAttributeFilter
563
564func (r *Renderer) renderEmphasis(
565 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
566 n := node.(*ast.Emphasis)
567 tag := "em"
568 if n.Level == 2 {
569 tag = "strong"
570 }
571 if entering {
572 _ = w.WriteByte('<')
573 _, _ = w.WriteString(tag)
574 if n.Attributes() != nil {
575 RenderAttributes(w, n, EmphasisAttributeFilter)
576 }
577 _ = w.WriteByte('>')
578 } else {
579 _, _ = w.WriteString("</")
580 _, _ = w.WriteString(tag)
581 _ = w.WriteByte('>')
582 }
583 return ast.WalkContinue, nil
584}
585
586func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
587 n := node.(*ast.Link)
588 if entering {
589 _, _ = w.WriteString("<a href=\"")
590 dest := util.URLEscape(n.Destination, true)
591 if r.Unsafe || !IsDangerousURL(dest) {
592 _, _ = w.Write(util.EscapeHTML(dest))
593 }
594 _ = w.WriteByte('"')
595 if n.Title != nil {
596 _, _ = w.WriteString(` title="`)
597 r.Writer.Write(w, n.Title)
598 _ = w.WriteByte('"')
599 }
600 if n.Attributes() != nil {
601 RenderAttributes(w, n, LinkAttributeFilter)
602 }
603 _ = w.WriteByte('>')
604 } else {
605 _, _ = w.WriteString("</a>")
606 }
607 return ast.WalkContinue, nil
608}
609
610// ImageAttributeFilter defines attribute names which image elements can have.
611var ImageAttributeFilter = GlobalAttributeFilter.ExtendString(`align,border,crossorigin,decoding,height,importance,intrinsicsize,ismap,loading,referrerpolicy,sizes,srcset,usemap,width`) // nolint: lll
612
613func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
614 if !entering {
615 return ast.WalkContinue, nil
616 }
617 n := node.(*ast.Image)
618 _, _ = w.WriteString("<img src=\"")
619 dest := util.URLEscape(n.Destination, true)
620 if r.Unsafe || !IsDangerousURL(dest) {
621 _, _ = w.Write(util.EscapeHTML(dest))
622 }
623 _, _ = w.WriteString(`" alt="`)
624 r.renderTexts(w, source, n)
625 _ = w.WriteByte('"')
626 if n.Title != nil {
627 _, _ = w.WriteString(` title="`)
628 r.Writer.Write(w, n.Title)
629 _ = w.WriteByte('"')
630 }
631 if n.Attributes() != nil {
632 RenderAttributes(w, n, ImageAttributeFilter)
633 }
634 if r.XHTML {
635 _, _ = w.WriteString(" />")
636 } else {
637 _, _ = w.WriteString(">")
638 }
639 return ast.WalkSkipChildren, nil
640}
641
642func (r *Renderer) renderRawHTML(
643 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
644 if !entering {
645 return ast.WalkSkipChildren, nil
646 }
647 if r.Unsafe {
648 n := node.(*ast.RawHTML)
649 l := n.Segments.Len()
650 for i := range l {
651 segment := n.Segments.At(i)
652 _, _ = w.Write(segment.Value(source))
653 }
654 return ast.WalkSkipChildren, nil
655 }
656 _, _ = w.WriteString("<!-- raw HTML omitted -->")
657 return ast.WalkSkipChildren, nil
658}
659
660func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
661 if !entering {
662 return ast.WalkContinue, nil
663 }
664 n := node.(*ast.Text)
665 segment := n.Segment
666 if n.IsRaw() {
667 r.Writer.RawWrite(w, segment.Value(source))
668 } else {
669 value := segment.Value(source)
670 r.Writer.Write(w, value)
671 if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
672 if r.XHTML {
673 _, _ = w.WriteString("<br />\n")
674 } else {
675 _, _ = w.WriteString("<br>\n")
676 }
677 } else if n.SoftLineBreak() {
678 if r.EastAsianLineBreaks != EastAsianLineBreaksNone && len(value) != 0 {
679 sibling := node.NextSibling()
680 if sibling != nil && sibling.Kind() == ast.KindText {
681 if siblingText := sibling.(*ast.Text).Value(source); len(siblingText) != 0 {
682 thisLastRune := util.ToRune(value, len(value)-1)
683 siblingFirstRune, _ := utf8.DecodeRune(siblingText)
684 if r.EastAsianLineBreaks.softLineBreak(thisLastRune, siblingFirstRune) {
685 _ = w.WriteByte('\n')
686 }
687 }
688 }
689 } else {
690 _ = w.WriteByte('\n')
691 }
692 }
693 }
694 return ast.WalkContinue, nil
695}
696
697func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
698 if !entering {
699 return ast.WalkContinue, nil
700 }
701 n := node.(*ast.String)
702 if n.IsCode() {
703 _, _ = w.Write(n.Value)
704 } else {
705 if n.IsRaw() {
706 r.Writer.RawWrite(w, n.Value)
707 } else {
708 r.Writer.Write(w, n.Value)
709 }
710 }
711 return ast.WalkContinue, nil
712}
713
714func (r *Renderer) renderTexts(w util.BufWriter, source []byte, n ast.Node) {
715 for c := n.FirstChild(); c != nil; c = c.NextSibling() {
716 if s, ok := c.(*ast.String); ok {
717 _, _ = r.renderString(w, source, s, true)
718 } else if t, ok := c.(*ast.Text); ok {
719 _, _ = r.renderText(w, source, t, true)
720 } else {
721 r.renderTexts(w, source, c)
722 }
723 }
724}
725
726var dataPrefix = []byte("data-")
727
728// RenderAttributes renders given node's attributes.
729// You can specify attribute names to render by the filter.
730// If filter is nil, RenderAttributes renders all attributes.
731func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) {
732 for _, attr := range node.Attributes() {
733 if filter != nil && !filter.Contains(attr.Name) {
734 if !bytes.HasPrefix(attr.Name, dataPrefix) {
735 continue
736 }
737 }
738 _, _ = w.WriteString(" ")
739 _, _ = w.Write(attr.Name)
740 _, _ = w.WriteString(`="`)
741 // TODO: convert numeric values to strings
742 var value []byte
743 switch typed := attr.Value.(type) {
744 case []byte:
745 value = typed
746 case string:
747 value = util.StringToReadOnlyBytes(typed)
748 }
749 _, _ = w.Write(util.EscapeHTML(value))
750 _ = w.WriteByte('"')
751 }
752}
753
// A Writer interface writes textual contents to a writer.
type Writer interface {
	// Write writes the given source to writer, resolving character
	// references and unescaping backslash-escaped characters.
	Write(writer util.BufWriter, source []byte)

	// RawWrite writes the given source to writer without resolving
	// references or unescaping backslash-escaped characters.
	RawWrite(writer util.BufWriter, source []byte)

	// SecureWrite writes the given source to writer, replacing insecure
	// characters.
	SecureWrite(writer util.BufWriter, source []byte)
}
767
// replacementCharacter is the UTF-8 encoding of U+FFFD, which the default
// writer emits in place of NUL bytes.
var replacementCharacter = []byte("\ufffd")
769
// A WriterConfig struct has configurations for the HTML based writers.
type WriterConfig struct {
	// EscapedSpace is an option that indicates that a '\' escaped half-space(0x20) should not be rendered.
	EscapedSpace bool
}

// A WriterOption is a function that sets options for HTML based writers.
type WriterOption func(*WriterConfig)

// WithEscapedSpace is a WriterOption that indicates that a '\' escaped
// half-space(0x20) should not be rendered.
func WithEscapedSpace() WriterOption {
	enable := func(cfg *WriterConfig) { cfg.EscapedSpace = true }
	return enable
}
785
786type defaultWriter struct {
787 WriterConfig
788}
789
790// NewWriter returns a new Writer.
791func NewWriter(opts ...WriterOption) Writer {
792 w := &defaultWriter{}
793 for _, opt := range opts {
794 opt(&w.WriterConfig)
795 }
796 return w
797}
798
799func escapeRune(writer util.BufWriter, r rune) {
800 if r < 256 {
801 v := util.EscapeHTMLByte(byte(r))
802 if v != nil {
803 _, _ = writer.Write(v)
804 return
805 }
806 }
807 _, _ = writer.WriteRune(util.ToValidRune(r))
808}
809
810func (d *defaultWriter) SecureWrite(writer util.BufWriter, source []byte) {
811 n := 0
812 l := len(source)
813 for i := range l {
814 if source[i] == '\u0000' {
815 _, _ = writer.Write(source[i-n : i])
816 n = 0
817 _, _ = writer.Write(replacementCharacter)
818 continue
819 }
820 n++
821 }
822 if n != 0 {
823 _, _ = writer.Write(source[l-n:])
824 }
825}
826
827func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
828 n := 0
829 l := len(source)
830 for i := range l {
831 v := util.EscapeHTMLByte(source[i])
832 if v != nil {
833 _, _ = writer.Write(source[i-n : i])
834 n = 0
835 _, _ = writer.Write(v)
836 continue
837 }
838 n++
839 }
840 if n != 0 {
841 _, _ = writer.Write(source[l-n:])
842 }
843}
844
// Write writes source to writer in a single pass. While scanning it:
//   - drops the backslash of a backslash escape when the escaped byte
//     satisfies util.IsPunct, and drops both the backslash and the space for
//     an escaped space when EscapedSpace is set;
//   - replaces NUL bytes with replacementCharacter;
//   - resolves hexadecimal ("&#x22;") and decimal ("&#1234;") numeric
//     character references and named entity references.
//
// Everything else is forwarded to RawWrite in runs.
func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
	// escaped is true when the previous byte was a backslash that has not
	// been consumed yet.
	escaped := false
	var ok bool
	limit := len(source)
	// n is the start of the run of source that has not been flushed yet.
	n := 0
	for i := 0; i < limit; i++ {
		c := source[i]
		if escaped {
			if util.IsPunct(c) {
				// Flush up to, but not including, the backslash at i-1 and
				// restart the pending run at c, so the backslash is dropped.
				d.RawWrite(writer, source[n:i-1])
				n = i
				escaped = false
				continue
			}
			if d.EscapedSpace && c == ' ' {
				// Same flush, but the pending run restarts after the space,
				// so neither the backslash nor the space is written.
				d.RawWrite(writer, source[n:i-1])
				n = i + 1
				escaped = false
				continue
			}
		}
		if c == '\x00' {
			d.RawWrite(writer, source[n:i])
			d.RawWrite(writer, replacementCharacter)
			n = i + 1
			escaped = false
			continue
		}
		if c == '&' {
			// pos remembers the '&'; i is reused below as the scan cursor.
			pos := i
			next := i + 1
			if next < limit && source[next] == '#' {
				nnext := next + 1
				if nnext < limit {
					nc := source[nnext]
					// code point like #x22;
					// NOTE: this parses as (nnext < limit && nc == 'x') || nc == 'X';
					// the bound is already guaranteed by the enclosing if.
					if nnext < limit && nc == 'x' || nc == 'X' {
						start := nnext + 1
						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
						// Accept at most six hex digits terminated by ';'.
						if ok && i < limit && source[i] == ';' && i-start < 7 {
							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
							d.RawWrite(writer, source[n:pos])
							n = i + 1
							escapeRune(writer, rune(v))
							continue
						}
						// code point like #1234;
					} else if nc >= '0' && nc <= '9' {
						start := nnext
						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
						// Accept at most seven decimal digits terminated by ';'.
						if ok && i < limit && i-start < 8 && source[i] == ';' {
							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 10, 32)
							d.RawWrite(writer, source[n:pos])
							n = i + 1
							escapeRune(writer, rune(v))
							continue
						}
					}
				}
			} else {
				start := next
				i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric)
				// entity reference
				if ok && i < limit && source[i] == ';' {
					name := util.BytesToReadOnlyString(source[start:i])
					// This ok shadows the outer one for the lookup result only.
					entity, ok := util.LookUpHTML5EntityByName(name)
					if ok {
						d.RawWrite(writer, source[n:pos])
						n = i + 1
						d.RawWrite(writer, entity.Characters)
						continue
					}
				}
			}
			// No reference was recognized: rewind the cursor to the '&' so
			// that scanning resumes at the byte right after it.
			i = next - 1
		}
		if c == '\\' {
			escaped = true
			continue
		}
		escaped = false
	}
	// Flush whatever is still pending.
	d.RawWrite(writer, source[n:])
}
929
// DefaultWriter is a default instance of the Writer, created by NewWriter
// with no options.
var DefaultWriter = NewWriter()
932
// URL prefixes examined by IsDangerousURL.
var (
	// bDataImage is the prefix of data URLs that carry an image; the
	// following five values are the image subtypes treated as safe.
	bDataImage = []byte("data:image/")
	bPng       = []byte("png;")
	bGif       = []byte("gif;")
	bJpeg      = []byte("jpeg;")
	bWebp      = []byte("webp;")
	bSvg       = []byte("svg+xml;")

	// Scheme prefixes treated as dangerous.
	bJs   = []byte("javascript:")
	bVb   = []byte("vbscript:")
	bFile = []byte("file:")
	bData = []byte("data:")
)
943
// hasPrefix reports whether s begins with prefix, ignoring case. An empty
// prefix matches every s.
//
// The comparison uses bytes.EqualFold, which avoids allocating lowercased
// copies of both arguments on every call. For the ASCII prefixes used in this
// file the result is the same as comparing lowercased copies; a non-ASCII
// prefix would be compared under Unicode simple case folding.
func hasPrefix(s, prefix []byte) bool {
	return len(s) >= len(prefix) && bytes.EqualFold(s[:len(prefix)], prefix)
}
947
948// IsDangerousURL returns true if the given url seems a potentially dangerous url,
949// otherwise false.
950func IsDangerousURL(url []byte) bool {
951 if hasPrefix(url, bDataImage) && len(url) >= 11 {
952 v := url[11:]
953 if hasPrefix(v, bPng) || hasPrefix(v, bGif) ||
954 hasPrefix(v, bJpeg) || hasPrefix(v, bWebp) ||
955 hasPrefix(v, bSvg) {
956 return false
957 }
958 return true
959 }
960 return hasPrefix(url, bJs) || hasPrefix(url, bVb) ||
961 hasPrefix(url, bFile) || hasPrefix(url, bData)
962}