// Package parser contains stuff that is related to parsing Markdown text.
2package parser
3
4import (
5 "fmt"
6 "strings"
7 "sync"
8
9 "github.com/yuin/goldmark/ast"
10 "github.com/yuin/goldmark/text"
11 "github.com/yuin/goldmark/util"
12)
13
// A Reference interface represents a link reference in Markdown text.
type Reference interface {
	// String implements Stringer.
	String() string

	// Label returns the label of the reference.
	Label() []byte

	// Destination returns the destination (URL) of the reference.
	Destination() []byte

	// Title returns the title of the reference.
	Title() []byte
}
28
// reference is a Reference implementation that stores its label,
// destination and title as plain byte slices.
type reference struct {
	// label is the value returned by Label.
	label []byte
	// destination is the value returned by Destination.
	destination []byte
	// title is the value returned by Title.
	title []byte
}
34
35// NewReference returns a new Reference.
36func NewReference(label, destination, title []byte) Reference {
37 return &reference{label, destination, title}
38}
39
40func newASTReference(v *ast.LinkReferenceDefinition) Reference {
41 return &astReference{v}
42}
43
44func (r *reference) Label() []byte {
45 return r.label
46}
47
48func (r *reference) Destination() []byte {
49 return r.destination
50}
51
52func (r *reference) Title() []byte {
53 return r.title
54}
55
56func (r *reference) String() string {
57 return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title)
58}
59
60type astReference struct {
61 v *ast.LinkReferenceDefinition
62}
63
64func (r *astReference) Label() []byte {
65 return r.v.Label
66}
67
68func (r *astReference) Destination() []byte {
69 return r.v.Destination
70}
71
72func (r *astReference) Title() []byte {
73 return r.v.Title
74}
75
76func (r *astReference) String() string {
77 return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.Label(), r.Destination(), r.Title())
78}
79
// An IDs interface is a collection of the element ids.
type IDs interface {
	// Generate generates a new element id from the given value for a node
	// of the given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put puts a given element id to the used ids table.
	Put(value []byte)
}
88
89type ids struct {
90 values map[string]bool
91}
92
93func newIDs() IDs {
94 return &ids{
95 values: map[string]bool{},
96 }
97}
98
99func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
100 value = util.TrimLeftSpace(value)
101 value = util.TrimRightSpace(value)
102 result := []byte{}
103 for i := 0; i < len(value); {
104 v := value[i]
105 l := util.UTF8Len(v)
106 i += int(l)
107 if l != 1 {
108 continue
109 }
110 if util.IsAlphaNumeric(v) {
111 if 'A' <= v && v <= 'Z' {
112 v += 'a' - 'A'
113 }
114 result = append(result, v)
115 } else if util.IsSpace(v) || v == '-' || v == '_' {
116 result = append(result, '-')
117 }
118 }
119 if len(result) == 0 {
120 if kind == ast.KindHeading {
121 result = []byte("heading")
122 } else {
123 result = []byte("id")
124 }
125 }
126 if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
127 s.values[util.BytesToReadOnlyString(result)] = true
128 return result
129 }
130 for i := 1; ; i++ {
131 newResult := fmt.Sprintf("%s-%d", result, i)
132 if _, ok := s.values[newResult]; !ok {
133 s.values[newResult] = true
134 return []byte(newResult)
135 }
136
137 }
138}
139
140func (s *ids) Put(value []byte) {
141 s.values[util.BytesToReadOnlyString(value)] = true
142}
143
// ContextKey is a key that is used to set arbitrary values to the context.
type ContextKey int

// ContextKeyMax is the largest ContextKey value handed out so far.
var ContextKeyMax ContextKey

// NewContextKey returns a new ContextKey value.
//
// Each call increments ContextKeyMax by one and returns the new value. The
// increment is not synchronized.
func NewContextKey() ContextKey {
	next := ContextKeyMax + 1
	ContextKeyMax = next
	return next
}
155
// A Context interface holds information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string

	// Get returns a value associated with the given key.
	Get(ContextKey) any

	// ComputeIfAbsent computes a value if a value associated with the given key is absent and returns the value.
	ComputeIfAbsent(ContextKey, func() any) any

	// Set sets the given value to the context.
	Set(ContextKey, any)

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if current line is blank.
	BlockOffset() int

	// SetBlockOffset sets a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns an indent width on current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if current line is blank.
	BlockIndent() int

	// SetBlockIndent sets an indent width on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns a first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns a last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns a list of nodes that are currently in parsing.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets a list of nodes that are currently in parsing.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns a last node that is currently in parsing.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if current position seems to be in link label.
	IsInLinkLabel() bool
}
230
// A ContextConfig struct is a data structure that holds configuration of the Context.
type ContextConfig struct {
	// IDs is the collection of element ids that the Context will use.
	IDs IDs
}

// A ContextOption is a functional option type for the Context.
type ContextOption func(*ContextConfig)
238
239// WithIDs is a functional option for the Context.
240func WithIDs(ids IDs) ContextOption {
241 return func(c *ContextConfig) {
242 c.IDs = ids
243 }
244}
245
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
	// store holds the values set through Set, indexed by ContextKey.
	store []any
	// ids is the collection of element ids returned by IDs.
	ids IDs
	// refs holds the added references, keyed by util.ToLinkReference of
	// their label.
	refs map[string]Reference
	// blockOffset and blockIndent are initialized to -1 by NewContext.
	blockOffset int
	blockIndent int
	// delimiters is the head and lastDelimiter the tail of the current
	// delimiter list; both are nil when the list is empty.
	delimiters    *Delimiter
	lastDelimiter *Delimiter
	// openedBlocks is the list returned by OpenedBlocks.
	openedBlocks []Block
}
256
257// NewContext returns a new Context.
258func NewContext(options ...ContextOption) Context {
259 cfg := &ContextConfig{
260 IDs: newIDs(),
261 }
262 for _, option := range options {
263 option(cfg)
264 }
265
266 return &parseContext{
267 store: make([]any, ContextKeyMax+1),
268 refs: map[string]Reference{},
269 ids: cfg.IDs,
270 blockOffset: -1,
271 blockIndent: -1,
272 delimiters: nil,
273 lastDelimiter: nil,
274 openedBlocks: []Block{},
275 }
276}
277
278func (p *parseContext) Get(key ContextKey) any {
279 return p.store[key]
280}
281
282func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() any) any {
283 v := p.store[key]
284 if v == nil {
285 v = f()
286 p.store[key] = v
287 }
288 return v
289}
290
291func (p *parseContext) Set(key ContextKey, value any) {
292 p.store[key] = value
293}
294
// IDs returns the collection of element ids held by this context.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the stored block offset (-1 until it has been set).
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset stores v as the block offset.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the stored block indent (-1 until it has been set).
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent stores v as the block indent.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the current delimiter list, or nil if
// the list is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the current delimiter list, or nil if
// the list is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
322
323func (p *parseContext) PushDelimiter(d *Delimiter) {
324 if p.delimiters == nil {
325 p.delimiters = d
326 p.lastDelimiter = d
327 } else {
328 l := p.lastDelimiter
329 p.lastDelimiter = d
330 l.NextDelimiter = d
331 d.PreviousDelimiter = l
332 }
333}
334
// RemoveDelimiter unlinks d from the current delimiter list and then
// disposes of the delimiter node itself: if d.Length is not zero, d and its
// segment are handed to ast.MergeOrReplaceTextSegment, otherwise d is removed
// from its parent node.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	if d.PreviousDelimiter == nil {
		// d has no predecessor, so its successor becomes the head.
		p.delimiters = d.NextDelimiter
	} else {
		// Bridge the neighbours of d over it.
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	if d.NextDelimiter == nil {
		// d has no successor, so its predecessor becomes the tail.
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the (possibly new) head and tail have no dangling links.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	// Detach d from the list completely.
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	if d.Length != 0 {
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
361
362func (p *parseContext) ClearDelimiters(bottom ast.Node) {
363 if p.lastDelimiter == nil {
364 return
365 }
366 var c ast.Node
367 for c = p.lastDelimiter; c != nil && c != bottom; {
368 prev := c.PreviousSibling()
369 if d, ok := c.(*Delimiter); ok {
370 p.RemoveDelimiter(d)
371 }
372 c = prev
373 }
374}
375
376func (p *parseContext) AddReference(ref Reference) {
377 key := util.ToLinkReference(ref.Label())
378 if _, ok := p.refs[key]; !ok {
379 p.refs[key] = ref
380 }
381}
382
383func (p *parseContext) Reference(label string) (Reference, bool) {
384 v, ok := p.refs[label]
385 return v, ok
386}
387
388func (p *parseContext) References() []Reference {
389 ret := make([]Reference, 0, len(p.refs))
390 for _, v := range p.refs {
391 ret = append(ret, v)
392 }
393 return ret
394}
395
396func (p *parseContext) String() string {
397 refs := []string{}
398 for _, r := range p.refs {
399 refs = append(refs, r.String())
400 }
401
402 return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
403}
404
// OpenedBlocks returns the stored list of opened blocks. The slice is
// returned as is, not copied.
func (p *parseContext) OpenedBlocks() []Block {
	return p.openedBlocks
}

// SetOpenedBlocks replaces the stored list of opened blocks with v. The
// slice is stored as is, not copied.
func (p *parseContext) SetOpenedBlocks(v []Block) {
	p.openedBlocks = v
}
412
413func (p *parseContext) LastOpenedBlock() Block {
414 if l := len(p.openedBlocks); l != 0 {
415 return p.openedBlocks[l-1]
416 }
417 return Block{}
418}
419
420func (p *parseContext) IsInLinkLabel() bool {
421 tlist := p.Get(linkLabelStateKey)
422 return tlist != nil
423}
424
// State represents parser's state.
// State is designed to use as a bit flag.
type State int

const (
	// None is a default value of the [State].
	// Its numeric value is 1 (1 << 0), not 0; every following constant is
	// the next higher bit.
	None State = 1 << iota

	// Continue indicates parser can continue parsing.
	Continue

	// Close indicates parser cannot parse anymore.
	Close

	// HasChildren indicates parser may have child blocks.
	HasChildren

	// NoChildren indicates parser does not have child blocks.
	NoChildren

	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph
)
450
// A Config struct is a data structure that holds configuration of the Parser.
type Config struct {
	// Options holds arbitrary named options. They are passed to every
	// registered parser and transformer that implements SetOptioner.
	Options map[OptionName]any
	// BlockParsers is a prioritized list of BlockParser values.
	BlockParsers util.PrioritizedSlice /*<BlockParser>*/
	// InlineParsers is a prioritized list of InlineParser values.
	InlineParsers util.PrioritizedSlice /*<InlineParser>*/
	// ParagraphTransformers is a prioritized list of ParagraphTransformer values.
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	// ASTTransformers is a prioritized list of ASTTransformer values.
	ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is set by WithEscapedSpace.
	EscapedSpace bool
}
460
461// NewConfig returns a new Config.
462func NewConfig() *Config {
463 return &Config{
464 Options: map[OptionName]any{},
465 BlockParsers: util.PrioritizedSlice{},
466 InlineParsers: util.PrioritizedSlice{},
467 ParagraphTransformers: util.PrioritizedSlice{},
468 ASTTransformers: util.PrioritizedSlice{},
469 }
470}
471
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given Config.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string
479
480// Attribute is an option name that spacify attributes of elements.
481const optAttribute OptionName = "Attribute"
482
483type withAttribute struct {
484}
485
486func (o *withAttribute) SetParserOption(c *Config) {
487 c.Options[optAttribute] = true
488}
489
490// WithAttribute is a functional option that enables custom attributes.
491func WithAttribute() Option {
492 return &withAttribute{}
493}
494
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}

// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Unacceptable options may be passed.
	// Thus implementations must ignore unacceptable options.
	SetOption(name OptionName, value any)
}
511
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called with any lines.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance a reader
	// position by consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node and either HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must advance
	// a reader position by consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node when
	// the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
554
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance a reader
	// position by consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
571
// A CloseBlocker interface is a callback function that will be
// called when block is closed in the inline parsing.
// Inline parsers that implement it are collected when they are registered.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}

// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched in parsed Paragraphs.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}

// ASTTransformer transforms entire Markdown document AST tree.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
591
592// DefaultBlockParsers returns a new list of default BlockParsers.
593// Priorities of default BlockParsers are:
594//
595// SetextHeadingParser, 100
596// ThematicBreakParser, 200
597// ListParser, 300
598// ListItemParser, 400
599// CodeBlockParser, 500
600// ATXHeadingParser, 600
601// FencedCodeBlockParser, 700
602// BlockquoteParser, 800
603// HTMLBlockParser, 900
604// ParagraphParser, 1000
605func DefaultBlockParsers() []util.PrioritizedValue {
606 return []util.PrioritizedValue{
607 util.Prioritized(NewSetextHeadingParser(), 100),
608 util.Prioritized(NewThematicBreakParser(), 200),
609 util.Prioritized(NewListParser(), 300),
610 util.Prioritized(NewListItemParser(), 400),
611 util.Prioritized(NewCodeBlockParser(), 500),
612 util.Prioritized(NewATXHeadingParser(), 600),
613 util.Prioritized(NewFencedCodeBlockParser(), 700),
614 util.Prioritized(NewBlockquoteParser(), 800),
615 util.Prioritized(NewHTMLBlockParser(), 900),
616 util.Prioritized(NewParagraphParser(), 1000),
617 }
618}
619
620// DefaultInlineParsers returns a new list of default InlineParsers.
621// Priorities of default InlineParsers are:
622//
623// CodeSpanParser, 100
624// LinkParser, 200
625// AutoLinkParser, 300
626// RawHTMLParser, 400
627// EmphasisParser, 500
628func DefaultInlineParsers() []util.PrioritizedValue {
629 return []util.PrioritizedValue{
630 util.Prioritized(NewCodeSpanParser(), 100),
631 util.Prioritized(NewLinkParser(), 200),
632 util.Prioritized(NewAutoLinkParser(), 300),
633 util.Prioritized(NewRawHTMLParser(), 400),
634 util.Prioritized(NewEmphasisParser(), 500),
635 }
636}
637
638// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
639// Priorities of default ParagraphTransformers are:
640//
641// LinkReferenceParagraphTransformer, 100
642func DefaultParagraphTransformers() []util.PrioritizedValue {
643 return []util.PrioritizedValue{
644 util.Prioritized(LinkReferenceParagraphTransformer, 100),
645 }
646}
647
// A Block struct holds a node and correspond parser pair.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is a BlockParser.
	Parser BlockParser
}

// parser is the Parser implementation returned by NewParser.
type parser struct {
	options map[OptionName]any
	// blockParsers holds the block parsers registered for each trigger byte.
	blockParsers [256][]BlockParser
	// freeBlockParsers holds the block parsers whose Trigger returned nil.
	freeBlockParsers []BlockParser
	// inlineParsers holds the inline parsers registered for each trigger byte.
	inlineParsers [256][]InlineParser
	// closeBlockers holds the inline parsers that implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	escapedSpace          bool
	// config is the pending configuration. The first call to Parse consumes
	// it and sets it to nil.
	config *Config
	// initSync guards the one-time setup performed by Parse.
	initSync sync.Once
}
668
669type withBlockParsers struct {
670 value []util.PrioritizedValue
671}
672
673func (o *withBlockParsers) SetParserOption(c *Config) {
674 c.BlockParsers = append(c.BlockParsers, o.value...)
675}
676
677// WithBlockParsers is a functional option that allow you to add
678// BlockParsers to the parser.
679func WithBlockParsers(bs ...util.PrioritizedValue) Option {
680 return &withBlockParsers{bs}
681}
682
683type withInlineParsers struct {
684 value []util.PrioritizedValue
685}
686
687func (o *withInlineParsers) SetParserOption(c *Config) {
688 c.InlineParsers = append(c.InlineParsers, o.value...)
689}
690
691// WithInlineParsers is a functional option that allow you to add
692// InlineParsers to the parser.
693func WithInlineParsers(bs ...util.PrioritizedValue) Option {
694 return &withInlineParsers{bs}
695}
696
697type withParagraphTransformers struct {
698 value []util.PrioritizedValue
699}
700
701func (o *withParagraphTransformers) SetParserOption(c *Config) {
702 c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
703}
704
705// WithParagraphTransformers is a functional option that allow you to add
706// ParagraphTransformers to the parser.
707func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
708 return &withParagraphTransformers{ps}
709}
710
711type withASTTransformers struct {
712 value []util.PrioritizedValue
713}
714
715func (o *withASTTransformers) SetParserOption(c *Config) {
716 c.ASTTransformers = append(c.ASTTransformers, o.value...)
717}
718
719// WithASTTransformers is a functional option that allow you to add
720// ASTTransformers to the parser.
721func WithASTTransformers(ps ...util.PrioritizedValue) Option {
722 return &withASTTransformers{ps}
723}
724
725type withEscapedSpace struct {
726}
727
728func (o *withEscapedSpace) SetParserOption(c *Config) {
729 c.EscapedSpace = true
730}
731
732// WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
733func WithEscapedSpace() Option {
734 return &withEscapedSpace{}
735}
736
737type withOption struct {
738 name OptionName
739 value any
740}
741
742func (o *withOption) SetParserOption(c *Config) {
743 c.Options[o.name] = o.value
744}
745
746// WithOption is a functional option that allow you to set
747// an arbitrary option to the parser.
748func WithOption(name OptionName, value any) Option {
749 return &withOption{name, value}
750}
751
752// NewParser returns a new Parser with given options.
753func NewParser(options ...Option) Parser {
754 config := NewConfig()
755 for _, opt := range options {
756 opt.SetParserOption(config)
757 }
758
759 p := &parser{
760 options: map[OptionName]any{},
761 config: config,
762 }
763
764 return p
765}
766
767func (p *parser) AddOptions(opts ...Option) {
768 for _, opt := range opts {
769 opt.SetParserOption(p.config)
770 }
771}
772
773func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]any) {
774 bp, ok := v.Value.(BlockParser)
775 if !ok {
776 panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
777 }
778 tcs := bp.Trigger()
779 so, ok := v.Value.(SetOptioner)
780 if ok {
781 for oname, ovalue := range options {
782 so.SetOption(oname, ovalue)
783 }
784 }
785 if tcs == nil {
786 p.freeBlockParsers = append(p.freeBlockParsers, bp)
787 } else {
788 for _, tc := range tcs {
789 if p.blockParsers[tc] == nil {
790 p.blockParsers[tc] = []BlockParser{}
791 }
792 p.blockParsers[tc] = append(p.blockParsers[tc], bp)
793 }
794 }
795}
796
797func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]any) {
798 ip, ok := v.Value.(InlineParser)
799 if !ok {
800 panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
801 }
802 tcs := ip.Trigger()
803 so, ok := v.Value.(SetOptioner)
804 if ok {
805 for oname, ovalue := range options {
806 so.SetOption(oname, ovalue)
807 }
808 }
809 if cb, ok := ip.(CloseBlocker); ok {
810 p.closeBlockers = append(p.closeBlockers, cb)
811 }
812 for _, tc := range tcs {
813 if p.inlineParsers[tc] == nil {
814 p.inlineParsers[tc] = []InlineParser{}
815 }
816 p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
817 }
818}
819
820func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]any) {
821 pt, ok := v.Value.(ParagraphTransformer)
822 if !ok {
823 panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
824 }
825 so, ok := v.Value.(SetOptioner)
826 if ok {
827 for oname, ovalue := range options {
828 so.SetOption(oname, ovalue)
829 }
830 }
831 p.paragraphTransformers = append(p.paragraphTransformers, pt)
832}
833
834func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]any) {
835 at, ok := v.Value.(ASTTransformer)
836 if !ok {
837 panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
838 }
839 so, ok := v.Value.(SetOptioner)
840 if ok {
841 for oname, ovalue := range options {
842 so.SetOption(oname, ovalue)
843 }
844 }
845 p.astTransformers = append(p.astTransformers, at)
846}
847
848// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
849type ParseConfig struct {
850 Context Context
851}
852
853// A ParseOption is a functional option type for the Parser.Parse.
854type ParseOption func(c *ParseConfig)
855
856// WithContext is a functional option that allow you to override
857// a default context.
858func WithContext(context Context) ParseOption {
859 return func(c *ParseConfig) {
860 c.Context = context
861 }
862}
863
// Parse parses the Markdown text provided by reader into an AST and returns
// the document root.
//
// The first call performs a one-time setup guarded by p.initSync: the
// configured parsers and transformers are sorted and registered, and the
// pending Config is then set to nil. Parsing itself runs in three steps:
// block parsing, inline parsing of every block, and the AST transformers.
// A new Context is created unless one is supplied through opts.
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
	p.initSync.Do(func() {
		p.config.BlockParsers.Sort()
		for _, v := range p.config.BlockParsers {
			p.addBlockParser(v, p.config.Options)
		}
		// Every trigger byte that has parsers also gets the free block
		// parsers, appended after the trigger-specific ones.
		for i := range p.blockParsers {
			if p.blockParsers[i] != nil {
				p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
			}
		}

		p.config.InlineParsers.Sort()
		for _, v := range p.config.InlineParsers {
			p.addInlineParser(v, p.config.Options)
		}
		p.config.ParagraphTransformers.Sort()
		for _, v := range p.config.ParagraphTransformers {
			p.addParagraphTransformer(v, p.config.Options)
		}
		p.config.ASTTransformers.Sort()
		for _, v := range p.config.ASTTransformers {
			p.addASTTransformer(v, p.config.Options)
		}
		p.escapedSpace = p.config.EscapedSpace
		// The configuration has been consumed; it is not kept around.
		p.config = nil
	})
	c := &ParseConfig{}
	for _, opt := range opts {
		opt(c)
	}
	if c.Context == nil {
		c.Context = NewContext()
	}
	pc := c.Context
	root := ast.NewDocument()
	p.parseBlocks(root, reader, pc)

	// walkBlock calls the callback for the children of a node before the
	// node itself.
	blockReader := text.NewBlockReader(reader.Source(), nil)
	p.walkBlock(root, func(node ast.Node) {
		p.parseBlock(blockReader, node, pc)
	})
	for _, at := range p.astTransformers {
		at.Transform(root, reader, pc)
	}

	// root.Dump(reader.Source(), 0)
	return root
}
913
914func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
915 for _, pt := range p.paragraphTransformers {
916 pt.Transform(node, reader, pc)
917 if node.Parent() == nil {
918 return true
919 }
920 }
921 return false
922}
923
924func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
925 blocks := pc.OpenedBlocks()
926 for i := from; i >= to; i-- {
927 node := blocks[i].Node
928 paragraph, ok := node.(*ast.Paragraph)
929 if ok && node.Parent() != nil {
930 p.transformParagraph(paragraph, reader, pc)
931 }
932 if node.Parent() != nil { // closes only if node has not been transformed
933 blocks[i].Parser.Close(blocks[i].Node, reader, pc)
934 }
935 }
936 if from == len(blocks)-1 {
937 blocks = blocks[0:to]
938 } else {
939 blocks = append(blocks[0:to], blocks[from+1:]...)
940 }
941 pc.SetOpenedBlocks(blocks)
942}
943
// blockOpenResult is the result type of parser.openBlocks.
type blockOpenResult int

const (
	// paragraphContinuation means no block was opened and the last opened
	// paragraph accepted the current line as a continuation.
	paragraphContinuation blockOpenResult = iota + 1
	// newBlocksOpened means at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened means no block was opened and the line is not a
	// paragraph continuation.
	noBlocksOpened
)
951
// openBlocks tries to open new blocks under parent for the current line.
//
// It returns newBlocksOpened if at least one block was opened. Otherwise, if
// the last opened block was a paragraph and its parser's Continue reported
// the Continue state for the current line, it returns paragraphContinuation.
// In every other case it returns noBlocksOpened.
//
// blankLine is recorded on every newly opened node through
// SetBlankPreviousLines.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	// Publish the offset and indent of the current line; both are -1 when
	// the indent width covers the whole line.
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Pick the parsers registered for the first byte after the indent,
	// falling back to the free block parsers.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While the line could still continue a paragraph, only parsers
		// that can interrupt paragraphs are tried.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}

		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		_, blockPos := reader.Position()
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			node.SetPos(blockPos.Start + max(pc.BlockOffset(), 0))

			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previous last block has been detached from the tree, so
			// close the block that is now last in the opened list.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	// Nothing was opened: let the last opened paragraph try to continue.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
1053
// lineStat records, for one line and one nesting level, whether the line was
// blank.
type lineStat struct {
	lineNum int
	level   int
	isBlank bool
}

// isBlankLine reports whether the line lineNum is recorded as blank in stats
// at a nesting level not deeper than level.
//
// It returns true when stats is empty. Otherwise it scans stats backwards,
// starting level entries before the last one, and returns the isBlank value
// of the first entry for lineNum whose level is not greater than level. The
// scan gives up, returning false, as soon as it meets an entry for an earlier
// line or runs out of entries.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	if len(stats) == 0 {
		return true
	}
	for i := len(stats) - 1 - level; i >= 0; i-- {
		entry := stats[i]
		switch {
		case entry.lineNum == lineNum && entry.level <= level:
			return entry.isBlank
		case entry.lineNum < lineNum:
			return false
		}
	}
	return false
}
1075
// parseBlocks performs the block-level parsing pass: it reads lines from
// reader and builds the block structure under parent.
//
// The outer loop skips blank lines and opens the first blocks of a new
// group; the inner loop then feeds one line at a time to the currently
// opened blocks, from the outermost to the innermost, until no block remains
// open. The function returns when the input is exhausted or when no block
// can be opened for a line.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks(nil)
	// blankLines records per line and nesting level whether the line was
	// blank; it is reset for every group of blocks.
	blankLines := make([]lineStat, 0, 128)
	for { // process blocks separated by blank lines
		_, _, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		// first, we try to open blocks
		if p.openBlocks(parent, true, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		blankLines = blankLines[0:0]
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := range l {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// No more lines: close every opened block and stop.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank := isBlankLine(lineNum-1, i+1, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank := isBlankLine(lineNum-1, i, blankLines)
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
1145
1146func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
1147 for c := block.FirstChild(); c != nil; c = c.NextSibling() {
1148 p.walkBlock(c, cb)
1149 }
1150 cb(block)
1151}
1152
// Bit flags describing how a line ends; parseBlock combines them in a single
// uint8 while scanning a line.
const (
	// lineBreakHard marks a hard line break; parseBlock passes it on through
	// SetHardLineBreak.
	lineBreakHard = uint8(1) << iota
	// lineBreakSoft marks a soft line break; parseBlock passes it on through
	// SetSoftLineBreak.
	lineBreakSoft
	// lineBreakVisible is set together with lineBreakHard for breaks written
	// with a trailing backslash; when both are set parseBlock does not trim
	// trailing spaces from the final text segment.
	lineBreakVisible
)
1158
// parseBlock parses the inline content of the block node parent.
//
// It resets block to parent.Lines() and scans those lines byte by byte. On a
// trigger byte it hands control to the inline parsers registered for that
// byte; the nodes they return, and the plain text between them, are appended
// to parent as children. Blocks for which parent.IsRaw() is true are left
// untouched. After the last line, ProcessDelimiters is called and every
// registered close blocker is run for parent.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is true when the previous byte was a backslash that was not
	// itself escaped. It is declared outside the line loop, so its value
	// survives both line changes and `goto retry`.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		// Re-entered after every successful inline parse, so the line is
		// peeked again from wherever the parser left the reader.
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		lineLength := len(line)
		var lineBreakFlags uint8
		// NOTE(review): a non-nil but empty line would panic on this index;
		// presumably PeekLine never returns one — confirm.
		hasNewLine := line[lineLength-1] == '\n'
		// Classify the line ending and shrink lineLength so that the scan
		// below stops before the break marker:
		//   - a single backslash before \n or \r\n: hard, visible break
		//   - two spaces before \n or \r\n: hard break
		//   - any other line ending in \n: soft break
		if ((lineLength >= 3 && line[lineLength-2] == '\\' &&
			line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
			lineLength -= 2
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
			line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) &&
			hasNewLine { // ends with \\r\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' &&
			hasNewLine { // ends with [space][space]\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' &&
			line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
			lineLength -= 4
			lineBreakFlags |= lineBreakHard
		} else if hasNewLine {
			// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
			// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
			// See https://spec.commonmark.org/0.30/#soft-line-breaks
			lineBreakFlags |= lineBreakSoft
		}

		// l is the reader's line index at the start of this pass; startPosition
		// marks where the not-yet-emitted plain text begins.
		l, startPosition := block.Position()
		// n counts bytes that were scanned but not yet consumed from block.
		n := 0
		for i := range lineLength {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
			isPunct := util.IsPunct(c)
			// Inline parsers are consulted on unescaped punctuation, on a space
			// (unless it is escaped and p.escapedSpace is set), and on the
			// first byte of the line.
			if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
				// Spaces and a non-punctuation first byte are both looked up
				// under the ' ' key.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					// Consume the pending bytes so the reader sits on the trigger byte.
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Emit the plain text scanned so far and restart the
						// pending-text range at the current position.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					// Try each parser in order; the first non-nil node wins.
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							if inlineNode.Pos() < 0 {
								// The parser left the position unset (negative).
								// The single-value type assertion panics if the
								// node has no SetPos(int) method.
								inlineNode.(interface{ SetPos(int) }).SetPos(startPosition.Start)
							}
							break
						}
						// The parser declined: restore the reader before the next attempt.
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						parent.AppendChild(parent, inlineNode)
						goto retry
					}
				}
			}
			// No inline node was produced for this byte: update the escape
			// state and count the byte as pending plain text.
			if escaped {
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		// The reader is no longer on the line this pass started on, so no
		// trailing text node is emitted for it.
		// NOTE(review): presumably this happens when Advance crosses a line
		// boundary — confirm against the BlockReader implementation.
		if l != currentL {
			continue
		}
		diff := startPosition.Between(currentPosition)
		// NOTE: this local shadows the imported package `text` until the end
		// of the loop body.
		var text *ast.Text
		if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
			// Hard break marked visible: keep the segment untrimmed.
			text = ast.NewTextSegment(diff)
		} else {
			text = ast.NewTextSegment(diff.TrimRightSpace(source))
		}
		text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
		text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	// NOTE(review): the nil first argument presumably means "no lower bound"
	// for delimiter processing — confirm in ProcessDelimiters.
	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}

}