// Package parser contains types and functions for parsing Markdown text.
2package parser
3
4import (
5 "fmt"
6 "strings"
7 "sync"
8
9 "github.com/yuin/goldmark/ast"
10 "github.com/yuin/goldmark/text"
11 "github.com/yuin/goldmark/util"
12)
13
// Reference describes a link reference found in Markdown text.
type Reference interface {
	// String implements fmt.Stringer.
	String() string

	// Label returns the label of the reference.
	Label() []byte

	// Destination returns the destination (URL) of the reference.
	Destination() []byte

	// Title returns the title of the reference.
	Title() []byte
}

// reference is the Reference implementation returned by NewReference. It
// keeps the three byte slices it was constructed with.
type reference struct {
	label       []byte
	destination []byte
	title       []byte
}

// NewReference returns a new Reference holding the given label, destination
// and title. The slices are stored as passed in; they are not copied.
func NewReference(label, destination, title []byte) Reference {
	return &reference{
		label:       label,
		destination: destination,
		title:       title,
	}
}

// Label returns the label given to NewReference.
func (r *reference) Label() []byte {
	return r.label
}

// Destination returns the destination given to NewReference.
func (r *reference) Destination() []byte {
	return r.destination
}

// Title returns the title given to NewReference.
func (r *reference) Title() []byte {
	return r.title
}

// String renders the reference as "Reference{Label:..., Destination:..., Title:...}".
func (r *reference) String() string {
	const layout = "Reference{Label:%s, Destination:%s, Title:%s}"
	return fmt.Sprintf(layout, r.label, r.destination, r.title)
}
55
// An IDs interface is a collection of element ids.
//
// Implementations keep track of the ids that are already in use.
type IDs interface {
	// Generate generates a new element id from the given value for a node
	// of the given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put adds the given element id to the table of used ids.
	Put(value []byte)
}
64
65type ids struct {
66 values map[string]bool
67}
68
69func newIDs() IDs {
70 return &ids{
71 values: map[string]bool{},
72 }
73}
74
75func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
76 value = util.TrimLeftSpace(value)
77 value = util.TrimRightSpace(value)
78 result := []byte{}
79 for i := 0; i < len(value); {
80 v := value[i]
81 l := util.UTF8Len(v)
82 i += int(l)
83 if l != 1 {
84 continue
85 }
86 if util.IsAlphaNumeric(v) {
87 if 'A' <= v && v <= 'Z' {
88 v += 'a' - 'A'
89 }
90 result = append(result, v)
91 } else if util.IsSpace(v) || v == '-' || v == '_' {
92 result = append(result, '-')
93 }
94 }
95 if len(result) == 0 {
96 if kind == ast.KindHeading {
97 result = []byte("heading")
98 } else {
99 result = []byte("id")
100 }
101 }
102 if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
103 s.values[util.BytesToReadOnlyString(result)] = true
104 return result
105 }
106 for i := 1; ; i++ {
107 newResult := fmt.Sprintf("%s-%d", result, i)
108 if _, ok := s.values[newResult]; !ok {
109 s.values[newResult] = true
110 return []byte(newResult)
111 }
112
113 }
114}
115
116func (s *ids) Put(value []byte) {
117 s.values[util.BytesToReadOnlyString(value)] = true
118}
119
// ContextKey is a key under which arbitrary values are stored in a Context.
type ContextKey int

// ContextKeyMax is the largest ContextKey handed out so far by NewContextKey.
var ContextKeyMax ContextKey

// NewContextKey returns a new ContextKey value, one greater than the
// previous ContextKeyMax. It performs no synchronization.
func NewContextKey() ContextKey {
	next := ContextKeyMax + 1
	ContextKeyMax = next
	return next
}
131
// A Context interface holds the information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string

	// Get returns the value associated with the given key.
	Get(ContextKey) interface{}

	// ComputeIfAbsent computes a value if no value is associated with the
	// given key, stores it, and returns the value.
	ComputeIfAbsent(ContextKey, func() interface{}) interface{}

	// Set associates the given value with the given key in the context.
	Set(ContextKey, interface{})

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if the current line is blank.
	BlockOffset() int

	// SetBlockOffset sets the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if the current line is blank.
	BlockIndent() int

	// SetBlockIndent sets the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns the first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns the last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns the list of blocks that are currently being parsed.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets the list of blocks that are currently being parsed.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns the last block that is currently being parsed.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if the current position seems to be inside
	// a link label.
	IsInLinkLabel() bool
}
206
207// A ContextConfig struct is a data structure that holds configuration of the Context.
208type ContextConfig struct {
209 IDs IDs
210}
211
212// An ContextOption is a functional option type for the Context.
213type ContextOption func(*ContextConfig)
214
215// WithIDs is a functional option for the Context.
216func WithIDs(ids IDs) ContextOption {
217 return func(c *ContextConfig) {
218 c.IDs = ids
219 }
220}
221
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
	// store holds the values set through Set, indexed by ContextKey.
	store []interface{}
	// ids is the collection of element ids returned by IDs.
	ids IDs
	// refs maps a normalized label (see AddReference) to its reference.
	refs map[string]Reference
	// blockOffset and blockIndent back BlockOffset and BlockIndent;
	// NewContext initializes both to -1.
	blockOffset int
	blockIndent int
	// delimiters is the head and lastDelimiter the tail of the current
	// delimiter list.
	delimiters    *Delimiter
	lastDelimiter *Delimiter
	// openedBlocks is the list of blocks that are currently being parsed.
	openedBlocks []Block
}
232
233// NewContext returns a new Context.
234func NewContext(options ...ContextOption) Context {
235 cfg := &ContextConfig{
236 IDs: newIDs(),
237 }
238 for _, option := range options {
239 option(cfg)
240 }
241
242 return &parseContext{
243 store: make([]interface{}, ContextKeyMax+1),
244 refs: map[string]Reference{},
245 ids: cfg.IDs,
246 blockOffset: -1,
247 blockIndent: -1,
248 delimiters: nil,
249 lastDelimiter: nil,
250 openedBlocks: []Block{},
251 }
252}
253
254func (p *parseContext) Get(key ContextKey) interface{} {
255 return p.store[key]
256}
257
258func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
259 v := p.store[key]
260 if v == nil {
261 v = f()
262 p.store[key] = v
263 }
264 return v
265}
266
267func (p *parseContext) Set(key ContextKey, value interface{}) {
268 p.store[key] = value
269}
270
// IDs returns the collection of element ids this context was created with.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the value last passed to SetBlockOffset
// (-1 on a new context).
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset records v as the current block offset.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the value last passed to SetBlockIndent
// (-1 on a new context).
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent records v as the current block indent.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
298
299func (p *parseContext) PushDelimiter(d *Delimiter) {
300 if p.delimiters == nil {
301 p.delimiters = d
302 p.lastDelimiter = d
303 } else {
304 l := p.lastDelimiter
305 p.lastDelimiter = d
306 l.NextDelimiter = d
307 d.PreviousDelimiter = l
308 }
309}
310
// RemoveDelimiter unlinks d from the delimiter list and then either turns it
// into text or removes it from its parent node.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	// Detach d from its neighbours; when d is the head, the next delimiter
	// becomes the new head.
	if d.PreviousDelimiter == nil {
		p.delimiters = d.NextDelimiter
	} else {
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	// When d is the tail, the previous delimiter becomes the new tail.
	if d.NextDelimiter == nil {
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the new head and tail do not point back at d.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	if d.Length != 0 {
		// d still has a non-zero Length: hand its segment to
		// ast.MergeOrReplaceTextSegment (presumably so the remaining
		// delimiter characters end up as plain text — confirm in package ast).
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		// Length is zero: drop the node from the tree.
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
337
338func (p *parseContext) ClearDelimiters(bottom ast.Node) {
339 if p.lastDelimiter == nil {
340 return
341 }
342 var c ast.Node
343 for c = p.lastDelimiter; c != nil && c != bottom; {
344 prev := c.PreviousSibling()
345 if d, ok := c.(*Delimiter); ok {
346 p.RemoveDelimiter(d)
347 }
348 c = prev
349 }
350}
351
352func (p *parseContext) AddReference(ref Reference) {
353 key := util.ToLinkReference(ref.Label())
354 if _, ok := p.refs[key]; !ok {
355 p.refs[key] = ref
356 }
357}
358
359func (p *parseContext) Reference(label string) (Reference, bool) {
360 v, ok := p.refs[label]
361 return v, ok
362}
363
364func (p *parseContext) References() []Reference {
365 ret := make([]Reference, 0, len(p.refs))
366 for _, v := range p.refs {
367 ret = append(ret, v)
368 }
369 return ret
370}
371
372func (p *parseContext) String() string {
373 refs := []string{}
374 for _, r := range p.refs {
375 refs = append(refs, r.String())
376 }
377
378 return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
379}
380
// OpenedBlocks returns the list of blocks that are currently being parsed.
// The slice is returned as-is, not copied.
func (p *parseContext) OpenedBlocks() []Block {
	return p.openedBlocks
}

// SetOpenedBlocks replaces the list of blocks that are currently being parsed.
func (p *parseContext) SetOpenedBlocks(v []Block) {
	p.openedBlocks = v
}
388
389func (p *parseContext) LastOpenedBlock() Block {
390 if l := len(p.openedBlocks); l != 0 {
391 return p.openedBlocks[l-1]
392 }
393 return Block{}
394}
395
396func (p *parseContext) IsInLinkLabel() bool {
397 tlist := p.Get(linkLabelStateKey)
398 return tlist != nil
399}
400
// State represents the parser's state.
// State is designed to be used as a bit flag: each constant below occupies
// its own bit, so values can be combined with | and tested with &.
type State int

const (
	// none occupies the lowest bit (1 << 0) and is not exported.
	none State = 1 << iota

	// Continue indicates parser can continue parsing.
	Continue

	// Close indicates parser cannot parse anymore.
	Close

	// HasChildren indicates parser may have child blocks.
	HasChildren

	// NoChildren indicates parser does not have child blocks.
	NoChildren

	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph
)
425
// A Config struct is a data structure that holds the configuration of the
// Parser. The element type expected in each PrioritizedSlice is noted next
// to the field.
type Config struct {
	// Options holds arbitrary named options; they are passed to every
	// component that implements SetOptioner.
	Options               map[OptionName]interface{}
	BlockParsers          util.PrioritizedSlice /*<BlockParser>*/
	InlineParsers         util.PrioritizedSlice /*<InlineParser>*/
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	ASTTransformers       util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is set by WithEscapedSpace.
	EscapedSpace bool
}
435
436// NewConfig returns a new Config.
437func NewConfig() *Config {
438 return &Config{
439 Options: map[OptionName]interface{}{},
440 BlockParsers: util.PrioritizedSlice{},
441 InlineParsers: util.PrioritizedSlice{},
442 ParagraphTransformers: util.PrioritizedSlice{},
443 ASTTransformers: util.PrioritizedSlice{},
444 }
445}
446
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given Config.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string

// optAttribute is the option name that specifies whether attributes of
// elements are enabled.
const optAttribute OptionName = "Attribute"
457
458type withAttribute struct {
459}
460
461func (o *withAttribute) SetParserOption(c *Config) {
462 c.Options[optAttribute] = true
463}
464
465// WithAttribute is a functional option that enables custom attributes.
466func WithAttribute() Option {
467 return &withAttribute{}
468}
469
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}
478
// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Options the object does not understand may be passed as well,
	// so implementations must ignore unacceptable options.
	SetOption(name OptionName, value interface{})
}
486
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called for any line.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance the
	// reader position by the consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must advance
	// the reader position by the consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node when
	// the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
529
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or at the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance the
	// reader position by the consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
546
// A CloseBlocker interface is a callback that will be
// called when a block is closed during inline parsing.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
553
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched for in parsed Paragraphs.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
560
// ASTTransformer transforms the entire AST of a Markdown document.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
566
567// DefaultBlockParsers returns a new list of default BlockParsers.
568// Priorities of default BlockParsers are:
569//
570// SetextHeadingParser, 100
571// ThematicBreakParser, 200
572// ListParser, 300
573// ListItemParser, 400
574// CodeBlockParser, 500
575// ATXHeadingParser, 600
576// FencedCodeBlockParser, 700
577// BlockquoteParser, 800
578// HTMLBlockParser, 900
579// ParagraphParser, 1000
580func DefaultBlockParsers() []util.PrioritizedValue {
581 return []util.PrioritizedValue{
582 util.Prioritized(NewSetextHeadingParser(), 100),
583 util.Prioritized(NewThematicBreakParser(), 200),
584 util.Prioritized(NewListParser(), 300),
585 util.Prioritized(NewListItemParser(), 400),
586 util.Prioritized(NewCodeBlockParser(), 500),
587 util.Prioritized(NewATXHeadingParser(), 600),
588 util.Prioritized(NewFencedCodeBlockParser(), 700),
589 util.Prioritized(NewBlockquoteParser(), 800),
590 util.Prioritized(NewHTMLBlockParser(), 900),
591 util.Prioritized(NewParagraphParser(), 1000),
592 }
593}
594
595// DefaultInlineParsers returns a new list of default InlineParsers.
596// Priorities of default InlineParsers are:
597//
598// CodeSpanParser, 100
599// LinkParser, 200
600// AutoLinkParser, 300
601// RawHTMLParser, 400
602// EmphasisParser, 500
603func DefaultInlineParsers() []util.PrioritizedValue {
604 return []util.PrioritizedValue{
605 util.Prioritized(NewCodeSpanParser(), 100),
606 util.Prioritized(NewLinkParser(), 200),
607 util.Prioritized(NewAutoLinkParser(), 300),
608 util.Prioritized(NewRawHTMLParser(), 400),
609 util.Prioritized(NewEmphasisParser(), 500),
610 }
611}
612
613// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
614// Priorities of default ParagraphTransformers are:
615//
616// LinkReferenceParagraphTransformer, 100
617func DefaultParagraphTransformers() []util.PrioritizedValue {
618 return []util.PrioritizedValue{
619 util.Prioritized(LinkReferenceParagraphTransformer, 100),
620 }
621}
622
// A Block struct holds a node and its corresponding parser as a pair.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is the BlockParser that opened Node.
	Parser BlockParser
}
630
// parser is the Parser implementation returned by NewParser.
//
// Everything except options and config is filled in lazily, on the first
// call to Parse, from the contents of config.
type parser struct {
	// options is initialized to an empty map by NewParser.
	options map[OptionName]interface{}
	// blockParsers is indexed by trigger byte; each entry lists the block
	// parsers registered for that byte.
	blockParsers [256][]BlockParser
	// freeBlockParsers holds the block parsers whose Trigger returns nil.
	freeBlockParsers []BlockParser
	// inlineParsers is indexed by trigger byte, like blockParsers.
	inlineParsers [256][]InlineParser
	// closeBlockers holds the inline parsers that also implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	// escapedSpace is copied from Config.EscapedSpace during initialization.
	escapedSpace bool
	// config collects the options until the first Parse; it is set to nil
	// once initialization has run.
	config *Config
	// initSync guards the one-time initialization performed by Parse.
	initSync sync.Once
}
643
644type withBlockParsers struct {
645 value []util.PrioritizedValue
646}
647
648func (o *withBlockParsers) SetParserOption(c *Config) {
649 c.BlockParsers = append(c.BlockParsers, o.value...)
650}
651
652// WithBlockParsers is a functional option that allow you to add
653// BlockParsers to the parser.
654func WithBlockParsers(bs ...util.PrioritizedValue) Option {
655 return &withBlockParsers{bs}
656}
657
658type withInlineParsers struct {
659 value []util.PrioritizedValue
660}
661
662func (o *withInlineParsers) SetParserOption(c *Config) {
663 c.InlineParsers = append(c.InlineParsers, o.value...)
664}
665
666// WithInlineParsers is a functional option that allow you to add
667// InlineParsers to the parser.
668func WithInlineParsers(bs ...util.PrioritizedValue) Option {
669 return &withInlineParsers{bs}
670}
671
672type withParagraphTransformers struct {
673 value []util.PrioritizedValue
674}
675
676func (o *withParagraphTransformers) SetParserOption(c *Config) {
677 c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
678}
679
680// WithParagraphTransformers is a functional option that allow you to add
681// ParagraphTransformers to the parser.
682func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
683 return &withParagraphTransformers{ps}
684}
685
686type withASTTransformers struct {
687 value []util.PrioritizedValue
688}
689
690func (o *withASTTransformers) SetParserOption(c *Config) {
691 c.ASTTransformers = append(c.ASTTransformers, o.value...)
692}
693
694// WithASTTransformers is a functional option that allow you to add
695// ASTTransformers to the parser.
696func WithASTTransformers(ps ...util.PrioritizedValue) Option {
697 return &withASTTransformers{ps}
698}
699
700type withEscapedSpace struct {
701}
702
703func (o *withEscapedSpace) SetParserOption(c *Config) {
704 c.EscapedSpace = true
705}
706
707// WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
708func WithEscapedSpace() Option {
709 return &withEscapedSpace{}
710}
711
712type withOption struct {
713 name OptionName
714 value interface{}
715}
716
717func (o *withOption) SetParserOption(c *Config) {
718 c.Options[o.name] = o.value
719}
720
721// WithOption is a functional option that allow you to set
722// an arbitrary option to the parser.
723func WithOption(name OptionName, value interface{}) Option {
724 return &withOption{name, value}
725}
726
727// NewParser returns a new Parser with given options.
728func NewParser(options ...Option) Parser {
729 config := NewConfig()
730 for _, opt := range options {
731 opt.SetParserOption(config)
732 }
733
734 p := &parser{
735 options: map[OptionName]interface{}{},
736 config: config,
737 }
738
739 return p
740}
741
742func (p *parser) AddOptions(opts ...Option) {
743 for _, opt := range opts {
744 opt.SetParserOption(p.config)
745 }
746}
747
748func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
749 bp, ok := v.Value.(BlockParser)
750 if !ok {
751 panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
752 }
753 tcs := bp.Trigger()
754 so, ok := v.Value.(SetOptioner)
755 if ok {
756 for oname, ovalue := range options {
757 so.SetOption(oname, ovalue)
758 }
759 }
760 if tcs == nil {
761 p.freeBlockParsers = append(p.freeBlockParsers, bp)
762 } else {
763 for _, tc := range tcs {
764 if p.blockParsers[tc] == nil {
765 p.blockParsers[tc] = []BlockParser{}
766 }
767 p.blockParsers[tc] = append(p.blockParsers[tc], bp)
768 }
769 }
770}
771
772func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
773 ip, ok := v.Value.(InlineParser)
774 if !ok {
775 panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
776 }
777 tcs := ip.Trigger()
778 so, ok := v.Value.(SetOptioner)
779 if ok {
780 for oname, ovalue := range options {
781 so.SetOption(oname, ovalue)
782 }
783 }
784 if cb, ok := ip.(CloseBlocker); ok {
785 p.closeBlockers = append(p.closeBlockers, cb)
786 }
787 for _, tc := range tcs {
788 if p.inlineParsers[tc] == nil {
789 p.inlineParsers[tc] = []InlineParser{}
790 }
791 p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
792 }
793}
794
795func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
796 pt, ok := v.Value.(ParagraphTransformer)
797 if !ok {
798 panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
799 }
800 so, ok := v.Value.(SetOptioner)
801 if ok {
802 for oname, ovalue := range options {
803 so.SetOption(oname, ovalue)
804 }
805 }
806 p.paragraphTransformers = append(p.paragraphTransformers, pt)
807}
808
809func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
810 at, ok := v.Value.(ASTTransformer)
811 if !ok {
812 panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
813 }
814 so, ok := v.Value.(SetOptioner)
815 if ok {
816 for oname, ovalue := range options {
817 so.SetOption(oname, ovalue)
818 }
819 }
820 p.astTransformers = append(p.astTransformers, at)
821}
822
823// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
824type ParseConfig struct {
825 Context Context
826}
827
828// A ParseOption is a functional option type for the Parser.Parse.
829type ParseOption func(c *ParseConfig)
830
831// WithContext is a functional option that allow you to override
832// a default context.
833func WithContext(context Context) ParseOption {
834 return func(c *ParseConfig) {
835 c.Context = context
836 }
837}
838
839func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
840 p.initSync.Do(func() {
841 p.config.BlockParsers.Sort()
842 for _, v := range p.config.BlockParsers {
843 p.addBlockParser(v, p.config.Options)
844 }
845 for i := range p.blockParsers {
846 if p.blockParsers[i] != nil {
847 p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
848 }
849 }
850
851 p.config.InlineParsers.Sort()
852 for _, v := range p.config.InlineParsers {
853 p.addInlineParser(v, p.config.Options)
854 }
855 p.config.ParagraphTransformers.Sort()
856 for _, v := range p.config.ParagraphTransformers {
857 p.addParagraphTransformer(v, p.config.Options)
858 }
859 p.config.ASTTransformers.Sort()
860 for _, v := range p.config.ASTTransformers {
861 p.addASTTransformer(v, p.config.Options)
862 }
863 p.escapedSpace = p.config.EscapedSpace
864 p.config = nil
865 })
866 c := &ParseConfig{}
867 for _, opt := range opts {
868 opt(c)
869 }
870 if c.Context == nil {
871 c.Context = NewContext()
872 }
873 pc := c.Context
874 root := ast.NewDocument()
875 p.parseBlocks(root, reader, pc)
876
877 blockReader := text.NewBlockReader(reader.Source(), nil)
878 p.walkBlock(root, func(node ast.Node) {
879 p.parseBlock(blockReader, node, pc)
880 })
881 for _, at := range p.astTransformers {
882 at.Transform(root, reader, pc)
883 }
884 // root.Dump(reader.Source(), 0)
885 return root
886}
887
888func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
889 for _, pt := range p.paragraphTransformers {
890 pt.Transform(node, reader, pc)
891 if node.Parent() == nil {
892 return true
893 }
894 }
895 return false
896}
897
898func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
899 blocks := pc.OpenedBlocks()
900 for i := from; i >= to; i-- {
901 node := blocks[i].Node
902 paragraph, ok := node.(*ast.Paragraph)
903 if ok && node.Parent() != nil {
904 p.transformParagraph(paragraph, reader, pc)
905 }
906 if node.Parent() != nil { // closes only if node has not been transformed
907 blocks[i].Parser.Close(blocks[i].Node, reader, pc)
908 }
909 }
910 if from == len(blocks)-1 {
911 blocks = blocks[0:to]
912 } else {
913 blocks = append(blocks[0:to], blocks[from+1:]...)
914 }
915 pc.SetOpenedBlocks(blocks)
916}
917
// blockOpenResult is the outcome reported by openBlocks.
type blockOpenResult int

const (
	// paragraphContinuation: no block was opened and the last opened
	// paragraph accepted the line as a continuation.
	paragraphContinuation blockOpenResult = iota + 1
	// newBlocksOpened: at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened: no block was opened and the line did not continue a
	// paragraph.
	noBlocksOpened
)
925
// openBlocks tries to open new blocks under parent for the current line.
//
// blankLine is recorded on every node that gets opened (via
// SetBlankPreviousLines). The result is newBlocksOpened when at least one
// block was opened, paragraphContinuation when nothing was opened but the
// last opened paragraph continued on this line, and noBlocksOpened otherwise.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable is true while the last opened block is a paragraph that
	// this line might continue.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	// Publish offset and indent of the line for BlockParser.Open; both are -1
	// when the indent covers the whole line.
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Pick the parsers registered for the first non-space byte, falling back
	// to the trigger-less parsers.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While a paragraph could still continue and nothing has been opened
		// yet, only parsers that may interrupt a paragraph are tried.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previously opened node has been detached from the tree:
			// take it off the opened list as well.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	// Nothing was opened: let the last paragraph try to take the line.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
1023
// lineStat records, for one line and one block nesting level, whether the
// line was blank.
type lineStat struct {
	lineNum int
	level   int
	isBlank bool
}

// isBlankLine reports whether line lineNum counts as blank for the block at
// nesting level level, judging by stats.
//
// stats is scanned backwards, starting level entries before the last one.
// If there is no entry to scan at all, the result is true. Among entries for
// lineNum, a blank entry of a shallower level yields true and the entry of
// exactly this level yields its own blank flag. The scan gives up with false
// on reaching an entry for an earlier line, or when the entries run out.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	start := len(stats) - 1 - level
	if start < 0 {
		return true
	}
	for idx := start; idx >= 0; idx-- {
		stat := stats[idx]
		if stat.lineNum == lineNum {
			switch {
			case stat.level < level && stat.isBlank:
				return true
			case stat.level == level:
				return stat.isBlank
			}
		}
		if stat.lineNum < lineNum {
			return false
		}
	}
	return false
}
1048
// parseBlocks builds the block structure under parent from reader.
//
// The outer loop starts a new group of blocks after skipping blank lines;
// the inner loop feeds the following lines, one at a time, to the blocks
// that are currently open, until none remains open. Parsing ends when the
// reader is exhausted or when no new block can be opened.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines collects per-line, per-level blank information consulted by
	// isBlankLine.
	blankLines := make([]lineStat, 0, 128)
	isBlank := false
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: restart the statistics and mark the
			// previous line as blank for every level that is currently open.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// End of input: close everything that is still open.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				// New blocks are opened under the block one level up, or under
				// the root parent for the outermost block.
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
1127
1128func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
1129 for c := block.FirstChild(); c != nil; c = c.NextSibling() {
1130 p.walkBlock(c, cb)
1131 }
1132 cb(block)
1133}
1134
// Bit flags describing how a line of inline content ends.
const (
	// lineBreakHard marks a line that ends with a hard line break.
	lineBreakHard uint8 = 1 << 0
	// lineBreakSoft marks a line that ends with a newline that is not a hard
	// line break.
	lineBreakSoft uint8 = 1 << 1
	// lineBreakVisible is set together with lineBreakHard when the hard break
	// is written as a trailing backslash; the text segment is then kept
	// without trimming its trailing spaces.
	lineBreakVisible uint8 = 1 << 2
)
1140
1141func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
1142 if parent.IsRaw() {
1143 return
1144 }
1145 escaped := false
1146 source := block.Source()
1147 block.Reset(parent.Lines())
1148 for {
1149 retry:
1150 line, _ := block.PeekLine()
1151 if line == nil {
1152 break
1153 }
1154 lineLength := len(line)
1155 var lineBreakFlags uint8 = 0
1156 hasNewLine := line[lineLength-1] == '\n'
1157 if ((lineLength >= 3 && line[lineLength-2] == '\\' && line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
1158 lineLength -= 2
1159 lineBreakFlags |= lineBreakHard | lineBreakVisible
1160 } else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' && line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) && hasNewLine { // ends with \\r\n
1161 lineLength -= 3
1162 lineBreakFlags |= lineBreakHard | lineBreakVisible
1163 } else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && hasNewLine { // ends with [space][space]\n
1164 lineLength -= 3
1165 lineBreakFlags |= lineBreakHard
1166 } else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
1167 lineLength -= 4
1168 lineBreakFlags |= lineBreakHard
1169 } else if hasNewLine {
1170 // If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
1171 // If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
1172 // See https://spec.commonmark.org/0.30/#soft-line-breaks
1173 lineBreakFlags |= lineBreakSoft
1174 }
1175
1176 l, startPosition := block.Position()
1177 n := 0
1178 for i := 0; i < lineLength; i++ {
1179 c := line[i]
1180 if c == '\n' {
1181 break
1182 }
1183 isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
1184 isPunct := util.IsPunct(c)
1185 if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
1186 parserChar := c
1187 if isSpace || (i == 0 && !isPunct) {
1188 parserChar = ' '
1189 }
1190 ips := p.inlineParsers[parserChar]
1191 if ips != nil {
1192 block.Advance(n)
1193 n = 0
1194 savedLine, savedPosition := block.Position()
1195 if i != 0 {
1196 _, currentPosition := block.Position()
1197 ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
1198 _, startPosition = block.Position()
1199 }
1200 var inlineNode ast.Node
1201 for _, ip := range ips {
1202 inlineNode = ip.Parse(parent, block, pc)
1203 if inlineNode != nil {
1204 break
1205 }
1206 block.SetPosition(savedLine, savedPosition)
1207 }
1208 if inlineNode != nil {
1209 parent.AppendChild(parent, inlineNode)
1210 goto retry
1211 }
1212 }
1213 }
1214 if escaped {
1215 escaped = false
1216 n++
1217 continue
1218 }
1219
1220 if c == '\\' {
1221 escaped = true
1222 n++
1223 continue
1224 }
1225
1226 escaped = false
1227 n++
1228 }
1229 if n != 0 {
1230 block.Advance(n)
1231 }
1232 currentL, currentPosition := block.Position()
1233 if l != currentL {
1234 continue
1235 }
1236 diff := startPosition.Between(currentPosition)
1237 var text *ast.Text
1238 if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
1239 text = ast.NewTextSegment(diff)
1240 } else {
1241 text = ast.NewTextSegment(diff.TrimRightSpace(source))
1242 }
1243 text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
1244 text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
1245 parent.AppendChild(parent, text)
1246 block.AdvanceLine()
1247 }
1248
1249 ProcessDelimiters(nil, pc)
1250 for _, ip := range p.closeBlockers {
1251 ip.CloseBlock(parent, block, pc)
1252 }
1253}