1package text
2
3import (
4 "bytes"
5 "io"
6 "regexp"
7 "unicode/utf8"
8
9 "github.com/yuin/goldmark/util"
10)
11
const (
	// invalidValue is the sentinel used for segment bounds that carry no
	// position. blockReader.SetPosition treats a segment whose Start equals
	// invalidValue as a request to load the segment of the given line.
	invalidValue = -1

	// EOF is the byte returned by Peek when there is nothing left to read.
	EOF = byte(0xff)
)
16
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns a source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns a byte at current position without advancing the internal pointer.
	// The implementations in this file return EOF when nothing is left to read.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	// The implementations in this file return a nil slice when nothing is left
	// to read.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns a character just before current internal pointer.
	PrecendingCharacter() rune

	// Value returns a value of the given segment.
	Value(Segment) []byte

	// LineOffset returns a distance from the line head to current position.
	LineOffset() int

	// Position returns current line number and position.
	Position() (int, Segment)

	// SetPosition sets current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to the
	// reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceToEOL advances the internal pointer to the end of line.
	// If the line ends with a newline, it will be included in the segment.
	// If the line ends with EOF, it will not be included in the segment.
	AdvanceToEOL()

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a non-blank line.
	// The int result is the number of skipped characters.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns a non-blank line.
	// The int result is the number of skipped lines.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching starting at the current
	// position. The implementations in this file advance the internal pointer
	// by the length of the match when the expression matches.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching starting at the
	// current position and returns the matched text followed by the text of
	// each subexpression, or nil when nothing matches.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds corresponding closure.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
83
// FindClosureOptions holds the options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for the FindClosure. If this is set to true,
	// FindClosure ignores closers in codespans (runs of backticks).
	CodeSpan bool

	// Nesting is a flag for the FindClosure. If this is set to true,
	// FindClosure allows nesting. If it is false, the search is abandoned as
	// soon as another opener is found.
	Nesting bool

	// Newline is a flag for the FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines.
	Newline bool

	// Advance is a flag for the FindClosure. If this is set to true,
	// FindClosure advances pointers when closer is found. If it is false,
	// the position the reader had before the search is restored.
	Advance bool
}
102
// reader is the Reader implementation that walks a whole source buffer line
// by line.
type reader struct {
	// source is the text being read.
	source []byte
	// sourceLength is len(source), captured by NewReader.
	sourceLength int
	// line is the index of the current line; ResetPosition sets it to -1 and
	// AdvanceLine increments it.
	line int
	// peekedLine caches the result of PeekLine; nil means "not computed".
	peekedLine []byte
	// pos is the current segment: Start is the read pointer and Stop is the
	// end of the current line as computed by AdvanceLine.
	pos Segment
	// head is the offset at which the current line starts.
	head int
	// lineOffset caches the result of LineOffset; a negative value means
	// "not computed".
	lineOffset int
}
112
113// NewReader return a new Reader that can read UTF-8 bytes .
114func NewReader(source []byte) Reader {
115 r := &reader{
116 source: source,
117 sourceLength: len(source),
118 }
119 r.ResetPosition()
120 return r
121}
122
123func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
124 return findClosureReader(r, opener, closer, options)
125}
126
127func (r *reader) ResetPosition() {
128 r.line = -1
129 r.head = 0
130 r.lineOffset = -1
131 r.AdvanceLine()
132}
133
134func (r *reader) Source() []byte {
135 return r.source
136}
137
138func (r *reader) Value(seg Segment) []byte {
139 return seg.Value(r.source)
140}
141
142func (r *reader) Peek() byte {
143 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
144 if r.pos.Padding != 0 {
145 return space[0]
146 }
147 return r.source[r.pos.Start]
148 }
149 return EOF
150}
151
152func (r *reader) PeekLine() ([]byte, Segment) {
153 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
154 if r.peekedLine == nil {
155 r.peekedLine = r.pos.Value(r.Source())
156 }
157 return r.peekedLine, r.pos
158 }
159 return nil, r.pos
160}
161
162// io.RuneReader interface.
163func (r *reader) ReadRune() (rune, int, error) {
164 return readRuneReader(r)
165}
166
167func (r *reader) LineOffset() int {
168 if r.lineOffset < 0 {
169 v := 0
170 for i := r.head; i < r.pos.Start; i++ {
171 if r.source[i] == '\t' {
172 v += util.TabWidth(v)
173 } else {
174 v++
175 }
176 }
177 r.lineOffset = v - r.pos.Padding
178 }
179 return r.lineOffset
180}
181
182func (r *reader) PrecendingCharacter() rune {
183 if r.pos.Start <= 0 {
184 if r.pos.Padding != 0 {
185 return rune(' ')
186 }
187 return rune('\n')
188 }
189 i := r.pos.Start - 1
190 for ; i >= 0; i-- {
191 if utf8.RuneStart(r.source[i]) {
192 break
193 }
194 }
195 rn, _ := utf8.DecodeRune(r.source[i:])
196 return rn
197}
198
199func (r *reader) Advance(n int) {
200 r.lineOffset = -1
201 if n < len(r.peekedLine) && r.pos.Padding == 0 {
202 r.pos.Start += n
203 r.peekedLine = nil
204 return
205 }
206 r.peekedLine = nil
207 l := r.sourceLength
208 for ; n > 0 && r.pos.Start < l; n-- {
209 if r.pos.Padding != 0 {
210 r.pos.Padding--
211 continue
212 }
213 if r.source[r.pos.Start] == '\n' {
214 r.AdvanceLine()
215 continue
216 }
217 r.pos.Start++
218 }
219}
220
221func (r *reader) AdvanceAndSetPadding(n, padding int) {
222 r.Advance(n)
223 if padding > r.pos.Padding {
224 r.SetPadding(padding)
225 }
226}
227
228func (r *reader) AdvanceToEOL() {
229 if r.pos.Start >= r.sourceLength {
230 return
231 }
232
233 r.lineOffset = -1
234 i := -1
235 if r.peekedLine != nil {
236 r.pos.Start += len(r.peekedLine) - r.pos.Padding - 1
237 if r.source[r.pos.Start] == '\n' {
238 i = 0
239 }
240 }
241 if i == -1 {
242 i = bytes.IndexByte(r.source[r.pos.Start:], '\n')
243 }
244 r.peekedLine = nil
245 if i != -1 {
246 r.pos.Start += i
247 } else {
248 r.pos.Start = r.sourceLength
249 }
250 r.pos.Padding = 0
251}
252
253func (r *reader) AdvanceLine() {
254 r.lineOffset = -1
255 r.peekedLine = nil
256 r.pos.Start = r.pos.Stop
257 r.head = r.pos.Start
258 if r.pos.Start < 0 || r.pos.Start >= r.sourceLength {
259 return
260 }
261 r.pos.Stop = r.sourceLength
262 i := 0
263 if r.source[r.pos.Start] != '\n' {
264 i = bytes.IndexByte(r.source[r.pos.Start:], '\n')
265 }
266 if i != -1 {
267 r.pos.Stop = r.pos.Start + i + 1
268 }
269 r.line++
270 r.pos.Padding = 0
271}
272
273func (r *reader) Position() (int, Segment) {
274 return r.line, r.pos
275}
276
277func (r *reader) SetPosition(line int, pos Segment) {
278 r.lineOffset = -1
279 r.line = line
280 r.pos = pos
281}
282
283func (r *reader) SetPadding(v int) {
284 r.pos.Padding = v
285}
286
287func (r *reader) SkipSpaces() (Segment, int, bool) {
288 return skipSpacesReader(r)
289}
290
291func (r *reader) SkipBlankLines() (Segment, int, bool) {
292 return skipBlankLinesReader(r)
293}
294
295func (r *reader) Match(reg *regexp.Regexp) bool {
296 return matchReader(r, reg)
297}
298
299func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
300 return findSubMatchReader(r, reg)
301}
302
// A BlockReader interface is a reader that is optimized for Blocks.
// It reads a sequence of segments of a source instead of the whole source.
type BlockReader interface {
	Reader
	// Reset resets current state and sets new segments to the reader.
	Reset(segment *Segments)
}
309
// blockReader is the BlockReader implementation. Each element of segments is
// treated as one line of the block.
type blockReader struct {
	// source is the text the segments refer to.
	source []byte
	// segments holds the lines of the block; it is set by Reset.
	segments *Segments
	// segmentsLength is segments.Len(), captured by Reset.
	segmentsLength int
	// line is the index into segments of the current line.
	line int
	// pos is the current segment; Start is the read pointer.
	pos Segment
	// head is the offset at which the current line starts.
	head int
	// last is the Stop offset of the final segment, or 0 when there are no
	// segments.
	last int
	// lineOffset caches the result of LineOffset; a negative value means
	// "not computed".
	lineOffset int
}
320
321// NewBlockReader returns a new BlockReader.
322func NewBlockReader(source []byte, segments *Segments) BlockReader {
323 r := &blockReader{
324 source: source,
325 }
326 if segments != nil {
327 r.Reset(segments)
328 }
329 return r
330}
331
332func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
333 return findClosureReader(r, opener, closer, options)
334}
335
336func (r *blockReader) ResetPosition() {
337 r.line = -1
338 r.head = 0
339 r.last = 0
340 r.lineOffset = -1
341 r.pos.Start = -1
342 r.pos.Stop = -1
343 r.pos.Padding = 0
344 if r.segmentsLength > 0 {
345 last := r.segments.At(r.segmentsLength - 1)
346 r.last = last.Stop
347 }
348 r.AdvanceLine()
349}
350
351func (r *blockReader) Reset(segments *Segments) {
352 r.segments = segments
353 r.segmentsLength = segments.Len()
354 r.ResetPosition()
355}
356
357func (r *blockReader) Source() []byte {
358 return r.source
359}
360
361func (r *blockReader) Value(seg Segment) []byte {
362 line := r.segmentsLength - 1
363 ret := make([]byte, 0, seg.Stop-seg.Start+1)
364 for ; line >= 0; line-- {
365 if seg.Start >= r.segments.At(line).Start {
366 break
367 }
368 }
369 i := seg.Start
370 for ; line < r.segmentsLength; line++ {
371 s := r.segments.At(line)
372 if i < 0 {
373 i = s.Start
374 }
375 ret = s.ConcatPadding(ret)
376 for ; i < seg.Stop && i < s.Stop; i++ {
377 ret = append(ret, r.source[i])
378 }
379 i = -1
380 if s.Stop > seg.Stop {
381 break
382 }
383 }
384 return ret
385}
386
387// io.RuneReader interface.
388func (r *blockReader) ReadRune() (rune, int, error) {
389 return readRuneReader(r)
390}
391
392func (r *blockReader) PrecendingCharacter() rune {
393 if r.pos.Padding != 0 {
394 return rune(' ')
395 }
396 if r.segments.Len() < 1 {
397 return rune('\n')
398 }
399 firstSegment := r.segments.At(0)
400 if r.line == 0 && r.pos.Start <= firstSegment.Start {
401 return rune('\n')
402 }
403 l := len(r.source)
404 i := r.pos.Start - 1
405 for ; i < l && i >= 0; i-- {
406 if utf8.RuneStart(r.source[i]) {
407 break
408 }
409 }
410 if i < 0 || i >= l {
411 return rune('\n')
412 }
413 rn, _ := utf8.DecodeRune(r.source[i:])
414 return rn
415}
416
417func (r *blockReader) LineOffset() int {
418 if r.lineOffset < 0 {
419 v := 0
420 for i := r.head; i < r.pos.Start; i++ {
421 if r.source[i] == '\t' {
422 v += util.TabWidth(v)
423 } else {
424 v++
425 }
426 }
427 r.lineOffset = v - r.pos.Padding
428 }
429 return r.lineOffset
430}
431
432func (r *blockReader) Peek() byte {
433 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
434 if r.pos.Padding != 0 {
435 return space[0]
436 }
437 return r.source[r.pos.Start]
438 }
439 return EOF
440}
441
442func (r *blockReader) PeekLine() ([]byte, Segment) {
443 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
444 return r.pos.Value(r.source), r.pos
445 }
446 return nil, r.pos
447}
448
449func (r *blockReader) Advance(n int) {
450 r.lineOffset = -1
451
452 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
453 r.pos.Start += n
454 return
455 }
456
457 for ; n > 0; n-- {
458 if r.pos.Padding != 0 {
459 r.pos.Padding--
460 continue
461 }
462 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
463 r.AdvanceLine()
464 continue
465 }
466 r.pos.Start++
467 }
468}
469
470func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
471 r.Advance(n)
472 if padding > r.pos.Padding {
473 r.SetPadding(padding)
474 }
475}
476
477func (r *blockReader) AdvanceToEOL() {
478 r.lineOffset = -1
479 r.pos.Padding = 0
480 c := r.source[r.pos.Stop-1]
481 if c == '\n' {
482 r.pos.Start = r.pos.Stop - 1
483 } else {
484 r.pos.Start = r.pos.Stop
485 }
486}
487
488func (r *blockReader) AdvanceLine() {
489 r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
490 r.head = r.pos.Start
491}
492
493func (r *blockReader) Position() (int, Segment) {
494 return r.line, r.pos
495}
496
497func (r *blockReader) SetPosition(line int, pos Segment) {
498 r.lineOffset = -1
499 r.line = line
500 if pos.Start == invalidValue {
501 if r.line < r.segmentsLength {
502 s := r.segments.At(line)
503 r.head = s.Start
504 r.pos = s
505 }
506 } else {
507 r.pos = pos
508 if r.line < r.segmentsLength {
509 s := r.segments.At(line)
510 r.head = s.Start
511 }
512 }
513}
514
515func (r *blockReader) SetPadding(v int) {
516 r.lineOffset = -1
517 r.pos.Padding = v
518}
519
520func (r *blockReader) SkipSpaces() (Segment, int, bool) {
521 return skipSpacesReader(r)
522}
523
524func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
525 return skipBlankLinesReader(r)
526}
527
528func (r *blockReader) Match(reg *regexp.Regexp) bool {
529 return matchReader(r, reg)
530}
531
532func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
533 return findSubMatchReader(r, reg)
534}
535
536func skipBlankLinesReader(r Reader) (Segment, int, bool) {
537 lines := 0
538 for {
539 line, seg := r.PeekLine()
540 if line == nil {
541 return seg, lines, false
542 }
543 if util.IsBlank(line) {
544 lines++
545 r.AdvanceLine()
546 } else {
547 return seg, lines, true
548 }
549 }
550}
551
552func skipSpacesReader(r Reader) (Segment, int, bool) {
553 chars := 0
554 for {
555 line, segment := r.PeekLine()
556 if line == nil {
557 return segment, chars, false
558 }
559 for i, c := range line {
560 if util.IsSpace(c) {
561 chars++
562 r.Advance(1)
563 continue
564 }
565 return segment.WithStart(segment.Start + i + 1), chars, true
566 }
567 }
568}
569
570func matchReader(r Reader, reg *regexp.Regexp) bool {
571 oldline, oldseg := r.Position()
572 match := reg.FindReaderSubmatchIndex(r)
573 r.SetPosition(oldline, oldseg)
574 if match == nil {
575 return false
576 }
577 r.Advance(match[1] - match[0])
578 return true
579}
580
581func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
582 oldLine, oldSeg := r.Position()
583 match := reg.FindReaderSubmatchIndex(r)
584 r.SetPosition(oldLine, oldSeg)
585 if match == nil {
586 return nil
587 }
588 var bb bytes.Buffer
589 bb.Grow(match[1] - match[0])
590 for i := 0; i < match[1]; {
591 r, size, _ := readRuneReader(r)
592 i += size
593 bb.WriteRune(r)
594 }
595 bs := bb.Bytes()
596 var result [][]byte
597 for i := 0; i < len(match); i += 2 {
598 if match[i] < 0 {
599 result = append(result, []byte{})
600 continue
601 }
602 result = append(result, bs[match[i]:match[i+1]])
603 }
604
605 r.SetPosition(oldLine, oldSeg)
606 r.Advance(match[1] - match[0])
607 return result
608}
609
610func readRuneReader(r Reader) (rune, int, error) {
611 line, _ := r.PeekLine()
612 if line == nil {
613 return 0, 0, io.EOF
614 }
615 rn, size := utf8.DecodeRune(line)
616 if rn == utf8.RuneError {
617 return 0, 0, io.EOF
618 }
619 r.Advance(size)
620 return rn, size, nil
621}
622
// findClosureReader scans r, starting at the current position, for the closer
// byte that balances an opener that is assumed to be already open (the
// nesting depth starts at 1).
//
// On success it returns the scanned segments up to, but not including, the
// closer together with true; otherwise it returns nil and false. The options
// control the search:
//   - CodeSpan: closers and openers inside a backtick code span are ignored.
//   - Nesting: further openers increase the depth instead of aborting.
//   - Newline: the search continues on the following lines.
//   - Advance: the reader is left where the search ended; otherwise the
//     original position is restored before returning.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	// opened is the current nesting depth.
	opened := 1
	// codeSpanOpener is the length of the backtick run that opened the code
	// span being scanned, or 0 outside a code span.
	codeSpanOpener := 0
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: count this backtick run; the span
				// closes only if the run is as long as the opening one.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// Backslash-escaped punctuation outside a code span: skip
				// both bytes.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// Start of a code span: remember the length of the run.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						ret.Append(seg.WithStop(seg.Start + i))
						// Consume everything up to and including the closer.
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			goto end
		}
		// The whole line belongs to the closure; continue on the next one.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}