1package text
2
3import (
4 "io"
5 "regexp"
6 "unicode/utf8"
7
8 "github.com/yuin/goldmark/util"
9)
10
const (
	// invalidValue is the placeholder offset used for segments that do not
	// carry a real position.
	invalidValue = -1

	// EOF indicates the end of file.
	EOF = byte(0xff)
)
15
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns the source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns the byte at the current position without advancing the
	// internal pointer.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// internal pointer.
	PrecendingCharacter() rune

	// Value returns the value of the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the line head to the current position.
	LineOffset() int

	// Position returns the current line number and position.
	Position() (int, Segment)

	// SetPosition sets the current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to
	// the reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a non-blank line.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns a non-blank line.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching to the current line.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching to the current line.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds the corresponding closure.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
77
// FindClosureOptions holds the options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for the FindClosure. If this is set to true,
	// FindClosure ignores closers in codespans.
	CodeSpan bool

	// Nesting is a flag for the FindClosure. If this is set to true,
	// FindClosure allows nesting.
	Nesting bool

	// Newline is a flag for the FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines.
	Newline bool

	// Advance is a flag for the FindClosure. If this is set to true,
	// FindClosure advances pointers when closer is found.
	Advance bool
}
96
// reader is the Reader implementation returned by NewReader. It walks a
// single source buffer line by line.
type reader struct {
	// source is the buffer being read.
	source []byte
	// sourceLength caches len(source).
	sourceLength int
	// line is the zero-based index of the current line.
	line int
	// peekedLine caches the bytes computed by PeekLine; nil means no line
	// is cached.
	peekedLine []byte
	// pos spans from the current position to the end of the current line.
	pos Segment
	// head is the offset in source where the current line begins.
	head int
	// lineOffset caches the result of LineOffset; a negative value means it
	// has to be recomputed.
	lineOffset int
}
106
107// NewReader return a new Reader that can read UTF-8 bytes .
108func NewReader(source []byte) Reader {
109 r := &reader{
110 source: source,
111 sourceLength: len(source),
112 }
113 r.ResetPosition()
114 return r
115}
116
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader with this reader.
func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
120
121func (r *reader) ResetPosition() {
122 r.line = -1
123 r.head = 0
124 r.lineOffset = -1
125 r.AdvanceLine()
126}
127
// Source returns the buffer this reader was created with.
func (r *reader) Source() []byte {
	return r.source
}
131
// Value returns what seg.Value yields for this reader's source.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
135
136func (r *reader) Peek() byte {
137 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
138 if r.pos.Padding != 0 {
139 return space[0]
140 }
141 return r.source[r.pos.Start]
142 }
143 return EOF
144}
145
146func (r *reader) PeekLine() ([]byte, Segment) {
147 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
148 if r.peekedLine == nil {
149 r.peekedLine = r.pos.Value(r.Source())
150 }
151 return r.peekedLine, r.pos
152 }
153 return nil, r.pos
154}
155
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
160
161func (r *reader) LineOffset() int {
162 if r.lineOffset < 0 {
163 v := 0
164 for i := r.head; i < r.pos.Start; i++ {
165 if r.source[i] == '\t' {
166 v += util.TabWidth(v)
167 } else {
168 v++
169 }
170 }
171 r.lineOffset = v - r.pos.Padding
172 }
173 return r.lineOffset
174}
175
176func (r *reader) PrecendingCharacter() rune {
177 if r.pos.Start <= 0 {
178 if r.pos.Padding != 0 {
179 return rune(' ')
180 }
181 return rune('\n')
182 }
183 i := r.pos.Start - 1
184 for ; i >= 0; i-- {
185 if utf8.RuneStart(r.source[i]) {
186 break
187 }
188 }
189 rn, _ := utf8.DecodeRune(r.source[i:])
190 return rn
191}
192
193func (r *reader) Advance(n int) {
194 r.lineOffset = -1
195 if n < len(r.peekedLine) && r.pos.Padding == 0 {
196 r.pos.Start += n
197 r.peekedLine = nil
198 return
199 }
200 r.peekedLine = nil
201 l := r.sourceLength
202 for ; n > 0 && r.pos.Start < l; n-- {
203 if r.pos.Padding != 0 {
204 r.pos.Padding--
205 continue
206 }
207 if r.source[r.pos.Start] == '\n' {
208 r.AdvanceLine()
209 continue
210 }
211 r.pos.Start++
212 }
213}
214
215func (r *reader) AdvanceAndSetPadding(n, padding int) {
216 r.Advance(n)
217 if padding > r.pos.Padding {
218 r.SetPadding(padding)
219 }
220}
221
222func (r *reader) AdvanceLine() {
223 r.lineOffset = -1
224 r.peekedLine = nil
225 r.pos.Start = r.pos.Stop
226 r.head = r.pos.Start
227 if r.pos.Start < 0 {
228 return
229 }
230 r.pos.Stop = r.sourceLength
231 for i := r.pos.Start; i < r.sourceLength; i++ {
232 c := r.source[i]
233 if c == '\n' {
234 r.pos.Stop = i + 1
235 break
236 }
237 }
238 r.line++
239 r.pos.Padding = 0
240}
241
// Position returns the current line number and the current segment.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
245
246func (r *reader) SetPosition(line int, pos Segment) {
247 r.lineOffset = -1
248 r.line = line
249 r.pos = pos
250}
251
252func (r *reader) SetPadding(v int) {
253 r.pos.Padding = v
254}
255
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
259
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
263
// Match implements Reader.Match by delegating to matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
267
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
271
// A BlockReader interface is a reader that is optimized for Blocks.
type BlockReader interface {
	Reader
	// Reset resets the current state and sets new segments to the reader.
	Reset(segment *Segments)
}
278
// blockReader is the BlockReader implementation returned by NewBlockReader.
// It reads the source through a list of segments, one segment per line.
type blockReader struct {
	// source is the buffer the segments point into.
	source []byte
	// segments holds the lines to read.
	segments *Segments
	// segmentsLength caches segments.Len().
	segmentsLength int
	// line is the index into segments of the current line.
	line int
	// pos is the current position.
	pos Segment
	// head is the Start offset of the current line's segment.
	head int
	// last is the Stop offset of the final segment, or 0 without segments.
	last int
	// lineOffset caches the result of LineOffset; a negative value means it
	// has to be recomputed.
	lineOffset int
}
289
290// NewBlockReader returns a new BlockReader.
291func NewBlockReader(source []byte, segments *Segments) BlockReader {
292 r := &blockReader{
293 source: source,
294 }
295 if segments != nil {
296 r.Reset(segments)
297 }
298 return r
299}
300
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader with this reader.
func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
304
305func (r *blockReader) ResetPosition() {
306 r.line = -1
307 r.head = 0
308 r.last = 0
309 r.lineOffset = -1
310 r.pos.Start = -1
311 r.pos.Stop = -1
312 r.pos.Padding = 0
313 if r.segmentsLength > 0 {
314 last := r.segments.At(r.segmentsLength - 1)
315 r.last = last.Stop
316 }
317 r.AdvanceLine()
318}
319
// Reset replaces the segments read by this reader, refreshes the cached
// segment count and resets the position.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
325
// Source returns the buffer this reader was created with.
func (r *blockReader) Source() []byte {
	return r.source
}
329
// Value collects the bytes covered by seg, walking the reader's segments line
// by line so that only bytes belonging to a segment are copied.
func (r *blockReader) Value(seg Segment) []byte {
	line := r.segmentsLength - 1
	ret := make([]byte, 0, seg.Stop-seg.Start+1)
	// Scan from the last line backwards for the line whose segment starts at
	// or before seg.Start.
	// NOTE(review): if no such line exists, line ends up as -1 and the loop
	// below calls segments.At(-1) — confirm callers never pass such a seg.
	for ; line >= 0; line-- {
		if seg.Start >= r.segments.At(line).Start {
			break
		}
	}
	i := seg.Start
	for ; line < r.segmentsLength; line++ {
		s := r.segments.At(line)
		// A negative i marks "begin at the start of this line".
		if i < 0 {
			i = s.Start
		}
		// Presumably prepends the padding of s to the output — verify against
		// Segment.ConcatPadding.
		ret = s.ConcatPadding(ret)
		// Copy until either seg or the current line ends.
		for ; i < seg.Stop && i < s.Stop; i++ {
			ret = append(ret, r.source[i])
		}
		i = -1
		// This line reaches past seg, so nothing further can belong to it.
		if s.Stop > seg.Stop {
			break
		}
	}
	return ret
}
355
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
360
361func (r *blockReader) PrecendingCharacter() rune {
362 if r.pos.Padding != 0 {
363 return rune(' ')
364 }
365 if r.segments.Len() < 1 {
366 return rune('\n')
367 }
368 firstSegment := r.segments.At(0)
369 if r.line == 0 && r.pos.Start <= firstSegment.Start {
370 return rune('\n')
371 }
372 l := len(r.source)
373 i := r.pos.Start - 1
374 for ; i < l && i >= 0; i-- {
375 if utf8.RuneStart(r.source[i]) {
376 break
377 }
378 }
379 if i < 0 || i >= l {
380 return rune('\n')
381 }
382 rn, _ := utf8.DecodeRune(r.source[i:])
383 return rn
384}
385
386func (r *blockReader) LineOffset() int {
387 if r.lineOffset < 0 {
388 v := 0
389 for i := r.head; i < r.pos.Start; i++ {
390 if r.source[i] == '\t' {
391 v += util.TabWidth(v)
392 } else {
393 v++
394 }
395 }
396 r.lineOffset = v - r.pos.Padding
397 }
398 return r.lineOffset
399}
400
401func (r *blockReader) Peek() byte {
402 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
403 if r.pos.Padding != 0 {
404 return space[0]
405 }
406 return r.source[r.pos.Start]
407 }
408 return EOF
409}
410
411func (r *blockReader) PeekLine() ([]byte, Segment) {
412 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
413 return r.pos.Value(r.source), r.pos
414 }
415 return nil, r.pos
416}
417
418func (r *blockReader) Advance(n int) {
419 r.lineOffset = -1
420
421 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
422 r.pos.Start += n
423 return
424 }
425
426 for ; n > 0; n-- {
427 if r.pos.Padding != 0 {
428 r.pos.Padding--
429 continue
430 }
431 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
432 r.AdvanceLine()
433 continue
434 }
435 r.pos.Start++
436 }
437}
438
439func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
440 r.Advance(n)
441 if padding > r.pos.Padding {
442 r.SetPadding(padding)
443 }
444}
445
446func (r *blockReader) AdvanceLine() {
447 r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
448 r.head = r.pos.Start
449}
450
// Position returns the current line number and the current segment.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
454
455func (r *blockReader) SetPosition(line int, pos Segment) {
456 r.lineOffset = -1
457 r.line = line
458 if pos.Start == invalidValue {
459 if r.line < r.segmentsLength {
460 s := r.segments.At(line)
461 r.head = s.Start
462 r.pos = s
463 }
464 } else {
465 r.pos = pos
466 if r.line < r.segmentsLength {
467 s := r.segments.At(line)
468 r.head = s.Start
469 }
470 }
471}
472
// SetPadding replaces the padding of the current position and invalidates
// the cached line offset, which is computed from the padding.
func (r *blockReader) SetPadding(v int) {
	r.lineOffset = -1
	r.pos.Padding = v
}
477
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
481
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
485
// Match implements Reader.Match by delegating to matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
489
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
493
494func skipBlankLinesReader(r Reader) (Segment, int, bool) {
495 lines := 0
496 for {
497 line, seg := r.PeekLine()
498 if line == nil {
499 return seg, lines, false
500 }
501 if util.IsBlank(line) {
502 lines++
503 r.AdvanceLine()
504 } else {
505 return seg, lines, true
506 }
507 }
508}
509
510func skipSpacesReader(r Reader) (Segment, int, bool) {
511 chars := 0
512 for {
513 line, segment := r.PeekLine()
514 if line == nil {
515 return segment, chars, false
516 }
517 for i, c := range line {
518 if util.IsSpace(c) {
519 chars++
520 r.Advance(1)
521 continue
522 }
523 return segment.WithStart(segment.Start + i + 1), chars, true
524 }
525 }
526}
527
528func matchReader(r Reader, reg *regexp.Regexp) bool {
529 oldline, oldseg := r.Position()
530 match := reg.FindReaderSubmatchIndex(r)
531 r.SetPosition(oldline, oldseg)
532 if match == nil {
533 return false
534 }
535 r.Advance(match[1] - match[0])
536 return true
537}
538
539func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
540 oldline, oldseg := r.Position()
541 match := reg.FindReaderSubmatchIndex(r)
542 r.SetPosition(oldline, oldseg)
543 if match == nil {
544 return nil
545 }
546 runes := make([]rune, 0, match[1]-match[0])
547 for i := 0; i < match[1]; {
548 r, size, _ := readRuneReader(r)
549 i += size
550 runes = append(runes, r)
551 }
552 result := [][]byte{}
553 for i := 0; i < len(match); i += 2 {
554 result = append(result, []byte(string(runes[match[i]:match[i+1]])))
555 }
556
557 r.SetPosition(oldline, oldseg)
558 r.Advance(match[1] - match[0])
559 return result
560}
561
562func readRuneReader(r Reader) (rune, int, error) {
563 line, _ := r.PeekLine()
564 if line == nil {
565 return 0, 0, io.EOF
566 }
567 rn, size := utf8.DecodeRune(line)
568 if rn == utf8.RuneError {
569 return 0, 0, io.EOF
570 }
571 r.Advance(size)
572 return rn, size, nil
573}
574
575func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
576 opened := 1
577 codeSpanOpener := 0
578 closed := false
579 orgline, orgpos := r.Position()
580 var ret *Segments
581
582 for {
583 bs, seg := r.PeekLine()
584 if bs == nil {
585 goto end
586 }
587 i := 0
588 for i < len(bs) {
589 c := bs[i]
590 if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
591 codeSpanCloser := 0
592 for ; i < len(bs); i++ {
593 if bs[i] == '`' {
594 codeSpanCloser++
595 } else {
596 i--
597 break
598 }
599 }
600 if codeSpanCloser == codeSpanOpener {
601 codeSpanOpener = 0
602 }
603 } else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
604 i += 2
605 continue
606 } else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
607 for ; i < len(bs); i++ {
608 if bs[i] == '`' {
609 codeSpanOpener++
610 } else {
611 i--
612 break
613 }
614 }
615 } else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
616 if c == closer {
617 opened--
618 if opened == 0 {
619 if ret == nil {
620 ret = NewSegments()
621 }
622 ret.Append(seg.WithStop(seg.Start + i))
623 r.Advance(i + 1)
624 closed = true
625 goto end
626 }
627 } else if c == opener {
628 if !opts.Nesting {
629 goto end
630 }
631 opened++
632 }
633 }
634 i++
635 }
636 if !opts.Newline {
637 goto end
638 }
639 r.AdvanceLine()
640 if ret == nil {
641 ret = NewSegments()
642 }
643 ret.Append(seg)
644 }
645end:
646 if !opts.Advance {
647 r.SetPosition(orgline, orgpos)
648 }
649 if closed {
650 return ret, true
651 }
652 return nil, false
653}