1package text
  2
  3import (
  4	"bytes"
  5	"io"
  6	"regexp"
  7	"unicode/utf8"
  8
  9	"github.com/yuin/goldmark/util"
 10)
 11
// invalidValue is the sentinel used for both bounds of the Segment that
// blockReader.AdvanceLine hands to blockReader.SetPosition; a Start equal to
// invalidValue makes SetPosition load the stored segment of the given line.
const invalidValue = -1

// EOF indicates the end of file. It is the byte returned by Peek when there
// is nothing left to read.
const EOF = byte(0xff)
 16
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns the source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns the byte at the current position without advancing the
	// internal pointer.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// internal pointer.
	PrecendingCharacter() rune

	// Value returns the value of the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the line head to the current
	// position.
	LineOffset() int

	// Position returns the current line number and position.
	Position() (int, Segment)

	// SetPosition sets the current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to
	// the reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceToEOL advances the internal pointer to the end of line.
	// If the line ends with a newline, it will be included in the segment.
	// If the line ends with EOF, it will not be included in the segment.
	AdvanceToEOL()

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and reports how many characters were
	// skipped. If it reaches EOF, the returned bool is false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns the segment of the first
	// non-blank line together with the number of lines skipped.
	// If it reaches EOF, the returned bool is false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching to the current line.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching to the current line
	// and returns the matched groups.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds the corresponding closure.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
 83
// FindClosureOptions holds the options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for FindClosure. If this is set to true,
	// FindClosure ignores closers in code spans.
	CodeSpan bool

	// Nesting is a flag for FindClosure. If this is set to true,
	// FindClosure allows nesting; otherwise meeting another opener aborts
	// the search.
	Nesting bool

	// Newline is a flag for FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines.
	Newline bool

	// Advance is a flag for FindClosure. If this is set to true,
	// FindClosure leaves the reader where the search ended instead of
	// restoring the original position.
	Advance bool
}
102
// reader is the Reader implementation that walks a whole source buffer line
// by line.
type reader struct {
	source       []byte  // the complete input
	sourceLength int     // len(source), cached by NewReader
	line         int     // current line number; -1 until the first AdvanceLine
	peekedLine   []byte  // cached PeekLine result; nil when it must be recomputed
	pos          Segment // Start is the read pointer, Stop the end of the current line
	head         int     // offset at which the current line begins
	lineOffset   int     // cached LineOffset result; negative when not yet computed
}
112
113// NewReader return a new Reader that can read UTF-8 bytes .
114func NewReader(source []byte) Reader {
115	r := &reader{
116		source:       source,
117		sourceLength: len(source),
118	}
119	r.ResetPosition()
120	return r
121}
122
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
126
127func (r *reader) ResetPosition() {
128	r.line = -1
129	r.head = 0
130	r.lineOffset = -1
131	r.AdvanceLine()
132}
133
// Source returns the buffer the reader was created with.
func (r *reader) Source() []byte {
	return r.source
}
137
// Value returns the value of seg evaluated against the reader's source.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
141
142func (r *reader) Peek() byte {
143	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
144		if r.pos.Padding != 0 {
145			return space[0]
146		}
147		return r.source[r.pos.Start]
148	}
149	return EOF
150}
151
152func (r *reader) PeekLine() ([]byte, Segment) {
153	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
154		if r.peekedLine == nil {
155			r.peekedLine = r.pos.Value(r.Source())
156		}
157		return r.peekedLine, r.pos
158	}
159	return nil, r.pos
160}
161
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
166
167func (r *reader) LineOffset() int {
168	if r.lineOffset < 0 {
169		v := 0
170		for i := r.head; i < r.pos.Start; i++ {
171			if r.source[i] == '\t' {
172				v += util.TabWidth(v)
173			} else {
174				v++
175			}
176		}
177		r.lineOffset = v - r.pos.Padding
178	}
179	return r.lineOffset
180}
181
182func (r *reader) PrecendingCharacter() rune {
183	if r.pos.Start <= 0 {
184		if r.pos.Padding != 0 {
185			return rune(' ')
186		}
187		return rune('\n')
188	}
189	i := r.pos.Start - 1
190	for ; i >= 0; i-- {
191		if utf8.RuneStart(r.source[i]) {
192			break
193		}
194	}
195	rn, _ := utf8.DecodeRune(r.source[i:])
196	return rn
197}
198
199func (r *reader) Advance(n int) {
200	r.lineOffset = -1
201	if n < len(r.peekedLine) && r.pos.Padding == 0 {
202		r.pos.Start += n
203		r.peekedLine = nil
204		return
205	}
206	r.peekedLine = nil
207	l := r.sourceLength
208	for ; n > 0 && r.pos.Start < l; n-- {
209		if r.pos.Padding != 0 {
210			r.pos.Padding--
211			continue
212		}
213		if r.source[r.pos.Start] == '\n' {
214			r.AdvanceLine()
215			continue
216		}
217		r.pos.Start++
218	}
219}
220
221func (r *reader) AdvanceAndSetPadding(n, padding int) {
222	r.Advance(n)
223	if padding > r.pos.Padding {
224		r.SetPadding(padding)
225	}
226}
227
228func (r *reader) AdvanceToEOL() {
229	if r.pos.Start >= r.sourceLength {
230		return
231	}
232
233	r.lineOffset = -1
234	i := -1
235	if r.peekedLine != nil {
236		r.pos.Start += len(r.peekedLine) - r.pos.Padding - 1
237		if r.source[r.pos.Start] == '\n' {
238			i = 0
239		}
240	}
241	if i == -1 {
242		i = bytes.IndexByte(r.source[r.pos.Start:], '\n')
243	}
244	r.peekedLine = nil
245	if i != -1 {
246		r.pos.Start += i
247	} else {
248		r.pos.Start = r.sourceLength
249	}
250	r.pos.Padding = 0
251}
252
253func (r *reader) AdvanceLine() {
254	r.lineOffset = -1
255	r.peekedLine = nil
256	r.pos.Start = r.pos.Stop
257	r.head = r.pos.Start
258	if r.pos.Start < 0 || r.pos.Start >= r.sourceLength {
259		return
260	}
261	r.pos.Stop = r.sourceLength
262	i := 0
263	if r.source[r.pos.Start] != '\n' {
264		i = bytes.IndexByte(r.source[r.pos.Start:], '\n')
265	}
266	if i != -1 {
267		r.pos.Stop = r.pos.Start + i + 1
268	}
269	r.line++
270	r.pos.Padding = 0
271}
272
// Position returns the current line number and the current segment.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
276
277func (r *reader) SetPosition(line int, pos Segment) {
278	r.lineOffset = -1
279	r.line = line
280	r.pos = pos
281}
282
283func (r *reader) SetPadding(v int) {
284	r.pos.Padding = v
285}
286
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
290
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
294
// Match implements Reader.Match by delegating to matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
298
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
302
// A BlockReader interface is a reader that is optimized for Blocks.
type BlockReader interface {
	Reader
	// Reset resets the current state and sets new segments to the reader.
	Reset(segment *Segments)
}
309
// blockReader is the BlockReader implementation. It reads only the parts of
// source described by a list of segments, one segment per line.
type blockReader struct {
	source         []byte    // the complete input the segments point into
	segments       *Segments // the lines to read
	segmentsLength int       // segments.Len(), cached by Reset
	line           int       // index of the current line within segments
	pos            Segment   // Start is the read pointer, Stop the end of the current line
	head           int       // Start of the current line's segment
	last           int       // Stop of the final segment; 0 when there are no segments
	lineOffset     int       // cached LineOffset result; negative when not yet computed
}
320
321// NewBlockReader returns a new BlockReader.
322func NewBlockReader(source []byte, segments *Segments) BlockReader {
323	r := &blockReader{
324		source: source,
325	}
326	if segments != nil {
327		r.Reset(segments)
328	}
329	return r
330}
331
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
335
336func (r *blockReader) ResetPosition() {
337	r.line = -1
338	r.head = 0
339	r.last = 0
340	r.lineOffset = -1
341	r.pos.Start = -1
342	r.pos.Stop = -1
343	r.pos.Padding = 0
344	if r.segmentsLength > 0 {
345		last := r.segments.At(r.segmentsLength - 1)
346		r.last = last.Stop
347	}
348	r.AdvanceLine()
349}
350
// Reset replaces the lines being read with segments, caches their count and
// rewinds the reader via ResetPosition.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
356
// Source returns the buffer the reader was created with.
func (r *blockReader) Source() []byte {
	return r.source
}
360
361func (r *blockReader) Value(seg Segment) []byte {
362	line := r.segmentsLength - 1
363	ret := make([]byte, 0, seg.Stop-seg.Start+1)
364	for ; line >= 0; line-- {
365		if seg.Start >= r.segments.At(line).Start {
366			break
367		}
368	}
369	i := seg.Start
370	for ; line < r.segmentsLength; line++ {
371		s := r.segments.At(line)
372		if i < 0 {
373			i = s.Start
374		}
375		ret = s.ConcatPadding(ret)
376		for ; i < seg.Stop && i < s.Stop; i++ {
377			ret = append(ret, r.source[i])
378		}
379		i = -1
380		if s.Stop > seg.Stop {
381			break
382		}
383	}
384	return ret
385}
386
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
391
392func (r *blockReader) PrecendingCharacter() rune {
393	if r.pos.Padding != 0 {
394		return rune(' ')
395	}
396	if r.segments.Len() < 1 {
397		return rune('\n')
398	}
399	firstSegment := r.segments.At(0)
400	if r.line == 0 && r.pos.Start <= firstSegment.Start {
401		return rune('\n')
402	}
403	l := len(r.source)
404	i := r.pos.Start - 1
405	for ; i < l && i >= 0; i-- {
406		if utf8.RuneStart(r.source[i]) {
407			break
408		}
409	}
410	if i < 0 || i >= l {
411		return rune('\n')
412	}
413	rn, _ := utf8.DecodeRune(r.source[i:])
414	return rn
415}
416
417func (r *blockReader) LineOffset() int {
418	if r.lineOffset < 0 {
419		v := 0
420		for i := r.head; i < r.pos.Start; i++ {
421			if r.source[i] == '\t' {
422				v += util.TabWidth(v)
423			} else {
424				v++
425			}
426		}
427		r.lineOffset = v - r.pos.Padding
428	}
429	return r.lineOffset
430}
431
432func (r *blockReader) Peek() byte {
433	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
434		if r.pos.Padding != 0 {
435			return space[0]
436		}
437		return r.source[r.pos.Start]
438	}
439	return EOF
440}
441
442func (r *blockReader) PeekLine() ([]byte, Segment) {
443	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
444		return r.pos.Value(r.source), r.pos
445	}
446	return nil, r.pos
447}
448
449func (r *blockReader) Advance(n int) {
450	r.lineOffset = -1
451
452	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
453		r.pos.Start += n
454		return
455	}
456
457	for ; n > 0; n-- {
458		if r.pos.Padding != 0 {
459			r.pos.Padding--
460			continue
461		}
462		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
463			r.AdvanceLine()
464			continue
465		}
466		r.pos.Start++
467	}
468}
469
470func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
471	r.Advance(n)
472	if padding > r.pos.Padding {
473		r.SetPadding(padding)
474	}
475}
476
477func (r *blockReader) AdvanceToEOL() {
478	r.lineOffset = -1
479	r.pos.Padding = 0
480	c := r.source[r.pos.Stop-1]
481	if c == '\n' {
482		r.pos.Start = r.pos.Stop - 1
483	} else {
484		r.pos.Start = r.pos.Stop
485	}
486}
487
// AdvanceLine moves to the next line. It asks SetPosition for line+1 with a
// segment whose bounds are invalidValue, which makes SetPosition load the
// stored segment of that line when one exists, and then records the resulting
// start offset as the line head.
func (r *blockReader) AdvanceLine() {
	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
	r.head = r.pos.Start
}
492
// Position returns the current line number and the current segment.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
496
497func (r *blockReader) SetPosition(line int, pos Segment) {
498	r.lineOffset = -1
499	r.line = line
500	if pos.Start == invalidValue {
501		if r.line < r.segmentsLength {
502			s := r.segments.At(line)
503			r.head = s.Start
504			r.pos = s
505		}
506	} else {
507		r.pos = pos
508		if r.line < r.segmentsLength {
509			s := r.segments.At(line)
510			r.head = s.Start
511		}
512	}
513}
514
// SetPadding sets the padding of the current segment and drops the cached
// line offset, which is computed with the padding subtracted.
func (r *blockReader) SetPadding(v int) {
	r.lineOffset = -1
	r.pos.Padding = v
}
519
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
523
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
527
// Match implements Reader.Match by delegating to matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
531
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
535
536func skipBlankLinesReader(r Reader) (Segment, int, bool) {
537	lines := 0
538	for {
539		line, seg := r.PeekLine()
540		if line == nil {
541			return seg, lines, false
542		}
543		if util.IsBlank(line) {
544			lines++
545			r.AdvanceLine()
546		} else {
547			return seg, lines, true
548		}
549	}
550}
551
552func skipSpacesReader(r Reader) (Segment, int, bool) {
553	chars := 0
554	for {
555		line, segment := r.PeekLine()
556		if line == nil {
557			return segment, chars, false
558		}
559		for i, c := range line {
560			if util.IsSpace(c) {
561				chars++
562				r.Advance(1)
563				continue
564			}
565			return segment.WithStart(segment.Start + i + 1), chars, true
566		}
567	}
568}
569
570func matchReader(r Reader, reg *regexp.Regexp) bool {
571	oldline, oldseg := r.Position()
572	match := reg.FindReaderSubmatchIndex(r)
573	r.SetPosition(oldline, oldseg)
574	if match == nil {
575		return false
576	}
577	r.Advance(match[1] - match[0])
578	return true
579}
580
581func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
582	oldLine, oldSeg := r.Position()
583	match := reg.FindReaderSubmatchIndex(r)
584	r.SetPosition(oldLine, oldSeg)
585	if match == nil {
586		return nil
587	}
588	var bb bytes.Buffer
589	bb.Grow(match[1] - match[0])
590	for i := 0; i < match[1]; {
591		r, size, _ := readRuneReader(r)
592		i += size
593		bb.WriteRune(r)
594	}
595	bs := bb.Bytes()
596	var result [][]byte
597	for i := 0; i < len(match); i += 2 {
598		if match[i] < 0 {
599			result = append(result, []byte{})
600			continue
601		}
602		result = append(result, bs[match[i]:match[i+1]])
603	}
604
605	r.SetPosition(oldLine, oldSeg)
606	r.Advance(match[1] - match[0])
607	return result
608}
609
610func readRuneReader(r Reader) (rune, int, error) {
611	line, _ := r.PeekLine()
612	if line == nil {
613		return 0, 0, io.EOF
614	}
615	rn, size := utf8.DecodeRune(line)
616	if rn == utf8.RuneError {
617		return 0, 0, io.EOF
618	}
619	r.Advance(size)
620	return rn, size, nil
621}
622
// findClosureReader scans forward from the current position of r for the
// closer byte that balances an opener. It is the shared implementation behind
// the readers' FindClosure methods.
//
// On success it returns the segments of the text that precedes the closer and
// true; otherwise it returns nil and false. Unless opts.Advance is set, the
// reader is put back to where it started, whether or not a closer was found.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	// One opener is counted as already open — presumably the caller has
	// consumed it before calling; verify against callers.
	opened := 1
	// Length of the backtick run that opened the current code span; 0 while
	// outside a code span.
	codeSpanOpener := 0
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: measure this backtick run. The span ends
				// only when the run is as long as the one that opened it.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						// Step back so that the i++ at the bottom of the outer
						// loop lands on this non-backtick byte.
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// Backslash-escaped punctuation outside a code span: skip both
				// bytes so that the escaped byte is never taken for a delimiter.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// Start of a code span: remember the length of the opening run.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				// Delimiters only count outside code spans, or everywhere when
				// code spans are not being tracked.
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						// Keep the part of this line that precedes the closer
						// and move the reader just past the closer.
						ret.Append(seg.WithStop(seg.Start + i))
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						// A nested opener is not allowed: give up.
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			// The closer must be on the same line.
			goto end
		}
		// No closer on this line: record the whole line and go on with the
		// next one.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}