1package extension
  2
  3import (
  4	"bytes"
  5	"regexp"
  6
  7	"github.com/yuin/goldmark"
  8	"github.com/yuin/goldmark/ast"
  9	"github.com/yuin/goldmark/parser"
 10	"github.com/yuin/goldmark/text"
 11	"github.com/yuin/goldmark/util"
 12)
 13
 14var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?:[/#?][-a-zA-Z0-9@:%_\+.~#!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
 15
 16var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp)://[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?::\d+)?(?:[/#?][-a-zA-Z0-9@:%_+.~#$!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
 17
// A LinkifyConfig struct is a data structure that holds configuration of the
// Linkify extension.
//
// AllowedProtocols lists the protocol prefixes (each ending with ':', like
// 'http:') that may start a URL autolink; when it is nil the parser accepts
// "http:", "https:" and "ftp:". URLRegexp is the pattern of a URL including a
// protocol. WWWRegexp is the pattern of a URL without a protocol, tried on
// text that starts with "www.". EmailRegexp is the pattern of an email
// address; when it is nil the parser uses util.FindEmailIndex instead.
type LinkifyConfig struct {
	AllowedProtocols [][]byte
	URLRegexp        *regexp.Regexp
	WWWRegexp        *regexp.Regexp
	EmailRegexp      *regexp.Regexp
}
 26
 27const (
 28	optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols"
 29	optLinkifyURLRegexp        parser.OptionName = "LinkifyURLRegexp"
 30	optLinkifyWWWRegexp        parser.OptionName = "LinkifyWWWRegexp"
 31	optLinkifyEmailRegexp      parser.OptionName = "LinkifyEmailRegexp"
 32)
 33
 34// SetOption implements SetOptioner.
 35func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) {
 36	switch name {
 37	case optLinkifyAllowedProtocols:
 38		c.AllowedProtocols = value.([][]byte)
 39	case optLinkifyURLRegexp:
 40		c.URLRegexp = value.(*regexp.Regexp)
 41	case optLinkifyWWWRegexp:
 42		c.WWWRegexp = value.(*regexp.Regexp)
 43	case optLinkifyEmailRegexp:
 44		c.EmailRegexp = value.(*regexp.Regexp)
 45	}
 46}
 47
// A LinkifyOption interface sets options for the Linkify extension.
// Besides being a parser.Option, an implementation applies itself directly
// to a LinkifyConfig through SetLinkifyOption.
type LinkifyOption interface {
	parser.Option
	SetLinkifyOption(*LinkifyConfig)
}
 53
 54type withLinkifyAllowedProtocols struct {
 55	value [][]byte
 56}
 57
 58func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) {
 59	c.Options[optLinkifyAllowedProtocols] = o.value
 60}
 61
 62func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
 63	p.AllowedProtocols = o.value
 64}
 65
 66// WithLinkifyAllowedProtocols is a functional option that specify allowed
 67// protocols in autolinks. Each protocol must end with ':' like
 68// 'http:' .
 69func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption {
 70	return &withLinkifyAllowedProtocols{
 71		value: value,
 72	}
 73}
 74
 75type withLinkifyURLRegexp struct {
 76	value *regexp.Regexp
 77}
 78
 79func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) {
 80	c.Options[optLinkifyURLRegexp] = o.value
 81}
 82
 83func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) {
 84	p.URLRegexp = o.value
 85}
 86
 87// WithLinkifyURLRegexp is a functional option that specify
 88// a pattern of the URL including a protocol.
 89func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
 90	return &withLinkifyURLRegexp{
 91		value: value,
 92	}
 93}
 94
 95// WithLinkifyWWWRegexp is a functional option that specify
 96// a pattern of the URL without a protocol.
 97// This pattern must start with 'www.' .
 98type withLinkifyWWWRegexp struct {
 99	value *regexp.Regexp
100}
101
102func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) {
103	c.Options[optLinkifyWWWRegexp] = o.value
104}
105
106func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
107	p.WWWRegexp = o.value
108}
109
110func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
111	return &withLinkifyWWWRegexp{
112		value: value,
113	}
114}
115
116// WithLinkifyWWWRegexp is a functional otpion that specify
117// a pattern of the email address.
118type withLinkifyEmailRegexp struct {
119	value *regexp.Regexp
120}
121
122func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) {
123	c.Options[optLinkifyEmailRegexp] = o.value
124}
125
126func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
127	p.EmailRegexp = o.value
128}
129
130func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
131	return &withLinkifyEmailRegexp{
132		value: value,
133	}
134}
135
// linkifyParser is the inline parser returned by NewLinkifyParser. It embeds
// LinkifyConfig, so the configured protocols and patterns are fields of the
// parser itself.
type linkifyParser struct {
	LinkifyConfig
}
139
140// NewLinkifyParser return a new InlineParser can parse
141// text that seems like a URL.
142func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser {
143	p := &linkifyParser{
144		LinkifyConfig: LinkifyConfig{
145			AllowedProtocols: nil,
146			URLRegexp:        urlRegexp,
147			WWWRegexp:        wwwURLRegxp,
148		},
149	}
150	for _, o := range opts {
151		o.SetLinkifyOption(&p.LinkifyConfig)
152	}
153	return p
154}
155
156func (s *linkifyParser) Trigger() []byte {
157	// ' ' indicates any white spaces and a line head
158	return []byte{' ', '*', '_', '~', '('}
159}
160
// Prefixes that Parse compares against the start of a candidate link.
// protoHTTP, protoHTTPS and protoFTP are the protocols accepted when no
// allowed-protocol list is configured; domainWWW marks a protocol-less
// address that is tried against the WWW pattern.
var (
	protoHTTP  = []byte("http:")
	protoHTTPS = []byte("https:")
	protoFTP   = []byte("ftp:")
	domainWWW  = []byte("www.")
)
167
168func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
169	if pc.IsInLinkLabel() {
170		return nil
171	}
172	line, segment := block.PeekLine()
173	consumes := 0
174	start := segment.Start
175	c := line[0]
176	// advance if current position is not a line head.
177	if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' {
178		consumes++
179		start++
180		line = line[1:]
181	}
182
183	var m []int
184	var protocol []byte
185	var typ ast.AutoLinkType = ast.AutoLinkURL
186	if s.LinkifyConfig.AllowedProtocols == nil {
187		if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
188			m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
189		}
190	} else {
191		for _, prefix := range s.LinkifyConfig.AllowedProtocols {
192			if bytes.HasPrefix(line, prefix) {
193				m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
194				break
195			}
196		}
197	}
198	if m == nil && bytes.HasPrefix(line, domainWWW) {
199		m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line)
200		protocol = []byte("http")
201	}
202	if m != nil && m[0] != 0 {
203		m = nil
204	}
205	if m != nil && m[0] == 0 {
206		lastChar := line[m[1]-1]
207		if lastChar == '.' {
208			m[1]--
209		} else if lastChar == ')' {
210			closing := 0
211			for i := m[1] - 1; i >= m[0]; i-- {
212				if line[i] == ')' {
213					closing++
214				} else if line[i] == '(' {
215					closing--
216				}
217			}
218			if closing > 0 {
219				m[1] -= closing
220			}
221		} else if lastChar == ';' {
222			i := m[1] - 2
223			for ; i >= m[0]; i-- {
224				if util.IsAlphaNumeric(line[i]) {
225					continue
226				}
227				break
228			}
229			if i != m[1]-2 {
230				if line[i] == '&' {
231					m[1] -= m[1] - i
232				}
233			}
234		}
235	}
236	if m == nil {
237		if len(line) > 0 && util.IsPunct(line[0]) {
238			return nil
239		}
240		typ = ast.AutoLinkEmail
241		stop := -1
242		if s.LinkifyConfig.EmailRegexp == nil {
243			stop = util.FindEmailIndex(line)
244		} else {
245			m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line)
246			if m != nil && m[0] == 0 {
247				stop = m[1]
248			}
249		}
250		if stop < 0 {
251			return nil
252		}
253		at := bytes.IndexByte(line, '@')
254		m = []int{0, stop, at, stop - 1}
255		if m == nil || bytes.IndexByte(line[m[2]:m[3]], '.') < 0 {
256			return nil
257		}
258		lastChar := line[m[1]-1]
259		if lastChar == '.' {
260			m[1]--
261		}
262		if m[1] < len(line) {
263			nextChar := line[m[1]]
264			if nextChar == '-' || nextChar == '_' {
265				return nil
266			}
267		}
268	}
269	if m == nil {
270		return nil
271	}
272	if consumes != 0 {
273		s := segment.WithStop(segment.Start + 1)
274		ast.MergeOrAppendTextSegment(parent, s)
275	}
276	i := m[1] - 1
277	for ; i > 0; i-- {
278		c := line[i]
279		switch c {
280		case '?', '!', '.', ',', ':', '*', '_', '~':
281		default:
282			goto endfor
283		}
284	}
285endfor:
286	i++
287	consumes += i
288	block.Advance(consumes)
289	n := ast.NewTextSegment(text.NewSegment(start, start+i))
290	link := ast.NewAutoLink(typ, n)
291	link.Protocol = protocol
292	return link
293}
294
// CloseBlock is a no-op for the linkify parser.
func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
	// nothing to do
}
298
// linkify is the extension type behind Linkify and NewLinkify. options holds
// the options that Extend passes on to NewLinkifyParser.
type linkify struct {
	options []LinkifyOption
}
302
303// Linkify is an extension that allow you to parse text that seems like a URL.
304var Linkify = &linkify{}
305
306func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
307	return &linkify{
308		options: opts,
309	}
310}
311
312func (e *linkify) Extend(m goldmark.Markdown) {
313	m.Parser().AddOptions(
314		parser.WithInlineParsers(
315			util.Prioritized(NewLinkifyParser(e.options...), 999),
316		),
317	)
318}