1package extension
  2
  3import (
  4	"bytes"
  5	"regexp"
  6
  7	"github.com/yuin/goldmark"
  8	"github.com/yuin/goldmark/ast"
  9	"github.com/yuin/goldmark/parser"
 10	"github.com/yuin/goldmark/text"
 11	"github.com/yuin/goldmark/util"
 12)
 13
 14var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?:[/#?][-a-zA-Z0-9@:%_\+.~#!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) //nolint:golint,lll
 15
 16var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp)://[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?::\d+)?(?:[/#?][-a-zA-Z0-9@:%_+.~#$!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) //nolint:golint,lll
 17
 18// An LinkifyConfig struct is a data structure that holds configuration of the
 19// Linkify extension.
 20type LinkifyConfig struct {
 21	AllowedProtocols [][]byte
 22	URLRegexp        *regexp.Regexp
 23	WWWRegexp        *regexp.Regexp
 24	EmailRegexp      *regexp.Regexp
 25}
 26
 27const (
 28	optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols"
 29	optLinkifyURLRegexp        parser.OptionName = "LinkifyURLRegexp"
 30	optLinkifyWWWRegexp        parser.OptionName = "LinkifyWWWRegexp"
 31	optLinkifyEmailRegexp      parser.OptionName = "LinkifyEmailRegexp"
 32)
 33
 34// SetOption implements SetOptioner.
 35func (c *LinkifyConfig) SetOption(name parser.OptionName, value any) {
 36	switch name {
 37	case optLinkifyAllowedProtocols:
 38		c.AllowedProtocols = value.([][]byte)
 39	case optLinkifyURLRegexp:
 40		c.URLRegexp = value.(*regexp.Regexp)
 41	case optLinkifyWWWRegexp:
 42		c.WWWRegexp = value.(*regexp.Regexp)
 43	case optLinkifyEmailRegexp:
 44		c.EmailRegexp = value.(*regexp.Regexp)
 45	}
 46}
 47
 48// A LinkifyOption interface sets options for the LinkifyOption.
 49type LinkifyOption interface {
 50	parser.Option
 51	SetLinkifyOption(*LinkifyConfig)
 52}
 53
 54type withLinkifyAllowedProtocols struct {
 55	value [][]byte
 56}
 57
 58func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) {
 59	c.Options[optLinkifyAllowedProtocols] = o.value
 60}
 61
 62func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
 63	p.AllowedProtocols = o.value
 64}
 65
 66// WithLinkifyAllowedProtocols is a functional option that specify allowed
 67// protocols in autolinks. Each protocol must end with ':' like
 68// 'http:' .
 69func WithLinkifyAllowedProtocols[T []byte | string](value []T) LinkifyOption {
 70	opt := &withLinkifyAllowedProtocols{}
 71	for _, v := range value {
 72		opt.value = append(opt.value, []byte(v))
 73	}
 74	return opt
 75}
 76
 77type withLinkifyURLRegexp struct {
 78	value *regexp.Regexp
 79}
 80
 81func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) {
 82	c.Options[optLinkifyURLRegexp] = o.value
 83}
 84
 85func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) {
 86	p.URLRegexp = o.value
 87}
 88
 89// WithLinkifyURLRegexp is a functional option that specify
 90// a pattern of the URL including a protocol.
 91func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
 92	return &withLinkifyURLRegexp{
 93		value: value,
 94	}
 95}
 96
 97type withLinkifyWWWRegexp struct {
 98	value *regexp.Regexp
 99}
100
101func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) {
102	c.Options[optLinkifyWWWRegexp] = o.value
103}
104
105func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
106	p.WWWRegexp = o.value
107}
108
109// WithLinkifyWWWRegexp is a functional option that specify
110// a pattern of the URL without a protocol.
111// This pattern must start with 'www.' .
112func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
113	return &withLinkifyWWWRegexp{
114		value: value,
115	}
116}
117
118type withLinkifyEmailRegexp struct {
119	value *regexp.Regexp
120}
121
122func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) {
123	c.Options[optLinkifyEmailRegexp] = o.value
124}
125
126func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
127	p.EmailRegexp = o.value
128}
129
130// WithLinkifyEmailRegexp is a functional otpion that specify
131// a pattern of the email address.
132func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
133	return &withLinkifyEmailRegexp{
134		value: value,
135	}
136}
137
138type linkifyParser struct {
139	LinkifyConfig
140}
141
142// NewLinkifyParser return a new InlineParser can parse
143// text that seems like a URL.
144func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser {
145	p := &linkifyParser{
146		LinkifyConfig: LinkifyConfig{
147			AllowedProtocols: nil,
148			URLRegexp:        urlRegexp,
149			WWWRegexp:        wwwURLRegxp,
150		},
151	}
152	for _, o := range opts {
153		o.SetLinkifyOption(&p.LinkifyConfig)
154	}
155	return p
156}
157
158func (s *linkifyParser) Trigger() []byte {
159	// ' ' indicates any white spaces and a line head
160	return []byte{' ', '*', '_', '~', '('}
161}
162
// Protocol prefixes accepted when no explicit AllowedProtocols are
// configured.
var protoHTTP, protoHTTPS, protoFTP = []byte("http:"), []byte("https:"), []byte("ftp:")

// domainWWW is the prefix that marks a URL written without a protocol.
var domainWWW = []byte("www.")
169
170func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
171	if pc.IsInLinkLabel() {
172		return nil
173	}
174	line, segment := block.PeekLine()
175	consumes := 0
176	start := segment.Start
177	c := line[0]
178	// advance if current position is not a line head.
179	if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' {
180		consumes++
181		start++
182		line = line[1:]
183	}
184
185	var m []int
186	var protocol []byte
187	var typ ast.AutoLinkType = ast.AutoLinkURL
188	if s.LinkifyConfig.AllowedProtocols == nil {
189		if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
190			m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
191		}
192	} else {
193		for _, prefix := range s.LinkifyConfig.AllowedProtocols {
194			if bytes.HasPrefix(line, prefix) {
195				m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
196				break
197			}
198		}
199	}
200	if m == nil && bytes.HasPrefix(line, domainWWW) {
201		m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line)
202		protocol = []byte("http")
203	}
204	if m != nil && m[0] != 0 {
205		m = nil
206	}
207	if m != nil && m[0] == 0 {
208		lastChar := line[m[1]-1]
209		if lastChar == '.' {
210			m[1]--
211		} else if lastChar == ')' {
212			closing := 0
213			for i := m[1] - 1; i >= m[0]; i-- {
214				switch line[i] {
215				case ')':
216					closing++
217				case '(':
218					closing--
219				}
220			}
221			if closing > 0 {
222				m[1] -= closing
223			}
224		} else if lastChar == ';' {
225			i := m[1] - 2
226			for ; i >= m[0]; i-- {
227				if util.IsAlphaNumeric(line[i]) {
228					continue
229				}
230				break
231			}
232			if i != m[1]-2 {
233				if line[i] == '&' {
234					m[1] -= m[1] - i
235				}
236			}
237		}
238	}
239	if m == nil {
240		if len(line) > 0 && util.IsPunct(line[0]) {
241			return nil
242		}
243		typ = ast.AutoLinkEmail
244		stop := -1
245		if s.LinkifyConfig.EmailRegexp == nil {
246			stop = util.FindEmailIndex(line)
247		} else {
248			m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line)
249			if m != nil && m[0] == 0 {
250				stop = m[1]
251			}
252		}
253		if stop < 0 {
254			return nil
255		}
256		at := bytes.IndexByte(line, '@')
257		m = []int{0, stop, at, stop - 1}
258		if bytes.IndexByte(line[m[2]:m[3]], '.') < 0 {
259			return nil
260		}
261		lastChar := line[m[1]-1]
262		if lastChar == '.' {
263			m[1]--
264		}
265		if m[1] < len(line) {
266			nextChar := line[m[1]]
267			if nextChar == '-' || nextChar == '_' {
268				return nil
269			}
270		}
271	}
272	if m == nil {
273		return nil
274	}
275	if consumes != 0 {
276		s := segment.WithStop(segment.Start + 1)
277		ast.MergeOrAppendTextSegment(parent, s)
278	}
279	i := m[1] - 1
280	for ; i > 0; i-- {
281		c := line[i]
282		switch c {
283		case '?', '!', '.', ',', ':', '*', '_', '~':
284		default:
285			goto endfor
286		}
287	}
288endfor:
289	i++
290	consumes += i
291	block.Advance(consumes)
292	n := ast.NewTextSegment(text.NewSegment(start, start+i))
293	link := ast.NewAutoLink(typ, n)
294	link.Protocol = protocol
295	return link
296}
297
298func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
299	// nothing to do
300}
301
302type linkify struct {
303	options []LinkifyOption
304}
305
306// Linkify is an extension that allow you to parse text that seems like a URL.
307var Linkify = &linkify{}
308
309// NewLinkify creates a new [goldmark.Extender] that
310// allow you to parse text that seems like a URL.
311func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
312	return &linkify{
313		options: opts,
314	}
315}
316
317func (e *linkify) Extend(m goldmark.Markdown) {
318	m.Parser().AddOptions(
319		parser.WithInlineParsers(
320			util.Prioritized(NewLinkifyParser(e.options...), 999),
321		),
322	)
323}