1package extension
  2
  3import (
  4	"unicode"
  5
  6	"github.com/yuin/goldmark"
  7	gast "github.com/yuin/goldmark/ast"
  8	"github.com/yuin/goldmark/parser"
  9	"github.com/yuin/goldmark/text"
 10	"github.com/yuin/goldmark/util"
 11)
 12
// uncloseCounterKey is the parser.Context key under which getUnclosedCounter
// stores and looks up the *unclosedCounter.
var uncloseCounterKey = parser.NewContextKey()
 14
 15type unclosedCounter struct {
 16	Single int
 17	Double int
 18}
 19
 20func (u *unclosedCounter) Reset() {
 21	u.Single = 0
 22	u.Double = 0
 23}
 24
 25func getUnclosedCounter(pc parser.Context) *unclosedCounter {
 26	v := pc.Get(uncloseCounterKey)
 27	if v == nil {
 28		v = &unclosedCounter{}
 29		pc.Set(uncloseCounterKey, v)
 30	}
 31	return v.(*unclosedCounter)
 32}
 33
 34// TypographicPunctuation is a key of the punctuations that can be replaced with
 35// typographic entities.
 36type TypographicPunctuation int
 37
 38const (
 39	// LeftSingleQuote is '
 40	LeftSingleQuote TypographicPunctuation = iota + 1
 41	// RightSingleQuote is '
 42	RightSingleQuote
 43	// LeftDoubleQuote is "
 44	LeftDoubleQuote
 45	// RightDoubleQuote is "
 46	RightDoubleQuote
 47	// EnDash is --
 48	EnDash
 49	// EmDash is ---
 50	EmDash
 51	// Ellipsis is ...
 52	Ellipsis
 53	// LeftAngleQuote is <<
 54	LeftAngleQuote
 55	// RightAngleQuote is >>
 56	RightAngleQuote
 57	// Apostrophe is '
 58	Apostrophe
 59
 60	typographicPunctuationMax
 61)
 62
// A TypographerConfig struct is a data structure that holds configuration of the
// Typographer extension.
type TypographerConfig struct {
	// Substitutions holds the replacement text for each punctuation, indexed
	// by TypographicPunctuation. The parser skips a punctuation whose entry
	// is nil.
	Substitutions [][]byte
}
 68
 69func newDefaultSubstitutions() [][]byte {
 70	replacements := make([][]byte, typographicPunctuationMax)
 71	replacements[LeftSingleQuote] = []byte("&lsquo;")
 72	replacements[RightSingleQuote] = []byte("&rsquo;")
 73	replacements[LeftDoubleQuote] = []byte("&ldquo;")
 74	replacements[RightDoubleQuote] = []byte("&rdquo;")
 75	replacements[EnDash] = []byte("&ndash;")
 76	replacements[EmDash] = []byte("&mdash;")
 77	replacements[Ellipsis] = []byte("&hellip;")
 78	replacements[LeftAngleQuote] = []byte("&laquo;")
 79	replacements[RightAngleQuote] = []byte("&raquo;")
 80	replacements[Apostrophe] = []byte("&rsquo;")
 81
 82	return replacements
 83}
 84
 85// SetOption implements SetOptioner.
 86func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) {
 87	switch name {
 88	case optTypographicSubstitutions:
 89		b.Substitutions = value.([][]byte)
 90	}
 91}
 92
// A TypographerOption interface sets options for the TypographerParser.
type TypographerOption interface {
	parser.Option
	// SetTypographerOption applies this option to the given TypographerConfig.
	SetTypographerOption(*TypographerConfig)
}
 98
// optTypographicSubstitutions is the option name under which the substitution
// table is stored in a parser.Config and recognised by
// TypographerConfig.SetOption.
const optTypographicSubstitutions parser.OptionName = "TypographicSubstitutions"
100
// TypographicSubstitutions maps punctuation keys to the replacement text used
// by the Typographer extension.
type TypographicSubstitutions map[TypographicPunctuation][]byte
103
104type withTypographicSubstitutions struct {
105	value [][]byte
106}
107
108func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) {
109	c.Options[optTypographicSubstitutions] = o.value
110}
111
112func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) {
113	p.Substitutions = o.value
114}
115
116// WithTypographicSubstitutions is a functional otpion that specify replacement text
117// for punctuations.
118func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption {
119	replacements := newDefaultSubstitutions()
120	for k, v := range values {
121		replacements[k] = v
122	}
123
124	return &withTypographicSubstitutions{replacements}
125}
126
127type typographerDelimiterProcessor struct {
128}
129
130func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool {
131	return b == '\'' || b == '"'
132}
133
134func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
135	return opener.Char == closer.Char
136}
137
138func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node {
139	return nil
140}
141
142var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{}
143
// typographerParser is the inline parser returned by NewTypographerParser.
// It embeds TypographerConfig and reads its Substitutions table while parsing.
type typographerParser struct {
	TypographerConfig
}
147
148// NewTypographerParser return a new InlineParser that parses
149// typographer expressions.
150func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
151	p := &typographerParser{
152		TypographerConfig: TypographerConfig{
153			Substitutions: newDefaultSubstitutions(),
154		},
155	}
156	for _, o := range opts {
157		o.SetTypographerOption(&p.TypographerConfig)
158	}
159	return p
160}
161
162func (s *typographerParser) Trigger() []byte {
163	return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['}
164}
165
166func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
167	line, _ := block.PeekLine()
168	c := line[0]
169	if len(line) > 2 {
170		if c == '-' {
171			if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // ---
172				node := gast.NewString(s.Substitutions[EmDash])
173				node.SetCode(true)
174				block.Advance(3)
175				return node
176			}
177		} else if c == '.' {
178			if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ...
179				node := gast.NewString(s.Substitutions[Ellipsis])
180				node.SetCode(true)
181				block.Advance(3)
182				return node
183			}
184			return nil
185		}
186	}
187	if len(line) > 1 {
188		if c == '<' {
189			if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // <<
190				node := gast.NewString(s.Substitutions[LeftAngleQuote])
191				node.SetCode(true)
192				block.Advance(2)
193				return node
194			}
195			return nil
196		} else if c == '>' {
197			if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >>
198				node := gast.NewString(s.Substitutions[RightAngleQuote])
199				node.SetCode(true)
200				block.Advance(2)
201				return node
202			}
203			return nil
204		} else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // --
205			node := gast.NewString(s.Substitutions[EnDash])
206			node.SetCode(true)
207			block.Advance(2)
208			return node
209		}
210	}
211	if c == '\'' || c == '"' {
212		before := block.PrecendingCharacter()
213		d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor)
214		if d == nil {
215			return nil
216		}
217		counter := getUnclosedCounter(pc)
218		if c == '\'' {
219			if s.Substitutions[Apostrophe] != nil {
220				// Handle decade abbrevations such as '90s
221				if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
222					after := rune(' ')
223					if len(line) > 4 {
224						after = util.ToRune(line, 4)
225					}
226					if len(line) == 3 || util.IsSpaceRune(after) || util.IsPunctRune(after) {
227						node := gast.NewString(s.Substitutions[Apostrophe])
228						node.SetCode(true)
229						block.Advance(1)
230						return node
231					}
232				}
233				// special cases: 'twas, 'em, 'net
234				if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) && (line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') {
235					node := gast.NewString(s.Substitutions[Apostrophe])
236					node.SetCode(true)
237					block.Advance(1)
238					return node
239				}
240				// Convert normal apostrophes. This is probably more flexible than necessary but
241				// converts any apostrophe in between two alphanumerics.
242				if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (unicode.IsLetter(util.ToRune(line, 1))) {
243					node := gast.NewString(s.Substitutions[Apostrophe])
244					node.SetCode(true)
245					block.Advance(1)
246					return node
247				}
248			}
249			if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
250				nt := LeftSingleQuote
251				// special cases: Alice's, I'm, Don't, You'd
252				if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') && (len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
253					nt = RightSingleQuote
254				}
255				// special cases: I've, I'll, You're
256				if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') || (line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) && (len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
257					nt = RightSingleQuote
258				}
259				if nt == LeftSingleQuote {
260					counter.Single++
261				}
262
263				node := gast.NewString(s.Substitutions[nt])
264				node.SetCode(true)
265				block.Advance(1)
266				return node
267			}
268			if s.Substitutions[RightSingleQuote] != nil {
269				// plural possesives and abbreviations: Smiths', doin'
270				if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) && (len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) {
271					node := gast.NewString(s.Substitutions[RightSingleQuote])
272					node.SetCode(true)
273					block.Advance(1)
274					return node
275				}
276			}
277			if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 {
278				isClose := d.CanClose && !d.CanOpen
279				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
280				if isClose || maybeClose {
281					node := gast.NewString(s.Substitutions[RightSingleQuote])
282					node.SetCode(true)
283					block.Advance(1)
284					counter.Single--
285					return node
286				}
287			}
288		}
289		if c == '"' {
290			if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose {
291				node := gast.NewString(s.Substitutions[LeftDoubleQuote])
292				node.SetCode(true)
293				block.Advance(1)
294				counter.Double++
295				return node
296			}
297			if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 {
298				isClose := d.CanClose && !d.CanOpen
299				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
300				if isClose || maybeClose {
301					// special case: "Monitor 21""
302					if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) {
303						return nil
304					}
305					node := gast.NewString(s.Substitutions[RightDoubleQuote])
306					node.SetCode(true)
307					block.Advance(1)
308					counter.Double--
309					return node
310				}
311			}
312		}
313	}
314	return nil
315}
316
317func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) {
318	getUnclosedCounter(pc).Reset()
319}
320
// typographer is the extension value; options holds the TypographerOptions
// that Extend passes on to NewTypographerParser.
type typographer struct {
	options []TypographerOption
}

// Typographer is an extension that replaces punctuations with typographic entities.
// It carries no options, so its parser uses the default substitutions.
var Typographer = &typographer{}
327
328// NewTypographer returns a new Extender that replaces punctuations with typographic entities.
329func NewTypographer(opts ...TypographerOption) goldmark.Extender {
330	return &typographer{
331		options: opts,
332	}
333}
334
335func (e *typographer) Extend(m goldmark.Markdown) {
336	m.Parser().AddOptions(parser.WithInlineParsers(
337		util.Prioritized(NewTypographerParser(e.options...), 9999),
338	))
339}