1// Copyright 2013 by Dobrosław Żybort. All rights reserved.
  2// This Source Code Form is subject to the terms of the Mozilla Public
  3// License, v. 2.0. If a copy of the MPL was not distributed with this
  4// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  5
  6package slug
  7
  8import (
  9	"bytes"
 10	"regexp"
 11	"sort"
 12	"strings"
 13
 14	"github.com/gosimple/unidecode"
 15)
 16
var (
	// CustomSub stores custom substitution map.
	// MakeLang applies it with Substitute, after CustomRuneSub.
	CustomSub map[string]string
	// CustomRuneSub stores custom rune substitution map.
	// MakeLang applies it with SubstituteRune before any other substitution.
	CustomRuneSub map[rune]string

	// MaxLength stores maximum slug length.
	// By default slugs aren't shortened.
	// If MaxLength is smaller than length of the first word, then returned
	// slug will contain only substring from the first word truncated
	// after MaxLength.
	MaxLength int

	// EnableSmartTruncate defines if cutting with MaxLength is smart.
	// Smart algorithm will cut slug after full word.
	// Default is true.
	EnableSmartTruncate = true

	// Lowercase defines if the resulting slug is transformed to lowercase.
	// Default is true.
	Lowercase = true

	// regexpNonAuthorizedChars matches every character that is not an ASCII
	// letter, an ASCII digit, '-' or '_'.
	regexpNonAuthorizedChars = regexp.MustCompile("[^a-zA-Z0-9-_]")
	// regexpMultipleDashes matches a run of one or more consecutive dashes.
	regexpMultipleDashes     = regexp.MustCompile("-+")
)
 42
 43//=============================================================================
 44
 45// Make returns slug generated from provided string. Will use "en" as language
 46// substitution.
 47func Make(s string) (slug string) {
 48	return MakeLang(s, "en")
 49}
 50
 51// MakeLang returns slug generated from provided string and will use provided
 52// language for chars substitution.
 53func MakeLang(s string, lang string) (slug string) {
 54	slug = strings.TrimSpace(s)
 55
 56	// Custom substitutions
 57	// Always substitute runes first
 58	slug = SubstituteRune(slug, CustomRuneSub)
 59	slug = Substitute(slug, CustomSub)
 60
 61	// Process string with selected substitution language.
 62	// Catch ISO 3166-1, ISO 639-1:2002 and ISO 639-3:2007.
 63	switch strings.ToLower(lang) {
 64	case "bg", "bgr":
 65		slug = SubstituteRune(slug, bgSub)
 66	case "cs", "ces":
 67		slug = SubstituteRune(slug, csSub)
 68	case "de", "deu":
 69		slug = SubstituteRune(slug, deSub)
 70	case "en", "eng":
 71		slug = SubstituteRune(slug, enSub)
 72	case "es", "spa":
 73		slug = SubstituteRune(slug, esSub)
 74	case "fi", "fin":
 75		slug = SubstituteRune(slug, fiSub)
 76	case "fr", "fra":
 77		slug = SubstituteRune(slug, frSub)
 78	case "gr", "el", "ell":
 79		slug = SubstituteRune(slug, grSub)
 80	case "hu", "hun":
 81		slug = SubstituteRune(slug, huSub)
 82	case "id", "idn", "ind":
 83		slug = SubstituteRune(slug, idSub)
 84	case "it", "ita":
 85		slug = SubstituteRune(slug, itSub)
 86	case "kz", "kk", "kaz":
 87		slug = SubstituteRune(slug, kkSub)
 88	case "nb", "nob":
 89		slug = SubstituteRune(slug, nbSub)
 90	case "nl", "nld":
 91		slug = SubstituteRune(slug, nlSub)
 92	case "nn", "nno":
 93		slug = SubstituteRune(slug, nnSub)
 94	case "pl", "pol":
 95		slug = SubstituteRune(slug, plSub)
 96	case "ro", "rou":
 97		slug = SubstituteRune(slug, roSub)
 98	case "sl", "slv":
 99		slug = SubstituteRune(slug, slSub)
100	case "sv", "swe":
101		slug = SubstituteRune(slug, svSub)
102	case "tr", "tur":
103		slug = SubstituteRune(slug, trSub)
104	default: // fallback to "en" if lang not found
105		slug = SubstituteRune(slug, enSub)
106	}
107
108	// Process all non ASCII symbols
109	slug = unidecode.Unidecode(slug)
110
111	if Lowercase {
112		slug = strings.ToLower(slug)
113	}
114
115	if !EnableSmartTruncate && len(slug) >= MaxLength {
116		slug = slug[:MaxLength]
117	}
118
119	// Process all remaining symbols
120	slug = regexpNonAuthorizedChars.ReplaceAllString(slug, "-")
121	slug = regexpMultipleDashes.ReplaceAllString(slug, "-")
122	slug = strings.Trim(slug, "-_")
123
124	if MaxLength > 0 && EnableSmartTruncate {
125		slug = smartTruncate(slug)
126	}
127
128	return slug
129}
130
// Substitute applies every replacement from the provided substitution map to
// s and returns the result. Keys are processed in alphabetic (sorted) order,
// with one full pass over the string per key, so the output of an earlier
// replacement may be rewritten again by a later key.
func Substitute(s string, sub map[string]string) (buf string) {
	ordered := make([]string, 0, len(sub))
	for from := range sub {
		ordered = append(ordered, from)
	}
	// Map iteration order is random; sorting makes the result deterministic.
	sort.Strings(ordered)

	buf = s
	for i := range ordered {
		from := ordered[i]
		buf = strings.Replace(buf, from, sub[from], -1)
	}
	return buf
}
147
// SubstituteRune walks s rune by rune and replaces each rune that has an
// entry in the provided substitution map with the mapped string. Runes
// without an entry are copied through unchanged. The string is scanned
// exactly once, so replacement text is never substituted again.
func SubstituteRune(s string, sub map[rune]string) string {
	var out bytes.Buffer
	for _, r := range s {
		replacement, found := sub[r]
		if !found {
			out.WriteRune(r)
			continue
		}
		out.WriteString(replacement)
	}
	return out.String()
}
161
162func smartTruncate(text string) string {
163	if len(text) <= MaxLength {
164		return text
165	}
166
167	// If slug is too long, we need to find the last '-' before MaxLength, and
168	// we cut there.
169	// If we don't find any, we have only one word, and we cut at MaxLength.
170	for i := MaxLength; i >= 0; i-- {
171		if text[i] == '-' {
172			return text[:i]
173		}
174	}
175	return text[:MaxLength]
176}
177
178// IsSlug returns True if provided text does not contain white characters,
179// punctuation, all letters are lower case and only from ASCII range.
180// It could contain `-` and `_` but not at the beginning or end of the text.
181// It should be in range of the MaxLength var if specified.
182// All output from slug.Make(text) should pass this test.
183func IsSlug(text string) bool {
184	if text == "" ||
185		(MaxLength > 0 && len(text) > MaxLength) ||
186		text[0] == '-' || text[0] == '_' ||
187		text[len(text)-1] == '-' || text[len(text)-1] == '_' {
188		return false
189	}
190	for _, c := range text {
191		if (c < 'a' || c > 'z') && c != '-' && c != '_' && (c < '0' || c > '9') {
192			return false
193		}
194	}
195	return true
196}