aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/gorilla/css/scanner
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/gorilla/css/scanner')
-rw-r--r--vendor/github.com/gorilla/css/scanner/doc.go33
-rw-r--r--vendor/github.com/gorilla/css/scanner/scanner.go356
2 files changed, 389 insertions, 0 deletions
diff --git a/vendor/github.com/gorilla/css/scanner/doc.go b/vendor/github.com/gorilla/css/scanner/doc.go
new file mode 100644
index 0000000..f19850e
--- /dev/null
+++ b/vendor/github.com/gorilla/css/scanner/doc.go
@@ -0,0 +1,33 @@
1// Copyright 2012 The Gorilla Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package gorilla/css/scanner generates tokens for a CSS3 input.
7
8It follows the CSS3 specification located at:
9
10 http://www.w3.org/TR/css3-syntax/
11
12To use it, create a new scanner for a given CSS string and call Next() until
13the token returned has type TokenEOF or TokenError:
14
15 s := scanner.New(myCSS)
16 for {
17 token := s.Next()
18 if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError {
19 break
20 }
21 // Do something with the token...
22 }
23
24Following the CSS3 specification, an error can only occur when the scanner
25finds an unclosed quote or unclosed comment. In these cases the text becomes
26"untokenizable". Everything else is tokenizable and it is up to a parser
27to make sense of the token stream (or ignore nonsensical token sequences).
28
Note: the scanner doesn't perform syntactic analysis — in other words, it
doesn't care about the token context. It is intended to be used by a
lexer or parser.
32*/
33package scanner
diff --git a/vendor/github.com/gorilla/css/scanner/scanner.go b/vendor/github.com/gorilla/css/scanner/scanner.go
new file mode 100644
index 0000000..23fa740
--- /dev/null
+++ b/vendor/github.com/gorilla/css/scanner/scanner.go
@@ -0,0 +1,356 @@
1// Copyright 2012 The Gorilla Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package scanner
6
7import (
8 "fmt"
9 "regexp"
10 "strings"
11 "unicode"
12 "unicode/utf8"
13)
14
15// tokenType identifies the type of lexical tokens.
16type tokenType int
17
18// String returns a string representation of the token type.
19func (t tokenType) String() string {
20 return tokenNames[t]
21}
22
// Token represents a token and the corresponding string.
type Token struct {
	Type   tokenType // kind of token (one of the Token* constants)
	Value  string    // raw text matched from the input
	Line   int       // 1-based line where the token starts
	Column int       // 1-based column (counted in runes) where the token starts
}
30
31// String returns a string representation of the token.
32func (t *Token) String() string {
33 if len(t.Value) > 10 {
34 return fmt.Sprintf("%s (line: %d, column: %d): %.10q...",
35 t.Type, t.Line, t.Column, t.Value)
36 }
37 return fmt.Sprintf("%s (line: %d, column: %d): %q",
38 t.Type, t.Line, t.Column, t.Value)
39}
40
// All tokens -----------------------------------------------------------------

// The complete list of tokens in CSS3.
//
// The values are iota-based; tokenNames, productions and matchers are all
// keyed by these constants.
const (
	// Scanner flags.
	TokenError tokenType = iota
	TokenEOF
	// From now on, only tokens from the CSS specification.
	TokenIdent
	TokenAtKeyword
	TokenString
	TokenHash
	TokenNumber
	TokenPercentage
	TokenDimension
	TokenURI
	TokenUnicodeRange
	TokenCDO
	TokenCDC
	TokenS
	TokenComment
	TokenFunction
	TokenIncludes
	TokenDashMatch
	TokenPrefixMatch
	TokenSuffixMatch
	TokenSubstringMatch
	TokenChar
	TokenBOM
)
71
// tokenNames maps tokenType's to their names. Used for conversion to string
// by tokenType.String.
var tokenNames = map[tokenType]string{
	TokenError:          "error",
	TokenEOF:            "EOF",
	TokenIdent:          "IDENT",
	TokenAtKeyword:      "ATKEYWORD",
	TokenString:         "STRING",
	TokenHash:           "HASH",
	TokenNumber:         "NUMBER",
	TokenPercentage:     "PERCENTAGE",
	TokenDimension:      "DIMENSION",
	TokenURI:            "URI",
	TokenUnicodeRange:   "UNICODE-RANGE",
	TokenCDO:            "CDO",
	TokenCDC:            "CDC",
	TokenS:              "S",
	TokenComment:        "COMMENT",
	TokenFunction:       "FUNCTION",
	TokenIncludes:       "INCLUDES",
	TokenDashMatch:      "DASHMATCH",
	TokenPrefixMatch:    "PREFIXMATCH",
	TokenSuffixMatch:    "SUFFIXMATCH",
	TokenSubstringMatch: "SUBSTRINGMATCH",
	TokenChar:           "CHAR",
	TokenBOM:            "BOM",
}
98
// Macros and productions -----------------------------------------------------
// http://www.w3.org/TR/css3-syntax/#tokenization

// macroRegexp matches a single {name} macro reference inside a pattern,
// used by init() to expand macros into productions.
var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`)

// macros maps macro names to patterns to be expanded.
// Macros may reference other macros (e.g. {ident} uses {nmstart}); expansion
// is repeated in init() until no references remain.
var macros = map[string]string{
	// must be escaped: `\.+*?()|[]{}^$`
	"ident":      `-?{nmstart}{nmchar}*`,
	"name":       `{nmchar}+`,
	"nmstart":    `[a-zA-Z_]|{nonascii}|{escape}`,
	"nonascii":   "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"unicode":    `\\[0-9a-fA-F]{1,6}{wc}?`,
	"escape":     "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"nmchar":     `[a-zA-Z0-9_-]|{nonascii}|{escape}`,
	"num":        `[0-9]*\.[0-9]+|[0-9]+`,
	"string":     `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
	"stringchar": `{urlchar}|[ ]|\\{nl}`,
	"nl":         `[\n\r\f]|\r\n`,
	"w":          `{wc}*`,
	"wc":         `[\t\n\f\r ]`,

	// urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
	// ASCII characters range = `[\u0020-\u007e]`
	// Skip space \u0020 = `[\u0021-\u007e]`
	// Skip quotation mark \0022 = `[\u0021\u0023-\u007e]`
	// Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
	// Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d\u007e]`
	// Finally, the left square bracket (\u005b) and right (\u005d) needs escaping themselves
	"urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
}
130
// productions maps the list of tokens to patterns to be expanded.
// The patterns use {macro} references that init() resolves via the macros
// map before compiling them into matchers.
var productions = map[tokenType]string{
	// Unused regexps (matched using other methods) are commented out.
	TokenIdent:        `{ident}`,
	TokenAtKeyword:    `@{ident}`,
	TokenString:       `{string}`,
	TokenHash:         `#{name}`,
	TokenNumber:       `{num}`,
	TokenPercentage:   `{num}%`,
	TokenDimension:    `{num}{ident}`,
	TokenURI:          `url\({w}(?:{string}|{urlchar}*?){w}\)`,
	TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`,
	//TokenCDO:          `<!--`,
	TokenCDC:     `-->`,
	TokenS:       `{wc}+`,
	TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`,
	TokenFunction: `{ident}\(`,
	//TokenIncludes:       `~=`,
	//TokenDashMatch:      `\|=`,
	//TokenPrefixMatch:    `\^=`,
	//TokenSuffixMatch:    `\$=`,
	//TokenSubstringMatch: `\*=`,
	//TokenChar:           `[^"']`,
	//TokenBOM:            "\uFEFF",
}
156
// matchers maps the list of tokens to compiled regular expressions.
//
// The map is filled on init() using the macros and productions defined in
// the CSS specification. Each regexp is anchored at the start of the input
// ("^(?:...)").
var matchers = map[tokenType]*regexp.Regexp{}

// matchOrder is the order to test regexps when first-char shortcuts
// can't be used.
//
// More specific productions come before the ones they would shadow
// (URI before Function, Dimension before Number); Next returns on the
// first match.
var matchOrder = []tokenType{
	TokenURI,
	TokenFunction,
	TokenUnicodeRange,
	TokenIdent,
	TokenDimension,
	TokenPercentage,
	TokenNumber,
	TokenCDC,
}
175
176func init() {
177 // replace macros and compile regexps for productions.
178 replaceMacro := func(s string) string {
179 return "(?:" + macros[s[1:len(s)-1]] + ")"
180 }
181 for t, s := range productions {
182 for macroRegexp.MatchString(s) {
183 s = macroRegexp.ReplaceAllStringFunc(s, replaceMacro)
184 }
185 matchers[t] = regexp.MustCompile("^(?:" + s + ")")
186 }
187}
188
189// Scanner --------------------------------------------------------------------
190
191// New returns a new CSS scanner for the given input.
192func New(input string) *Scanner {
193 // Normalize newlines.
194 input = strings.Replace(input, "\r\n", "\n", -1)
195 return &Scanner{
196 input: input,
197 row: 1,
198 col: 1,
199 }
200}
201
// Scanner scans an input and emits tokens following the CSS3 specification.
type Scanner struct {
	input string // CSS text being scanned; "\r\n" already normalized to "\n" by New
	pos   int    // byte offset of the next token in input
	row   int    // 1-based line of the next token
	col   int    // 1-based column (in runes) of the next token
	err   *Token // sticky TokenEOF/TokenError; once set, Next always returns it
}
210
// Next returns the next token from the input.
//
// At the end of the input the token type is TokenEOF.
//
// If the input can't be tokenized the token type is TokenError. This occurs
// in case of unclosed quotation marks or comments.
func (s *Scanner) Next() *Token {
	// EOF and errors are sticky: once s.err is set, every later call
	// returns the same token.
	if s.err != nil {
		return s.err
	}
	if s.pos >= len(s.input) {
		s.err = &Token{TokenEOF, "", s.row, s.col}
		return s.err
	}
	if s.pos == 0 {
		// Test BOM only once, at the beginning of the file.
		if strings.HasPrefix(s.input, "\uFEFF") {
			return s.emitSimple(TokenBOM, "\uFEFF")
		}
	}
	// There's a lot we can guess based on the first byte so we'll take a
	// shortcut before testing multiple regexps.
	input := s.input[s.pos:]
	switch input[0] {
	case '\t', '\n', '\f', '\r', ' ':
		// Whitespace.
		return s.emitToken(TokenS, matchers[TokenS].FindString(input))
	case '.':
		// Dot is too common to not have a quick check.
		// We'll test if this is a Char; if it is followed by a number it is a
		// dimension/percentage/number, and this will be matched later.
		if len(input) > 1 && !unicode.IsDigit(rune(input[1])) {
			return s.emitSimple(TokenChar, ".")
		}
	case '#':
		// Another common one: Hash or Char.
		if match := matchers[TokenHash].FindString(input); match != "" {
			return s.emitToken(TokenHash, match)
		}
		return s.emitSimple(TokenChar, "#")
	case '@':
		// Another common one: AtKeyword or Char.
		if match := matchers[TokenAtKeyword].FindString(input); match != "" {
			return s.emitSimple(TokenAtKeyword, match)
		}
		return s.emitSimple(TokenChar, "@")
	case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}':
		// More common chars.
		return s.emitSimple(TokenChar, string(input[0]))
	case '"', '\'':
		// String or error.
		match := matchers[TokenString].FindString(input)
		if match != "" {
			return s.emitToken(TokenString, match)
		}

		s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
		return s.err
	case '/':
		// Comment, error or Char.
		if len(input) > 1 && input[1] == '*' {
			match := matchers[TokenComment].FindString(input)
			if match != "" {
				return s.emitToken(TokenComment, match)
			} else {
				s.err = &Token{TokenError, "unclosed comment", s.row, s.col}
				return s.err
			}
		}
		return s.emitSimple(TokenChar, "/")
	case '~':
		// Includes or Char.
		return s.emitPrefixOrChar(TokenIncludes, "~=")
	case '|':
		// DashMatch or Char.
		return s.emitPrefixOrChar(TokenDashMatch, "|=")
	case '^':
		// PrefixMatch or Char.
		return s.emitPrefixOrChar(TokenPrefixMatch, "^=")
	case '$':
		// SuffixMatch or Char.
		return s.emitPrefixOrChar(TokenSuffixMatch, "$=")
	case '*':
		// SubstringMatch or Char.
		return s.emitPrefixOrChar(TokenSubstringMatch, "*=")
	case '<':
		// CDO or Char.
		return s.emitPrefixOrChar(TokenCDO, "<!--")
	}
	// Test all regexps, in order.
	for _, token := range matchOrder {
		if match := matchers[token].FindString(input); match != "" {
			return s.emitToken(token, match)
		}
	}
	// We already handled unclosed quotation marks and comments,
	// so this can only be a Char.
	r, width := utf8.DecodeRuneInString(input)
	token := &Token{TokenChar, string(r), s.row, s.col}
	// NOTE(review): col advances by the rune's byte width here, while
	// updatePosition counts runes — looks inconsistent for multi-byte
	// chars (harmless for ASCII); confirm before relying on Column.
	s.col += width
	s.pos += width
	return token
}
314
315// updatePosition updates input coordinates based on the consumed text.
316func (s *Scanner) updatePosition(text string) {
317 width := utf8.RuneCountInString(text)
318 lines := strings.Count(text, "\n")
319 s.row += lines
320 if lines == 0 {
321 s.col += width
322 } else {
323 s.col = utf8.RuneCountInString(text[strings.LastIndex(text, "\n"):])
324 }
325 s.pos += len(text) // while col is a rune index, pos is a byte index
326}
327
328// emitToken returns a Token for the string v and updates the scanner position.
329func (s *Scanner) emitToken(t tokenType, v string) *Token {
330 token := &Token{t, v, s.row, s.col}
331 s.updatePosition(v)
332 return token
333}
334
335// emitSimple returns a Token for the string v and updates the scanner
336// position in a simplified manner.
337//
338// The string is known to have only ASCII characters and to not have a newline.
339func (s *Scanner) emitSimple(t tokenType, v string) *Token {
340 token := &Token{t, v, s.row, s.col}
341 s.col += len(v)
342 s.pos += len(v)
343 return token
344}
345
346// emitPrefixOrChar returns a Token for type t if the current position
347// matches the given prefix. Otherwise it returns a Char token using the
348// first character from the prefix.
349//
350// The prefix is known to have only ASCII characters and to not have a newline.
351func (s *Scanner) emitPrefixOrChar(t tokenType, prefix string) *Token {
352 if strings.HasPrefix(s.input[s.pos:], prefix) {
353 return s.emitSimple(t, prefix)
354 }
355 return s.emitSimple(TokenChar, string(prefix[0]))
356}