diff options
Diffstat (limited to 'vendor/github.com/gorilla/css/scanner')
| -rw-r--r-- | vendor/github.com/gorilla/css/scanner/doc.go | 33 | ||||
| -rw-r--r-- | vendor/github.com/gorilla/css/scanner/scanner.go | 356 |
2 files changed, 389 insertions, 0 deletions
diff --git a/vendor/github.com/gorilla/css/scanner/doc.go b/vendor/github.com/gorilla/css/scanner/doc.go new file mode 100644 index 0000000..f19850e --- /dev/null +++ b/vendor/github.com/gorilla/css/scanner/doc.go | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2012 The Gorilla Authors. All rights reserved. | ||
| 2 | // Use of this source code is governed by a BSD-style | ||
| 3 | // license that can be found in the LICENSE file. | ||
| 4 | |||
| 5 | /* | ||
| 6 | Package gorilla/css/scanner generates tokens for a CSS3 input. | ||
| 7 | |||
| 8 | It follows the CSS3 specification located at: | ||
| 9 | |||
| 10 | http://www.w3.org/TR/css3-syntax/ | ||
| 11 | |||
| 12 | To use it, create a new scanner for a given CSS string and call Next() until | ||
| 13 | the token returned has type TokenEOF or TokenError: | ||
| 14 | |||
| 15 | s := scanner.New(myCSS) | ||
| 16 | for { | ||
| 17 | token := s.Next() | ||
| 18 | if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError { | ||
| 19 | break | ||
| 20 | } | ||
| 21 | // Do something with the token... | ||
| 22 | } | ||
| 23 | |||
| 24 | Following the CSS3 specification, an error can only occur when the scanner | ||
| 25 | finds an unclosed quote or unclosed comment. In these cases the text becomes | ||
| 26 | "untokenizable". Everything else is tokenizable and it is up to a parser | ||
| 27 | to make sense of the token stream (or ignore nonsensical token sequences). | ||
| 28 | |||
| 29 | Note: the scanner doesn't perform lexical analysis or, in other words, it | ||
| 30 | doesn't care about the token context. It is intended to be used by a | ||
| 31 | lexer or parser. | ||
| 32 | */ | ||
| 33 | package scanner | ||
diff --git a/vendor/github.com/gorilla/css/scanner/scanner.go b/vendor/github.com/gorilla/css/scanner/scanner.go new file mode 100644 index 0000000..23fa740 --- /dev/null +++ b/vendor/github.com/gorilla/css/scanner/scanner.go | |||
| @@ -0,0 +1,356 @@ | |||
| 1 | // Copyright 2012 The Gorilla Authors. All rights reserved. | ||
| 2 | // Use of this source code is governed by a BSD-style | ||
| 3 | // license that can be found in the LICENSE file. | ||
| 4 | |||
| 5 | package scanner | ||
| 6 | |||
| 7 | import ( | ||
| 8 | "fmt" | ||
| 9 | "regexp" | ||
| 10 | "strings" | ||
| 11 | "unicode" | ||
| 12 | "unicode/utf8" | ||
| 13 | ) | ||
| 14 | |||
// tokenType identifies the type of lexical tokens.
type tokenType int

// String returns a string representation of the token type.
// A value not present in tokenNames yields the empty string
// (the map's zero value).
func (t tokenType) String() string {
	return tokenNames[t]
}
| 22 | |||
| 23 | // Token represents a token and the corresponding string. | ||
| 24 | type Token struct { | ||
| 25 | Type tokenType | ||
| 26 | Value string | ||
| 27 | Line int | ||
| 28 | Column int | ||
| 29 | } | ||
| 30 | |||
| 31 | // String returns a string representation of the token. | ||
| 32 | func (t *Token) String() string { | ||
| 33 | if len(t.Value) > 10 { | ||
| 34 | return fmt.Sprintf("%s (line: %d, column: %d): %.10q...", | ||
| 35 | t.Type, t.Line, t.Column, t.Value) | ||
| 36 | } | ||
| 37 | return fmt.Sprintf("%s (line: %d, column: %d): %q", | ||
| 38 | t.Type, t.Line, t.Column, t.Value) | ||
| 39 | } | ||
| 40 | |||
// All tokens -----------------------------------------------------------------

// The complete list of tokens in CSS3.
const (
	// Scanner flags (not part of the CSS specification).
	TokenError tokenType = iota
	TokenEOF
	// From now on, only tokens from the CSS specification.
	TokenIdent
	TokenAtKeyword
	TokenString
	TokenHash
	TokenNumber
	TokenPercentage
	TokenDimension
	TokenURI
	TokenUnicodeRange
	TokenCDO
	TokenCDC
	TokenS
	TokenComment
	TokenFunction
	TokenIncludes
	TokenDashMatch
	TokenPrefixMatch
	TokenSuffixMatch
	TokenSubstringMatch
	TokenChar
	TokenBOM
)

// tokenNames maps tokenType's to their names. Used for conversion to string.
// Every tokenType constant above must have an entry here, otherwise
// tokenType.String returns "".
var tokenNames = map[tokenType]string{
	TokenError:          "error",
	TokenEOF:            "EOF",
	TokenIdent:          "IDENT",
	TokenAtKeyword:      "ATKEYWORD",
	TokenString:         "STRING",
	TokenHash:           "HASH",
	TokenNumber:         "NUMBER",
	TokenPercentage:     "PERCENTAGE",
	TokenDimension:      "DIMENSION",
	TokenURI:            "URI",
	TokenUnicodeRange:   "UNICODE-RANGE",
	TokenCDO:            "CDO",
	TokenCDC:            "CDC",
	TokenS:              "S",
	TokenComment:        "COMMENT",
	TokenFunction:       "FUNCTION",
	TokenIncludes:       "INCLUDES",
	TokenDashMatch:      "DASHMATCH",
	TokenPrefixMatch:    "PREFIXMATCH",
	TokenSuffixMatch:    "SUFFIXMATCH",
	TokenSubstringMatch: "SUBSTRINGMATCH",
	TokenChar:           "CHAR",
	TokenBOM:            "BOM",
}
| 98 | |||
// Macros and productions -----------------------------------------------------
// http://www.w3.org/TR/css3-syntax/#tokenization

// macroRegexp matches a "{name}" macro reference inside a pattern;
// init() repeatedly expands these until none remain.
var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`)

// macros maps macro names to patterns to be expanded.
// The patterns follow the tokenization grammar of the CSS3 syntax spec.
var macros = map[string]string{
	// must be escaped: `\.+*?()|[]{}^$`
	"ident":      `-?{nmstart}{nmchar}*`,
	"name":       `{nmchar}+`,
	"nmstart":    `[a-zA-Z_]|{nonascii}|{escape}`,
	"nonascii":   "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"unicode":    `\\[0-9a-fA-F]{1,6}{wc}?`,
	"escape":     "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"nmchar":     `[a-zA-Z0-9_-]|{nonascii}|{escape}`,
	"num":        `[0-9]*\.[0-9]+|[0-9]+`,
	"string":     `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
	"stringchar": `{urlchar}|[ ]|\\{nl}`,
	"nl":         `[\n\r\f]|\r\n`,
	"w":          `{wc}*`,
	"wc":         `[\t\n\f\r ]`,

	// urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
	// ASCII characters range = `[\u0020-\u007e]`
	// Skip space \u0020 = `[\u0021-\u007e]`
	// Skip quotation mark \0022 = `[\u0021\u0023-\u007e]`
	// Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
	// Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d\u007e]`
	// Finally, the left square bracket (\u005b) and right (\u005d) needs escaping themselves
	"urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
}
| 130 | |||
// productions maps the list of tokens to patterns to be expanded.
// Macro references ("{ident}" etc.) are resolved in init() using the
// macros map above.
var productions = map[tokenType]string{
	// Unused regexps (matched using other methods in Scanner.Next) are
	// commented out.
	TokenIdent:        `{ident}`,
	TokenAtKeyword:    `@{ident}`,
	TokenString:       `{string}`,
	TokenHash:         `#{name}`,
	TokenNumber:       `{num}`,
	TokenPercentage:   `{num}%`,
	TokenDimension:    `{num}{ident}`,
	TokenURI:          `url\({w}(?:{string}|{urlchar}*?){w}\)`,
	TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`,
	//TokenCDO:        `<!--`,
	TokenCDC:     `-->`,
	TokenS:       `{wc}+`,
	TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`,
	TokenFunction: `{ident}\(`,
	//TokenIncludes:       `~=`,
	//TokenDashMatch:      `\|=`,
	//TokenPrefixMatch:    `\^=`,
	//TokenSuffixMatch:    `\$=`,
	//TokenSubstringMatch: `\*=`,
	//TokenChar:           `[^"']`,
	//TokenBOM:            "\uFEFF",
}
| 156 | |||
// matchers maps the list of tokens to compiled regular expressions.
//
// The map is filled on init() using the macros and productions defined in
// the CSS specification.
var matchers = map[tokenType]*regexp.Regexp{}

// matchOrder is the order to test regexps when first-char shortcuts
// can't be used. Longer/more specific productions come first so that,
// e.g., "url(" matches TokenURI before TokenFunction or TokenIdent.
var matchOrder = []tokenType{
	TokenURI,
	TokenFunction,
	TokenUnicodeRange,
	TokenIdent,
	TokenDimension,
	TokenPercentage,
	TokenNumber,
	TokenCDC,
}
| 175 | |||
| 176 | func init() { | ||
| 177 | // replace macros and compile regexps for productions. | ||
| 178 | replaceMacro := func(s string) string { | ||
| 179 | return "(?:" + macros[s[1:len(s)-1]] + ")" | ||
| 180 | } | ||
| 181 | for t, s := range productions { | ||
| 182 | for macroRegexp.MatchString(s) { | ||
| 183 | s = macroRegexp.ReplaceAllStringFunc(s, replaceMacro) | ||
| 184 | } | ||
| 185 | matchers[t] = regexp.MustCompile("^(?:" + s + ")") | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
| 189 | // Scanner -------------------------------------------------------------------- | ||
| 190 | |||
| 191 | // New returns a new CSS scanner for the given input. | ||
| 192 | func New(input string) *Scanner { | ||
| 193 | // Normalize newlines. | ||
| 194 | input = strings.Replace(input, "\r\n", "\n", -1) | ||
| 195 | return &Scanner{ | ||
| 196 | input: input, | ||
| 197 | row: 1, | ||
| 198 | col: 1, | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
// Scanner scans an input and emits tokens following the CSS3 specification.
type Scanner struct {
	input string // CSS text, with "\r\n" normalized to "\n" by New
	pos   int    // current byte offset into input
	row   int    // current line number, 1-based
	col   int    // current column, 1-based (counted in runes by updatePosition)
	err   *Token // sticky TokenEOF/TokenError; once set, Next always returns it
}
| 210 | |||
// Next returns the next token from the input.
//
// At the end of the input the token type is TokenEOF.
//
// If the input can't be tokenized the token type is TokenError. This occurs
// in case of unclosed quotation marks or comments.
func (s *Scanner) Next() *Token {
	// s.err is sticky: once EOF or an error has been emitted, every further
	// call returns the same token, so callers can simply loop until
	// TokenEOF/TokenError.
	if s.err != nil {
		return s.err
	}
	if s.pos >= len(s.input) {
		s.err = &Token{TokenEOF, "", s.row, s.col}
		return s.err
	}
	if s.pos == 0 {
		// Test BOM only once, at the beginning of the file.
		// NOTE(review): emitSimple advances col by byte length (3 for the
		// BOM) although it is a single rune — confirm whether column
		// accounting after a BOM matters to callers.
		if strings.HasPrefix(s.input, "\uFEFF") {
			return s.emitSimple(TokenBOM, "\uFEFF")
		}
	}
	// There's a lot we can guess based on the first byte so we'll take a
	// shortcut before testing multiple regexps.
	input := s.input[s.pos:]
	switch input[0] {
	case '\t', '\n', '\f', '\r', ' ':
		// Whitespace.
		return s.emitToken(TokenS, matchers[TokenS].FindString(input))
	case '.':
		// Dot is too common to not have a quick check.
		// We'll test if this is a Char; if it is followed by a number it is a
		// dimension/percentage/number, and this will be matched later.
		if len(input) > 1 && !unicode.IsDigit(rune(input[1])) {
			return s.emitSimple(TokenChar, ".")
		}
	case '#':
		// Another common one: Hash or Char.
		if match := matchers[TokenHash].FindString(input); match != "" {
			return s.emitToken(TokenHash, match)
		}
		return s.emitSimple(TokenChar, "#")
	case '@':
		// Another common one: AtKeyword or Char.
		if match := matchers[TokenAtKeyword].FindString(input); match != "" {
			return s.emitSimple(TokenAtKeyword, match)
		}
		return s.emitSimple(TokenChar, "@")
	case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}':
		// More common chars.
		return s.emitSimple(TokenChar, string(input[0]))
	case '"', '\'':
		// String or error: per the CSS3 spec, an unclosed quote makes the
		// rest of the input untokenizable.
		match := matchers[TokenString].FindString(input)
		if match != "" {
			return s.emitToken(TokenString, match)
		}

		s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
		return s.err
	case '/':
		// Comment, error or Char.
		if len(input) > 1 && input[1] == '*' {
			match := matchers[TokenComment].FindString(input)
			if match != "" {
				return s.emitToken(TokenComment, match)
			} else {
				s.err = &Token{TokenError, "unclosed comment", s.row, s.col}
				return s.err
			}
		}
		return s.emitSimple(TokenChar, "/")
	case '~':
		// Includes or Char.
		return s.emitPrefixOrChar(TokenIncludes, "~=")
	case '|':
		// DashMatch or Char.
		return s.emitPrefixOrChar(TokenDashMatch, "|=")
	case '^':
		// PrefixMatch or Char.
		return s.emitPrefixOrChar(TokenPrefixMatch, "^=")
	case '$':
		// SuffixMatch or Char.
		return s.emitPrefixOrChar(TokenSuffixMatch, "$=")
	case '*':
		// SubstringMatch or Char.
		return s.emitPrefixOrChar(TokenSubstringMatch, "*=")
	case '<':
		// CDO or Char.
		return s.emitPrefixOrChar(TokenCDO, "<!--")
	}
	// Test all regexps, in order.
	for _, token := range matchOrder {
		if match := matchers[token].FindString(input); match != "" {
			return s.emitToken(token, match)
		}
	}
	// We already handled unclosed quotation marks and comments,
	// so this can only be a Char.
	// NOTE(review): col is advanced by the rune's byte width here, while
	// updatePosition counts runes — confirm which unit columns should use.
	r, width := utf8.DecodeRuneInString(input)
	token := &Token{TokenChar, string(r), s.row, s.col}
	s.col += width
	s.pos += width
	return token
}
| 314 | |||
| 315 | // updatePosition updates input coordinates based on the consumed text. | ||
| 316 | func (s *Scanner) updatePosition(text string) { | ||
| 317 | width := utf8.RuneCountInString(text) | ||
| 318 | lines := strings.Count(text, "\n") | ||
| 319 | s.row += lines | ||
| 320 | if lines == 0 { | ||
| 321 | s.col += width | ||
| 322 | } else { | ||
| 323 | s.col = utf8.RuneCountInString(text[strings.LastIndex(text, "\n"):]) | ||
| 324 | } | ||
| 325 | s.pos += len(text) // while col is a rune index, pos is a byte index | ||
| 326 | } | ||
| 327 | |||
| 328 | // emitToken returns a Token for the string v and updates the scanner position. | ||
| 329 | func (s *Scanner) emitToken(t tokenType, v string) *Token { | ||
| 330 | token := &Token{t, v, s.row, s.col} | ||
| 331 | s.updatePosition(v) | ||
| 332 | return token | ||
| 333 | } | ||
| 334 | |||
| 335 | // emitSimple returns a Token for the string v and updates the scanner | ||
| 336 | // position in a simplified manner. | ||
| 337 | // | ||
| 338 | // The string is known to have only ASCII characters and to not have a newline. | ||
| 339 | func (s *Scanner) emitSimple(t tokenType, v string) *Token { | ||
| 340 | token := &Token{t, v, s.row, s.col} | ||
| 341 | s.col += len(v) | ||
| 342 | s.pos += len(v) | ||
| 343 | return token | ||
| 344 | } | ||
| 345 | |||
| 346 | // emitPrefixOrChar returns a Token for type t if the current position | ||
| 347 | // matches the given prefix. Otherwise it returns a Char token using the | ||
| 348 | // first character from the prefix. | ||
| 349 | // | ||
| 350 | // The prefix is known to have only ASCII characters and to not have a newline. | ||
| 351 | func (s *Scanner) emitPrefixOrChar(t tokenType, prefix string) *Token { | ||
| 352 | if strings.HasPrefix(s.input[s.pos:], prefix) { | ||
| 353 | return s.emitSimple(t, prefix) | ||
| 354 | } | ||
| 355 | return s.emitSimple(TokenChar, string(prefix[0])) | ||
| 356 | } | ||
