diff options
Diffstat (limited to 'vendor/github.com/microcosm-cc/bluemonday/sanitize.go')
| -rw-r--r-- | vendor/github.com/microcosm-cc/bluemonday/sanitize.go | 1116 |
1 files changed, 1116 insertions, 0 deletions
diff --git a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go new file mode 100644 index 0000000..9121aef --- /dev/null +++ b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go | |||
| @@ -0,0 +1,1116 @@ | |||
| 1 | // Copyright (c) 2014, David Kitchen <david@buro9.com> | ||
| 2 | // | ||
| 3 | // All rights reserved. | ||
| 4 | // | ||
| 5 | // Redistribution and use in source and binary forms, with or without | ||
| 6 | // modification, are permitted provided that the following conditions are met: | ||
| 7 | // | ||
| 8 | // * Redistributions of source code must retain the above copyright notice, this | ||
| 9 | // list of conditions and the following disclaimer. | ||
| 10 | // | ||
| 11 | // * Redistributions in binary form must reproduce the above copyright notice, | ||
| 12 | // this list of conditions and the following disclaimer in the documentation | ||
| 13 | // and/or other materials provided with the distribution. | ||
| 14 | // | ||
| 15 | // * Neither the name of the organisation (Microcosm) nor the names of its | ||
| 16 | // contributors may be used to endorse or promote products derived from | ||
| 17 | // this software without specific prior written permission. | ||
| 18 | // | ||
| 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
| 20 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 21 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 22 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| 23 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 24 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 25 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 26 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 27 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 29 | |||
| 30 | package bluemonday | ||
| 31 | |||
| 32 | import ( | ||
| 33 | "bytes" | ||
| 34 | "fmt" | ||
| 35 | "io" | ||
| 36 | "net/url" | ||
| 37 | "regexp" | ||
| 38 | "strconv" | ||
| 39 | "strings" | ||
| 40 | |||
| 41 | "golang.org/x/net/html" | ||
| 42 | |||
| 43 | "github.com/aymerick/douceur/parser" | ||
| 44 | ) | ||
| 45 | |||
var (
	// dataAttribute matches HTML5 custom data attributes ("data-*").
	dataAttribute = regexp.MustCompile("^data-.+")
	// dataAttributeXMLPrefix matches the reserved "xml" prefix, which is not
	// permitted in a data attribute's suffix.
	dataAttributeXMLPrefix = regexp.MustCompile("^xml.+")
	// dataAttributeInvalidChars matches characters that are not allowed in
	// the suffix of a data attribute (uppercase letters, semicolons).
	dataAttributeInvalidChars = regexp.MustCompile("[A-Z;]+")
	// cssUnicodeChar matches CSS unicode escape sequences (e.g. `\27 `),
	// used to defeat escape-based obfuscation of style values.
	cssUnicodeChar = regexp.MustCompile(`\\[0-9a-f]{1,6} ?`)
	// dataURIbase64Prefix matches the prefix of a base64-encoded data URI.
	dataURIbase64Prefix = regexp.MustCompile(`^data:[^,]*;base64,`)
)
| 53 | |||
| 54 | // Sanitize takes a string that contains a HTML fragment or document and applies | ||
| 55 | // the given policy allowlist. | ||
| 56 | // | ||
| 57 | // It returns a HTML string that has been sanitized by the policy or an empty | ||
| 58 | // string if an error has occurred (most likely as a consequence of extremely | ||
| 59 | // malformed input) | ||
| 60 | func (p *Policy) Sanitize(s string) string { | ||
| 61 | if strings.TrimSpace(s) == "" { | ||
| 62 | return s | ||
| 63 | } | ||
| 64 | |||
| 65 | return p.sanitizeWithBuff(strings.NewReader(s)).String() | ||
| 66 | } | ||
| 67 | |||
| 68 | // SanitizeBytes takes a []byte that contains a HTML fragment or document and applies | ||
| 69 | // the given policy allowlist. | ||
| 70 | // | ||
| 71 | // It returns a []byte containing the HTML that has been sanitized by the policy | ||
| 72 | // or an empty []byte if an error has occurred (most likely as a consequence of | ||
| 73 | // extremely malformed input) | ||
| 74 | func (p *Policy) SanitizeBytes(b []byte) []byte { | ||
| 75 | if len(bytes.TrimSpace(b)) == 0 { | ||
| 76 | return b | ||
| 77 | } | ||
| 78 | |||
| 79 | return p.sanitizeWithBuff(bytes.NewReader(b)).Bytes() | ||
| 80 | } | ||
| 81 | |||
// SanitizeReader takes an io.Reader that contains a HTML fragment or document
// and applies the given policy allowlist.
//
// It returns a bytes.Buffer containing the HTML that has been sanitized by the
// policy. Errors during sanitization will merely return an empty result.
func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
	// Thin public wrapper; all work happens in sanitizeWithBuff.
	return p.sanitizeWithBuff(r)
}
| 90 | |||
// SanitizeReaderToWriter takes an io.Reader that contains a HTML fragment or document
// and applies the given policy allowlist and writes to the provided writer returning
// an error if there is one.
func (p *Policy) SanitizeReaderToWriter(r io.Reader, w io.Writer) error {
	// Streaming variant: unlike the other entry points, errors are surfaced
	// directly to the caller instead of being swallowed.
	return p.sanitize(r, w)
}
| 97 | |||
| 98 | const escapedURLChars = "'<>\"\r" | ||
| 99 | |||
| 100 | func escapeUrlComponent(w stringWriterWriter, val string) error { | ||
| 101 | i := strings.IndexAny(val, escapedURLChars) | ||
| 102 | for i != -1 { | ||
| 103 | if _, err := w.WriteString(val[:i]); err != nil { | ||
| 104 | return err | ||
| 105 | } | ||
| 106 | var esc string | ||
| 107 | switch val[i] { | ||
| 108 | case '\'': | ||
| 109 | // "'" is shorter than "'" and apos was not in HTML until HTML5. | ||
| 110 | esc = "'" | ||
| 111 | case '<': | ||
| 112 | esc = "<" | ||
| 113 | case '>': | ||
| 114 | esc = ">" | ||
| 115 | case '"': | ||
| 116 | // """ is shorter than """. | ||
| 117 | esc = """ | ||
| 118 | case '\r': | ||
| 119 | esc = " " | ||
| 120 | default: | ||
| 121 | panic("unrecognized escape character") | ||
| 122 | } | ||
| 123 | val = val[i+1:] | ||
| 124 | if _, err := w.WriteString(esc); err != nil { | ||
| 125 | return err | ||
| 126 | } | ||
| 127 | i = strings.IndexAny(val, escapedURLChars) | ||
| 128 | } | ||
| 129 | _, err := w.WriteString(val) | ||
| 130 | return err | ||
| 131 | } | ||
| 132 | |||
// Query represents a single part of the query string, a query param.
// HasValue records whether an "=" was present so that "a" and "a=" are
// distinguishable when re-encoded.
type Query struct {
	Key      string
	Value    string
	HasValue bool
}

// parseQuery mirrors the parsing in net/url
// (https://golang.org/src/net/url/url.go) but collects results into an
// ordered slice instead of url.Values, because url.Values is a map and would
// randomize the order of the query parameters.
//
// On unescape failures the offending pair is dropped; the first error seen
// is returned alongside whatever pairs parsed cleanly.
func parseQuery(query string) (values []Query, err error) {
	for query != "" {
		// Peel off one "&"- or ";"-delimited pair.
		var pair string
		if idx := strings.IndexAny(query, "&;"); idx >= 0 {
			pair, query = query[:idx], query[idx+1:]
		} else {
			pair, query = query, ""
		}
		if pair == "" {
			continue
		}

		rawKey, rawVal, hasValue := pair, "", false
		if idx := strings.Index(pair, "="); idx >= 0 {
			rawKey, rawVal = pair[:idx], pair[idx+1:]
			hasValue = true
		}

		key, kerr := url.QueryUnescape(rawKey)
		if kerr != nil {
			if err == nil {
				err = kerr
			}
			continue
		}
		val, verr := url.QueryUnescape(rawVal)
		if verr != nil {
			if err == nil {
				err = verr
			}
			continue
		}

		values = append(values, Query{
			Key:      key,
			Value:    val,
			HasValue: hasValue,
		})
	}
	return values, err
}

// encodeQueries re-serializes the ordered query parameters, percent-escaping
// each key and value and joining pairs with "&".
func encodeQueries(queries []Query) string {
	var sb strings.Builder
	for i, q := range queries {
		if i > 0 {
			sb.WriteString("&")
		}
		sb.WriteString(url.QueryEscape(q.Key))
		if q.HasValue {
			sb.WriteString("=")
			sb.WriteString(url.QueryEscape(q.Value))
		}
	}
	return sb.String()
}
| 198 | |||
| 199 | func sanitizedURL(val string) (string, error) { | ||
| 200 | u, err := url.Parse(val) | ||
| 201 | if err != nil { | ||
| 202 | return "", err | ||
| 203 | } | ||
| 204 | |||
| 205 | // we use parseQuery but not u.Query to keep the order not change because | ||
| 206 | // url.Values is a map which has a random order. | ||
| 207 | queryValues, err := parseQuery(u.RawQuery) | ||
| 208 | if err != nil { | ||
| 209 | return "", err | ||
| 210 | } | ||
| 211 | // sanitize the url query params | ||
| 212 | for i, query := range queryValues { | ||
| 213 | queryValues[i].Key = html.EscapeString(query.Key) | ||
| 214 | } | ||
| 215 | u.RawQuery = encodeQueries(queryValues) | ||
| 216 | // u.String() will also sanitize host/scheme/user/pass | ||
| 217 | return u.String(), nil | ||
| 218 | } | ||
| 219 | |||
| 220 | // Performs the actual sanitization process. | ||
| 221 | func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer { | ||
| 222 | var buff bytes.Buffer | ||
| 223 | if err := p.sanitize(r, &buff); err != nil { | ||
| 224 | return &bytes.Buffer{} | ||
| 225 | } | ||
| 226 | return &buff | ||
| 227 | } | ||
| 228 | |||
| 229 | type asStringWriter struct { | ||
| 230 | io.Writer | ||
| 231 | } | ||
| 232 | |||
| 233 | func (a *asStringWriter) WriteString(s string) (int, error) { | ||
| 234 | return a.Write([]byte(s)) | ||
| 235 | } | ||
| 236 | |||
// sanitize is the core tokenize-filter-serialize pass: it reads HTML tokens
// from r, drops or rewrites anything the policy does not allow, and writes
// the sanitized markup to w. The public Sanitize* methods all funnel here.
func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
	// It is possible that the developer has created the policy via:
	//   p := bluemonday.Policy{}
	// rather than:
	//   p := bluemonday.NewPolicy()
	// If this is the case, and if they haven't yet triggered an action that
	// would initialize the maps, then we need to do that.
	p.init()

	// Prefer the writer's own WriteString if it has one; otherwise wrap it.
	buff, ok := w.(stringWriterWriter)
	if !ok {
		buff = &asStringWriter{w}
	}

	var (
		// skipElementContent is true while inside an element whose entire
		// content the policy says to drop (e.g. <script> by default).
		skipElementContent bool
		// skippingElementsCount tracks nesting of skip-content elements so
		// skipElementContent is only cleared when the outermost one closes.
		skippingElementsCount int64
		// skipClosingTag / closingTagToSkipStack suppress the closing tags
		// of start tags that were dropped for having no allowed attributes.
		skipClosingTag        bool
		closingTagToSkipStack []string
		// mostRecentlyStartedToken is the normalised name of the last start
		// tag seen; used to decide how to treat the following text token.
		mostRecentlyStartedToken string
	)

	tokenizer := html.NewTokenizer(r)
	for {
		if tokenizer.Next() == html.ErrorToken {
			err := tokenizer.Err()
			if err == io.EOF {
				// End of input means end of processing
				return nil
			}

			// Raw tokenizer error
			return err
		}

		token := tokenizer.Token()
		switch token.Type {
		case html.DoctypeToken:

			// DocType is not handled as there is no safe parsing mechanism
			// provided by golang.org/x/net/html for the content, and this can
			// be misused to insert HTML tags that are not then sanitized
			//
			// One might wish to recursively sanitize here using the same policy
			// but I will need to do some further testing before considering
			// this.

		case html.CommentToken:

			// Comments are ignored by default
			if p.allowComments {
				// But if allowed then write the comment out as-is
				// NOTE(review): this is the one WriteString whose error is
				// ignored; every other write in this function is checked.
				buff.WriteString(token.String())
			}

		case html.StartTagToken:

			mostRecentlyStartedToken = normaliseElementName(token.Data)

			// script/style tags are swallowed entirely unless the caller has
			// explicitly opted in via AllowUnsafe.
			switch normaliseElementName(token.Data) {
			case `script`:
				if !p.allowUnsafe {
					continue
				}
			case `style`:
				if !p.allowUnsafe {
					continue
				}
			}

			aps, ok := p.elsAndAttrs[token.Data]
			if !ok {
				// No exact element policy; fall back to regex-based element
				// policies.
				aa, matched := p.matchRegex(token.Data)
				if !matched {
					// Element is not allowed at all. If it's a skip-content
					// element, start suppressing its children too.
					if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
						skipElementContent = true
						skippingElementsCount++
					}
					if p.addSpaces {
						if _, err := buff.WriteString(" "); err != nil {
							return err
						}
					}
					break
				}
				aps = aa
			}
			if len(token.Attr) != 0 {
				token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
			}

			if len(token.Attr) == 0 {
				if !p.allowNoAttrs(token.Data) {
					// All attributes were stripped and the element is not
					// allowed bare: drop the tag and remember to drop its
					// matching closing tag later.
					skipClosingTag = true
					closingTagToSkipStack = append(closingTagToSkipStack, token.Data)
					if p.addSpaces {
						if _, err := buff.WriteString(" "); err != nil {
							return err
						}
					}
					break
				}
			}

			if !skipElementContent {
				if _, err := buff.WriteString(token.String()); err != nil {
					return err
				}
			}

		case html.EndTagToken:

			if mostRecentlyStartedToken == normaliseElementName(token.Data) {
				mostRecentlyStartedToken = ""
			}

			switch normaliseElementName(token.Data) {
			case `script`:
				if !p.allowUnsafe {
					continue
				}
			case `style`:
				if !p.allowUnsafe {
					continue
				}
			}

			// Closing tag of a start tag that was dropped above: drop it too.
			if skipClosingTag && closingTagToSkipStack[len(closingTagToSkipStack)-1] == token.Data {
				closingTagToSkipStack = closingTagToSkipStack[:len(closingTagToSkipStack)-1]
				if len(closingTagToSkipStack) == 0 {
					skipClosingTag = false
				}
				if p.addSpaces {
					if _, err := buff.WriteString(" "); err != nil {
						return err
					}
				}
				break
			}
			if _, ok := p.elsAndAttrs[token.Data]; !ok {
				match := false
				for regex := range p.elsMatchingAndAttrs {
					if regex.MatchString(token.Data) {
						skipElementContent = false
						match = true
						break
					}
				}
				// Leaving a skip-content element: only resume output once
				// the outermost one has closed.
				if _, ok := p.setOfElementsToSkipContent[token.Data]; ok && !match {
					skippingElementsCount--
					if skippingElementsCount == 0 {
						skipElementContent = false
					}
				}
				if !match {
					if p.addSpaces {
						if _, err := buff.WriteString(" "); err != nil {
							return err
						}
					}
					break
				}
			}

			if !skipElementContent {
				if _, err := buff.WriteString(token.String()); err != nil {
					return err
				}
			}

		case html.SelfClosingTagToken:

			switch normaliseElementName(token.Data) {
			case `script`:
				if !p.allowUnsafe {
					continue
				}
			case `style`:
				if !p.allowUnsafe {
					continue
				}
			}

			aps, ok := p.elsAndAttrs[token.Data]
			if !ok {
				aa, matched := p.matchRegex(token.Data)
				if !matched {
					if p.addSpaces && !matched {
						if _, err := buff.WriteString(" "); err != nil {
							return err
						}
					}
					break
				}
				aps = aa
			}

			if len(token.Attr) != 0 {
				token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
			}

			// Self-closing tags have no closing tag, so nothing is pushed to
			// closingTagToSkipStack here.
			if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
				if p.addSpaces {
					if _, err := buff.WriteString(" "); err != nil {
						return err
					}
				}
				break
			}
			if !skipElementContent {
				if _, err := buff.WriteString(token.String()); err != nil {
					return err
				}
			}

		case html.TextToken:

			if !skipElementContent {
				switch mostRecentlyStartedToken {
				case `script`:
					// not encouraged, but if a policy allows JavaScript we
					// should not HTML escape it as that would break the output
					//
					// requires p.AllowUnsafe()
					if p.allowUnsafe {
						if _, err := buff.WriteString(token.Data); err != nil {
							return err
						}
					}
				case "style":
					// not encouraged, but if a policy allows CSS styles we
					// should not HTML escape it as that would break the output
					//
					// requires p.AllowUnsafe()
					if p.allowUnsafe {
						if _, err := buff.WriteString(token.Data); err != nil {
							return err
						}
					}
				default:
					// HTML escape the text
					if _, err := buff.WriteString(token.String()); err != nil {
						return err
					}
				}
			}

		default:
			// A token that didn't exist in the html package when we wrote this
			return fmt.Errorf("unknown token: %v", token)
		}
	}
}
| 490 | |||
// sanitizeAttrs takes a set of element attribute policies and the global
// attribute policies and applies them to the []html.Attribute returning a set
// of html.Attributes that match the policies.
//
// Beyond allowlisting, it also rewrites attributes to enforce policy-wide
// requirements: URL validation, rel="nofollow"/"noreferrer"/"noopener",
// target="_blank", crossorigin="anonymous", and iframe sandbox filtering.
func (p *Policy) sanitizeAttrs(
	elementName string,
	attrs []html.Attribute,
	aps map[string][]attrPolicy,
) []html.Attribute {

	if len(attrs) == 0 {
		return attrs
	}

	// Determine whether any style policy (element-specific or global) could
	// apply, so we know whether a "style" attribute needs sanitizing.
	hasStylePolicies := false
	sps, elementHasStylePolicies := p.elsAndStyles[elementName]
	if len(p.globalStyles) > 0 || (elementHasStylePolicies && len(sps) > 0) {
		hasStylePolicies = true
	}
	// no specific element policy found, look for a pattern match
	if !hasStylePolicies {
		for k, v := range p.elsMatchingAndStyles {
			if k.MatchString(elementName) {
				if len(v) > 0 {
					hasStylePolicies = true
					break
				}
			}
		}
	}

	// Builds a new attribute slice based on the whether the attribute has been
	// allowed explicitly or globally.
	cleanAttrs := []html.Attribute{}
attrsLoop:
	for _, htmlAttr := range attrs {
		if p.allowDataAttributes {
			// If we see a data attribute, let it through.
			if isDataAttribute(htmlAttr.Key) {
				cleanAttrs = append(cleanAttrs, htmlAttr)
				continue
			}
		}
		// Is this a "style" attribute, and if so, do we need to sanitize it?
		if htmlAttr.Key == "style" && hasStylePolicies {
			htmlAttr = p.sanitizeStyles(htmlAttr, elementName)
			if htmlAttr.Val == "" {
				// We've sanitized away any and all styles; don't bother to
				// output the style attribute (even if it's allowed)
				continue
			} else {
				cleanAttrs = append(cleanAttrs, htmlAttr)
				continue
			}
		}

		// Is there an element specific attribute policy that applies?
		if apl, ok := aps[htmlAttr.Key]; ok {
			for _, ap := range apl {
				if ap.regexp != nil {
					// Value must match the policy's pattern to be kept.
					if ap.regexp.MatchString(htmlAttr.Val) {
						cleanAttrs = append(cleanAttrs, htmlAttr)
						continue attrsLoop
					}
				} else {
					// No pattern means any value is acceptable.
					cleanAttrs = append(cleanAttrs, htmlAttr)
					continue attrsLoop
				}
			}
		}

		// Is there a global attribute policy that applies?
		if apl, ok := p.globalAttrs[htmlAttr.Key]; ok {
			for _, ap := range apl {
				if ap.regexp != nil {
					if ap.regexp.MatchString(htmlAttr.Val) {
						cleanAttrs = append(cleanAttrs, htmlAttr)
					}
				} else {
					cleanAttrs = append(cleanAttrs, htmlAttr)
				}
			}
		}
	}

	if len(cleanAttrs) == 0 {
		// If nothing was allowed, let's get out of here
		return cleanAttrs
	}
	// cleanAttrs now contains the attributes that are permitted

	if linkable(elementName) {
		if p.requireParseableURLs {
			// Ensure URLs are parseable:
			// - a.href
			// - area.href
			// - link.href
			// - blockquote.cite
			// - q.cite
			// - img.src
			// - script.src
			tmpAttrs := []html.Attribute{}
			for _, htmlAttr := range cleanAttrs {
				switch elementName {
				case "a", "area", "base", "link":
					if htmlAttr.Key == "href" {
						// Unparseable/disallowed URLs cause the attribute to
						// be dropped entirely.
						if u, ok := p.validURL(htmlAttr.Val); ok {
							htmlAttr.Val = u
							tmpAttrs = append(tmpAttrs, htmlAttr)
						}
						break
					}
					tmpAttrs = append(tmpAttrs, htmlAttr)
				case "blockquote", "del", "ins", "q":
					if htmlAttr.Key == "cite" {
						if u, ok := p.validURL(htmlAttr.Val); ok {
							htmlAttr.Val = u
							tmpAttrs = append(tmpAttrs, htmlAttr)
						}
						break
					}
					tmpAttrs = append(tmpAttrs, htmlAttr)
				case "audio", "embed", "iframe", "img", "script", "source", "track", "video":
					if htmlAttr.Key == "src" {
						if u, ok := p.validURL(htmlAttr.Val); ok {
							htmlAttr.Val = u
							tmpAttrs = append(tmpAttrs, htmlAttr)
						}
						break
					}
					tmpAttrs = append(tmpAttrs, htmlAttr)
				default:
					tmpAttrs = append(tmpAttrs, htmlAttr)
				}
			}
			cleanAttrs = tmpAttrs
		}

		if (p.requireNoFollow ||
			p.requireNoFollowFullyQualifiedLinks ||
			p.requireNoReferrer ||
			p.requireNoReferrerFullyQualifiedLinks ||
			p.addTargetBlankToFullyQualifiedLinks) &&
			len(cleanAttrs) > 0 {

			// Add rel="nofollow" if a "href" exists
			switch elementName {
			case "a", "area", "base", "link":
				var hrefFound bool
				var externalLink bool
				for _, htmlAttr := range cleanAttrs {
					if htmlAttr.Key == "href" {
						hrefFound = true

						u, err := url.Parse(htmlAttr.Val)
						if err != nil {
							continue
						}
						// A non-empty host marks the link as external
						// (fully qualified).
						if u.Host != "" {
							externalLink = true
						}

						continue
					}
				}

				if hrefFound {
					var (
						noFollowFound    bool
						noReferrerFound  bool
						targetBlankFound bool
					)

					addNoFollow := (p.requireNoFollow ||
						externalLink && p.requireNoFollowFullyQualifiedLinks)

					addNoReferrer := (p.requireNoReferrer ||
						externalLink && p.requireNoReferrerFullyQualifiedLinks)

					addTargetBlank := (externalLink &&
						p.addTargetBlankToFullyQualifiedLinks)

					// First pass: amend existing rel/target attributes in
					// place where possible.
					tmpAttrs := []html.Attribute{}
					for _, htmlAttr := range cleanAttrs {

						var appended bool
						if htmlAttr.Key == "rel" && (addNoFollow || addNoReferrer) {

							if addNoFollow && !strings.Contains(htmlAttr.Val, "nofollow") {
								htmlAttr.Val += " nofollow"
							}
							if addNoReferrer && !strings.Contains(htmlAttr.Val, "noreferrer") {
								htmlAttr.Val += " noreferrer"
							}
							noFollowFound = addNoFollow
							noReferrerFound = addNoReferrer
							tmpAttrs = append(tmpAttrs, htmlAttr)
							appended = true
						}

						if elementName == "a" && htmlAttr.Key == "target" {
							if htmlAttr.Val == "_blank" {
								targetBlankFound = true
							}
							if addTargetBlank && !targetBlankFound {
								htmlAttr.Val = "_blank"
								targetBlankFound = true
								tmpAttrs = append(tmpAttrs, htmlAttr)
								appended = true
							}
						}

						if !appended {
							tmpAttrs = append(tmpAttrs, htmlAttr)
						}
					}
					if noFollowFound || noReferrerFound || targetBlankFound {
						cleanAttrs = tmpAttrs
					}

					// Second pass: synthesize rel/target attributes that
					// were required but not present to amend above.
					if (addNoFollow && !noFollowFound) || (addNoReferrer && !noReferrerFound) {
						rel := html.Attribute{}
						rel.Key = "rel"
						if addNoFollow {
							rel.Val = "nofollow"
						}
						if addNoReferrer {
							if rel.Val != "" {
								rel.Val += " "
							}
							rel.Val += "noreferrer"
						}
						cleanAttrs = append(cleanAttrs, rel)
					}

					if elementName == "a" && addTargetBlank && !targetBlankFound {
						rel := html.Attribute{}
						rel.Key = "target"
						rel.Val = "_blank"
						targetBlankFound = true
						cleanAttrs = append(cleanAttrs, rel)
					}

					if targetBlankFound {
						// target="_blank" has a security risk that allows the
						// opened window/tab to issue JavaScript calls against
						// window.opener, which in effect allow the destination
						// of the link to control the source:
						// https://dev.to/ben/the-targetblank-vulnerability-by-example
						//
						// To mitigate this risk, we need to add a specific rel
						// attribute if it is not already present.
						// rel="noopener"
						//
						// Unfortunately this is processing the rel twice (we
						// already looked at it earlier ^^) as we cannot be sure
						// of the ordering of the href and rel, and whether we
						// have fully satisfied that we need to do this. This
						// double processing only happens *if* target="_blank"
						// is true.
						var noOpenerAdded bool
						tmpAttrs := []html.Attribute{}
						for _, htmlAttr := range cleanAttrs {
							var appended bool
							if htmlAttr.Key == "rel" {
								if strings.Contains(htmlAttr.Val, "noopener") {
									noOpenerAdded = true
									tmpAttrs = append(tmpAttrs, htmlAttr)
								} else {
									htmlAttr.Val += " noopener"
									noOpenerAdded = true
									tmpAttrs = append(tmpAttrs, htmlAttr)
								}

								appended = true
							}
							if !appended {
								tmpAttrs = append(tmpAttrs, htmlAttr)
							}
						}
						if noOpenerAdded {
							cleanAttrs = tmpAttrs
						} else {
							// rel attr was not found, or else noopener would
							// have been added already
							rel := html.Attribute{}
							rel.Key = "rel"
							rel.Val = "noopener"
							cleanAttrs = append(cleanAttrs, rel)
						}

					}
				}
			default:
			}
		}
	}

	if p.requireCrossOriginAnonymous && len(cleanAttrs) > 0 {
		switch elementName {
		case "audio", "img", "link", "script", "video":
			var crossOriginFound bool
			for _, htmlAttr := range cleanAttrs {
				if htmlAttr.Key == "crossorigin" {
					crossOriginFound = true
					// NOTE(review): htmlAttr is a loop copy, so this
					// assignment does not modify cleanAttrs; an existing
					// crossorigin value is left as-is. Flagging for upstream
					// confirmation rather than changing behavior here.
					htmlAttr.Val = "anonymous"
				}
			}

			if !crossOriginFound {
				crossOrigin := html.Attribute{}
				crossOrigin.Key = "crossorigin"
				crossOrigin.Val = "anonymous"
				cleanAttrs = append(cleanAttrs, crossOrigin)
			}
		}
	}

	if p.requireSandboxOnIFrame != nil && elementName == "iframe" {
		var sandboxFound bool
		for i, htmlAttr := range cleanAttrs {
			if htmlAttr.Key == "sandbox" {
				sandboxFound = true
				// Keep only allowed sandbox tokens, deduplicated, in their
				// original order.
				var cleanVals []string
				cleanValsSet := make(map[string]bool)
				for _, val := range strings.Fields(htmlAttr.Val) {
					if p.requireSandboxOnIFrame[val] {
						if !cleanValsSet[val] {
							cleanVals = append(cleanVals, val)
							cleanValsSet[val] = true
						}
					}
				}
				cleanAttrs[i].Val = strings.Join(cleanVals, " ")
			}
		}

		if !sandboxFound {
			// No sandbox attribute at all: add the most restrictive form.
			sandbox := html.Attribute{}
			sandbox.Key = "sandbox"
			sandbox.Val = ""
			cleanAttrs = append(cleanAttrs, sandbox)
		}
	}

	return cleanAttrs
}
| 837 | |||
// sanitizeStyles filters the CSS declarations inside a style attribute,
// keeping only those permitted by the element-specific or global style
// policies. If nothing survives (or the CSS fails to parse), attr.Val is
// emptied so the caller can drop the attribute entirely.
func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute {
	sps := p.elsAndStyles[elementName]
	if len(sps) == 0 {
		sps = map[string][]stylePolicy{}
		// check for any matching elements, if we don't already have a policy found
		// if multiple matches are found they will be overwritten, it's best
		// to not have overlapping matchers
		for regex, policies := range p.elsMatchingAndStyles {
			if regex.MatchString(elementName) {
				for k, v := range policies {
					sps[k] = append(sps[k], v...)
				}
			}
		}
	}

	// Add semi-colon to end to fix parsing issue
	attr.Val = strings.TrimRight(attr.Val, " ")
	if len(attr.Val) > 0 && attr.Val[len(attr.Val)-1] != ';' {
		attr.Val = attr.Val + ";"
	}
	decs, err := parser.ParseDeclarations(attr.Val)
	if err != nil {
		// Unparseable CSS is discarded wholesale rather than partially kept.
		attr.Val = ""
		return attr
	}
	clean := []string{}
	// Vendor prefixes are stripped before policy lookup so that a policy for
	// e.g. "transform" also matches "-webkit-transform".
	prefixes := []string{"-webkit-", "-moz-", "-ms-", "-o-", "mso-", "-xv-", "-atsc-", "-wap-", "-khtml-", "prince-", "-ah-", "-hp-", "-ro-", "-rim-", "-tc-"}

decLoop:
	for _, dec := range decs {
		// Property/value are lowercased for matching, but the original
		// casing is what gets written back out.
		tempProperty := strings.ToLower(dec.Property)
		// removeUnicode defeats CSS unicode-escape obfuscation of values.
		tempValue := removeUnicode(strings.ToLower(dec.Value))
		for _, i := range prefixes {
			tempProperty = strings.TrimPrefix(tempProperty, i)
		}
		// Element-specific style policies: a handler wins over an enum,
		// which wins over a regexp; first policy to accept keeps the
		// declaration.
		if spl, ok := sps[tempProperty]; ok {
			for _, sp := range spl {
				if sp.handler != nil {
					if sp.handler(tempValue) {
						clean = append(clean, dec.Property+": "+dec.Value)
						continue decLoop
					}
				} else if len(sp.enum) > 0 {
					if stringInSlice(tempValue, sp.enum) {
						clean = append(clean, dec.Property+": "+dec.Value)
						continue decLoop
					}
				} else if sp.regexp != nil {
					if sp.regexp.MatchString(tempValue) {
						clean = append(clean, dec.Property+": "+dec.Value)
						continue decLoop
					}
				}
			}
		}
		// Global style policies, same precedence rules.
		if spl, ok := p.globalStyles[tempProperty]; ok {
			for _, sp := range spl {
				if sp.handler != nil {
					if sp.handler(tempValue) {
						clean = append(clean, dec.Property+": "+dec.Value)
						continue decLoop
					}
				} else if len(sp.enum) > 0 {
					if stringInSlice(tempValue, sp.enum) {
						clean = append(clean, dec.Property+": "+dec.Value)
						continue decLoop
					}
				} else if sp.regexp != nil {
					if sp.regexp.MatchString(tempValue) {
						clean = append(clean, dec.Property+": "+dec.Value)
						continue decLoop
					}
				}
			}
		}
	}
	if len(clean) > 0 {
		attr.Val = strings.Join(clean, "; ")
	} else {
		attr.Val = ""
	}
	return attr
}
| 922 | |||
| 923 | func (p *Policy) allowNoAttrs(elementName string) bool { | ||
| 924 | _, ok := p.setOfElementsAllowedWithoutAttrs[elementName] | ||
| 925 | if !ok { | ||
| 926 | for _, r := range p.setOfElementsMatchingAllowedWithoutAttrs { | ||
| 927 | if r.MatchString(elementName) { | ||
| 928 | ok = true | ||
| 929 | break | ||
| 930 | } | ||
| 931 | } | ||
| 932 | } | ||
| 933 | return ok | ||
| 934 | } | ||
| 935 | |||
| 936 | func (p *Policy) validURL(rawurl string) (string, bool) { | ||
| 937 | if p.requireParseableURLs { | ||
| 938 | // URLs are valid if when space is trimmed the URL is valid | ||
| 939 | rawurl = strings.TrimSpace(rawurl) | ||
| 940 | |||
| 941 | // URLs cannot contain whitespace, unless it is a data-uri | ||
| 942 | if strings.Contains(rawurl, " ") || | ||
| 943 | strings.Contains(rawurl, "\t") || | ||
| 944 | strings.Contains(rawurl, "\n") { | ||
| 945 | if !strings.HasPrefix(rawurl, `data:`) { | ||
| 946 | return "", false | ||
| 947 | } | ||
| 948 | |||
| 949 | // Remove \r and \n from base64 encoded data to pass url.Parse. | ||
| 950 | matched := dataURIbase64Prefix.FindString(rawurl) | ||
| 951 | if matched != "" { | ||
| 952 | rawurl = matched + strings.Replace( | ||
| 953 | strings.Replace( | ||
| 954 | rawurl[len(matched):], | ||
| 955 | "\r", | ||
| 956 | "", | ||
| 957 | -1, | ||
| 958 | ), | ||
| 959 | "\n", | ||
| 960 | "", | ||
| 961 | -1, | ||
| 962 | ) | ||
| 963 | } | ||
| 964 | } | ||
| 965 | |||
| 966 | // URLs are valid if they parse | ||
| 967 | u, err := url.Parse(rawurl) | ||
| 968 | if err != nil { | ||
| 969 | return "", false | ||
| 970 | } | ||
| 971 | |||
| 972 | if u.Scheme != "" { | ||
| 973 | for _, r := range p.allowURLSchemeRegexps { | ||
| 974 | if r.MatchString(u.Scheme) { | ||
| 975 | return u.String(), true | ||
| 976 | } | ||
| 977 | } | ||
| 978 | |||
| 979 | urlPolicies, ok := p.allowURLSchemes[u.Scheme] | ||
| 980 | if !ok { | ||
| 981 | return "", false | ||
| 982 | } | ||
| 983 | |||
| 984 | if len(urlPolicies) == 0 { | ||
| 985 | return u.String(), true | ||
| 986 | } | ||
| 987 | |||
| 988 | for _, urlPolicy := range urlPolicies { | ||
| 989 | if urlPolicy(u) == true { | ||
| 990 | return u.String(), true | ||
| 991 | } | ||
| 992 | } | ||
| 993 | |||
| 994 | return "", false | ||
| 995 | } | ||
| 996 | |||
| 997 | if p.allowRelativeURLs { | ||
| 998 | if u.String() != "" { | ||
| 999 | return u.String(), true | ||
| 1000 | } | ||
| 1001 | } | ||
| 1002 | |||
| 1003 | return "", false | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | return rawurl, true | ||
| 1007 | } | ||
| 1008 | |||
// linkable reports whether elementName is an HTML element that carries a
// URL-bearing attribute and therefore needs URL sanitization applied to it.
func linkable(elementName string) bool {
	switch elementName {
	case
		// elements that allow .href
		"a", "area", "base", "link",
		// elements that allow .cite
		"blockquote", "del", "ins", "q",
		// elements that allow .src
		"audio", "embed", "iframe", "img", "input", "script", "track", "video":
		return true
	}
	return false
}
| 1024 | |||
| 1025 | // stringInSlice returns true if needle exists in haystack | ||
// stringInSlice returns true if needle exists in haystack, compared
// case-insensitively. strings.EqualFold avoids allocating two lower-cased
// copies per comparison, unlike ToLower == ToLower.
func stringInSlice(needle string, haystack []string) bool {
	for _, straw := range haystack {
		if strings.EqualFold(straw, needle) {
			return true
		}
	}
	return false
}
| 1034 | |||
| 1035 | func isDataAttribute(val string) bool { | ||
| 1036 | if !dataAttribute.MatchString(val) { | ||
| 1037 | return false | ||
| 1038 | } | ||
| 1039 | rest := strings.Split(val, "data-") | ||
| 1040 | if len(rest) == 1 { | ||
| 1041 | return false | ||
| 1042 | } | ||
| 1043 | // data-xml* is invalid. | ||
| 1044 | if dataAttributeXMLPrefix.MatchString(rest[1]) { | ||
| 1045 | return false | ||
| 1046 | } | ||
| 1047 | // no uppercase or semi-colons allowed. | ||
| 1048 | if dataAttributeInvalidChars.MatchString(rest[1]) { | ||
| 1049 | return false | ||
| 1050 | } | ||
| 1051 | return true | ||
| 1052 | } | ||
| 1053 | |||
// removeUnicode replaces CSS unicode escape sequences in value with their
// literal characters so later policy matching cannot be bypassed via
// escaping. If any escape fails to decode, the empty string is returned.
func removeUnicode(value string) string {
	substitutedValue := value
	currentLoc := cssUnicodeChar.FindStringIndex(substitutedValue)
	for currentLoc != nil {

		// Everything after the first matched character is treated as the hex
		// digits of the escape (cssUnicodeChar is defined elsewhere —
		// presumably a backslash followed by hex digits; confirm there).
		character := substitutedValue[currentLoc[0]+1 : currentLoc[1]]
		character = strings.TrimSpace(character)
		if len(character) < 4 {
			// Left-pad with zeros to the 4 hex digits \uXXXX requires.
			character = strings.Repeat("0", 4-len(character)) + character
		} else {
			// Strip leading zeros down to 4 digits; an escape that cannot
			// shrink (non-zero leading digit) is discarded entirely.
			for len(character) > 4 {
				if character[0] != '0' {
					character = ""
					break
				} else {
					character = character[1:]
				}
			}
		}
		// Decode the \uXXXX escape by round-tripping it through Go's
		// string-literal parser.
		character = "\\u" + character
		translatedChar, err := strconv.Unquote(`"` + character + `"`)
		translatedChar = strings.TrimSpace(translatedChar)
		if err != nil {
			return ""
		}
		// Splice the decoded character over the escape, then rescan the
		// whole string for any remaining escapes.
		substitutedValue = substitutedValue[0:currentLoc[0]] + translatedChar + substitutedValue[currentLoc[1]:]
		currentLoc = cssUnicodeChar.FindStringIndex(substitutedValue)
	}
	return substitutedValue
}
| 1084 | |||
| 1085 | func (p *Policy) matchRegex(elementName string) (map[string][]attrPolicy, bool) { | ||
| 1086 | aps := make(map[string][]attrPolicy, 0) | ||
| 1087 | matched := false | ||
| 1088 | for regex, attrs := range p.elsMatchingAndAttrs { | ||
| 1089 | if regex.MatchString(elementName) { | ||
| 1090 | matched = true | ||
| 1091 | for k, v := range attrs { | ||
| 1092 | aps[k] = append(aps[k], v...) | ||
| 1093 | } | ||
| 1094 | } | ||
| 1095 | } | ||
| 1096 | return aps, matched | ||
| 1097 | } | ||
| 1098 | |||
// normaliseElementName takes a HTML element like <script> which is user input
// and returns a lower case version of it that is immune to UTF-8 to ASCII
// conversion tricks (like the use of upper case cyrillic i scrİpt which a
// strings.ToLower would convert to script). Instead this func will preserve
// all non-ASCII as their escaped equivalent, i.e. \u0130 which reveals the
// characters when lower cased
func normaliseElementName(str string) string {
	// Escape all non-ASCII first, then lower-case the ASCII result.
	escaped := strconv.QuoteToASCII(str)
	lowered := strings.ToLower(escaped)
	// QuoteToASCII wraps its result in double quotes; strip them off.
	lowered = strings.TrimPrefix(lowered, `"`)
	return strings.TrimSuffix(lowered, `"`)
}
