diff options
Diffstat (limited to 'vendor/github.com/microcosm-cc/bluemonday/helpers.go')
| -rw-r--r-- | vendor/github.com/microcosm-cc/bluemonday/helpers.go | 304 |
1 files changed, 304 insertions, 0 deletions
diff --git a/vendor/github.com/microcosm-cc/bluemonday/helpers.go b/vendor/github.com/microcosm-cc/bluemonday/helpers.go new file mode 100644 index 0000000..2b03d7e --- /dev/null +++ b/vendor/github.com/microcosm-cc/bluemonday/helpers.go | |||
| @@ -0,0 +1,304 @@ | |||
| 1 | // Copyright (c) 2014, David Kitchen <david@buro9.com> | ||
| 2 | // | ||
| 3 | // All rights reserved. | ||
| 4 | // | ||
| 5 | // Redistribution and use in source and binary forms, with or without | ||
| 6 | // modification, are permitted provided that the following conditions are met: | ||
| 7 | // | ||
| 8 | // * Redistributions of source code must retain the above copyright notice, this | ||
| 9 | // list of conditions and the following disclaimer. | ||
| 10 | // | ||
| 11 | // * Redistributions in binary form must reproduce the above copyright notice, | ||
| 12 | // this list of conditions and the following disclaimer in the documentation | ||
| 13 | // and/or other materials provided with the distribution. | ||
| 14 | // | ||
| 15 | // * Neither the name of the organisation (Microcosm) nor the names of its | ||
| 16 | // contributors may be used to endorse or promote products derived from | ||
| 17 | // this software without specific prior written permission. | ||
| 18 | // | ||
| 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
| 20 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 21 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 22 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| 23 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 24 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 25 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 26 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 27 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 29 | |||
| 30 | package bluemonday | ||
| 31 | |||
| 32 | import ( | ||
| 33 | "encoding/base64" | ||
| 34 | "net/url" | ||
| 35 | "regexp" | ||
| 36 | ) | ||
| 37 | |||
// A selection of regular expressions that can be used as .Matching() rules on
// HTML attributes.
//
// Every pattern is anchored with ^ and $ so that the whole attribute value has
// to conform, not merely a substring of it.
var (
	// CellAlign handles the `align` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
	CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)

	// CellVerticalAlign handles the `valign` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
	CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)

	// Direction handles the `dir` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
	Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)

	// ImageAlign handles the `align` attribute on the `image` tag
	// http://www.w3.org/MarkUp/Test/Img/imgtest.html
	ImageAlign = regexp.MustCompile(
		`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
	)

	// Integer describes whole positive integers (including 0) used in places
	// like td.colspan
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
	Integer = regexp.MustCompile(`^[0-9]+$`)

	// ISO8601 according to the W3 group is only a subset of the ISO8601
	// standard: http://www.w3.org/TR/NOTE-datetime
	//
	// Used in places like time.datetime
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
	//
	// Matches patterns:
	//  Year:
	//     YYYY (eg 1997)
	//  Year and month:
	//     YYYY-MM (eg 1997-07)
	//  Complete date:
	//     YYYY-MM-DD (eg 1997-07-16)
	//  Complete date plus hours and minutes:
	//     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
	//  Complete date plus hours, minutes and seconds:
	//     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
	//  Complete date plus hours, minutes, seconds and a decimal fraction of a
	//  second
	//     YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
	//
	// A single space is accepted in place of the "T" separator, the fraction
	// may carry between one and six digits, and the time zone designator is
	// optional.
	//
	// The fraction separator is escaped (`\.`) so that only a literal dot is
	// accepted; an unescaped `.` would let any character (for example a third
	// ":NN" group) through.
	ISO8601 = regexp.MustCompile(
		`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(\.[0-9]{1,6})` +
			`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
	)

	// ListType encapsulates the common value as well as the latest spec
	// values for lists
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
	ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)

	// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
	// `class` which both contain space delimited lists of data tokens
	// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
	// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
	SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)

	// Number is a double value used on HTML5 meter and progress elements
	// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
	Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)

	// NumberOrPercent is used predominantly as units of measurement in width
	// and height attributes
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
	NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)

	// Paragraph of text in an attribute such as *.'title', img.alt, etc
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
	// Note that we are not allowing chars that could close tags like '>'
	Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)

	// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
	// prefix of data URIs that contain common web image formats.
	//
	// This is not exported as it's not useful by itself, and only has value
	// within the AllowDataURIImages func
	dataURIImagePrefix = regexp.MustCompile(
		`^image/(gif|jpeg|png|svg\+xml|webp);base64,`,
	)
)
| 123 | |||
| 124 | // AllowStandardURLs is a convenience function that will enable rel="nofollow" | ||
| 125 | // on "a", "area" and "link" (if you have allowed those elements) and will | ||
| 126 | // ensure that the URL values are parseable and either relative or belong to the | ||
| 127 | // "mailto", "http", or "https" schemes | ||
| 128 | func (p *Policy) AllowStandardURLs() { | ||
| 129 | // URLs must be parseable by net/url.Parse() | ||
| 130 | p.RequireParseableURLs(true) | ||
| 131 | |||
| 132 | // !url.IsAbs() is permitted | ||
| 133 | p.AllowRelativeURLs(true) | ||
| 134 | |||
| 135 | // Most common URL schemes only | ||
| 136 | p.AllowURLSchemes("mailto", "http", "https") | ||
| 137 | |||
| 138 | // For linking elements we will add rel="nofollow" if it does not already exist | ||
| 139 | // This applies to "a" "area" "link" | ||
| 140 | p.RequireNoFollowOnLinks(true) | ||
| 141 | } | ||
| 142 | |||
| 143 | // AllowStandardAttributes will enable "id", "title" and the language specific | ||
| 144 | // attributes "dir" and "lang" on all elements that are allowed | ||
| 145 | func (p *Policy) AllowStandardAttributes() { | ||
| 146 | // "dir" "lang" are permitted as both language attributes affect charsets | ||
| 147 | // and direction of text. | ||
| 148 | p.AllowAttrs("dir").Matching(Direction).Globally() | ||
| 149 | p.AllowAttrs( | ||
| 150 | "lang", | ||
| 151 | ).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally() | ||
| 152 | |||
| 153 | // "id" is permitted. This is pretty much as some HTML elements require this | ||
| 154 | // to work well ("dfn" is an example of a "id" being value) | ||
| 155 | // This does create a risk that JavaScript and CSS within your web page | ||
| 156 | // might identify the wrong elements. Ensure that you select things | ||
| 157 | // accurately | ||
| 158 | p.AllowAttrs("id").Matching( | ||
| 159 | regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`), | ||
| 160 | ).Globally() | ||
| 161 | |||
| 162 | // "title" is permitted as it improves accessibility. | ||
| 163 | p.AllowAttrs("title").Matching(Paragraph).Globally() | ||
| 164 | } | ||
| 165 | |||
| 166 | // AllowStyling presently enables the class attribute globally. | ||
| 167 | // | ||
| 168 | // Note: When bluemonday ships a CSS parser and we can safely sanitise that, | ||
| 169 | // this will also allow sanitized styling of elements via the style attribute. | ||
| 170 | func (p *Policy) AllowStyling() { | ||
| 171 | |||
| 172 | // "class" is permitted globally | ||
| 173 | p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally() | ||
| 174 | } | ||
| 175 | |||
| 176 | // AllowImages enables the img element and some popular attributes. It will also | ||
| 177 | // ensure that URL values are parseable. This helper does not enable data URI | ||
| 178 | // images, for that you should also use the AllowDataURIImages() helper. | ||
| 179 | func (p *Policy) AllowImages() { | ||
| 180 | |||
| 181 | // "img" is permitted | ||
| 182 | p.AllowAttrs("align").Matching(ImageAlign).OnElements("img") | ||
| 183 | p.AllowAttrs("alt").Matching(Paragraph).OnElements("img") | ||
| 184 | p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img") | ||
| 185 | |||
| 186 | // Standard URLs enabled | ||
| 187 | p.AllowStandardURLs() | ||
| 188 | p.AllowAttrs("src").OnElements("img") | ||
| 189 | } | ||
| 190 | |||
| 191 | // AllowDataURIImages permits the use of inline images defined in RFC2397 | ||
| 192 | // http://tools.ietf.org/html/rfc2397 | ||
| 193 | // http://en.wikipedia.org/wiki/Data_URI_scheme | ||
| 194 | // | ||
| 195 | // Images must have a mimetype matching: | ||
| 196 | // | ||
| 197 | // image/gif | ||
| 198 | // image/jpeg | ||
| 199 | // image/png | ||
| 200 | // image/webp | ||
| 201 | // | ||
| 202 | // NOTE: There is a potential security risk to allowing data URIs and you should | ||
| 203 | // only permit them on content you already trust. | ||
| 204 | // http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/ | ||
| 205 | // https://capec.mitre.org/data/definitions/244.html | ||
| 206 | func (p *Policy) AllowDataURIImages() { | ||
| 207 | |||
| 208 | // URLs must be parseable by net/url.Parse() | ||
| 209 | p.RequireParseableURLs(true) | ||
| 210 | |||
| 211 | // Supply a function to validate images contained within data URI | ||
| 212 | p.AllowURLSchemeWithCustomPolicy( | ||
| 213 | "data", | ||
| 214 | func(url *url.URL) (allowUrl bool) { | ||
| 215 | if url.RawQuery != "" || url.Fragment != "" { | ||
| 216 | return false | ||
| 217 | } | ||
| 218 | |||
| 219 | matched := dataURIImagePrefix.FindString(url.Opaque) | ||
| 220 | if matched == "" { | ||
| 221 | return false | ||
| 222 | } | ||
| 223 | |||
| 224 | _, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):]) | ||
| 225 | if err != nil { | ||
| 226 | return false | ||
| 227 | } | ||
| 228 | |||
| 229 | return true | ||
| 230 | }, | ||
| 231 | ) | ||
| 232 | } | ||
| 233 | |||
| 234 | // AllowLists will enabled ordered and unordered lists, as well as definition | ||
| 235 | // lists | ||
| 236 | func (p *Policy) AllowLists() { | ||
| 237 | // "ol" "ul" are permitted | ||
| 238 | p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul") | ||
| 239 | |||
| 240 | // "li" is permitted | ||
| 241 | p.AllowAttrs("type").Matching(ListType).OnElements("li") | ||
| 242 | p.AllowAttrs("value").Matching(Integer).OnElements("li") | ||
| 243 | |||
| 244 | // "dl" "dt" "dd" are permitted | ||
| 245 | p.AllowElements("dl", "dt", "dd") | ||
| 246 | } | ||
| 247 | |||
| 248 | // AllowTables will enable a rich set of elements and attributes to describe | ||
| 249 | // HTML tables | ||
| 250 | func (p *Policy) AllowTables() { | ||
| 251 | |||
| 252 | // "table" is permitted | ||
| 253 | p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table") | ||
| 254 | p.AllowAttrs("summary").Matching(Paragraph).OnElements("table") | ||
| 255 | |||
| 256 | // "caption" is permitted | ||
| 257 | p.AllowElements("caption") | ||
| 258 | |||
| 259 | // "col" "colgroup" are permitted | ||
| 260 | p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup") | ||
| 261 | p.AllowAttrs("height", "width").Matching( | ||
| 262 | NumberOrPercent, | ||
| 263 | ).OnElements("col", "colgroup") | ||
| 264 | p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col") | ||
| 265 | p.AllowAttrs("valign").Matching( | ||
| 266 | CellVerticalAlign, | ||
| 267 | ).OnElements("col", "colgroup") | ||
| 268 | |||
| 269 | // "thead" "tr" are permitted | ||
| 270 | p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr") | ||
| 271 | p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr") | ||
| 272 | |||
| 273 | // "td" "th" are permitted | ||
| 274 | p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th") | ||
| 275 | p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th") | ||
| 276 | p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th") | ||
| 277 | p.AllowAttrs("headers").Matching( | ||
| 278 | SpaceSeparatedTokens, | ||
| 279 | ).OnElements("td", "th") | ||
| 280 | p.AllowAttrs("height", "width").Matching( | ||
| 281 | NumberOrPercent, | ||
| 282 | ).OnElements("td", "th") | ||
| 283 | p.AllowAttrs( | ||
| 284 | "scope", | ||
| 285 | ).Matching( | ||
| 286 | regexp.MustCompile(`(?i)(?:row|col)(?:group)?`), | ||
| 287 | ).OnElements("td", "th") | ||
| 288 | p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th") | ||
| 289 | p.AllowAttrs("nowrap").Matching( | ||
| 290 | regexp.MustCompile(`(?i)|nowrap`), | ||
| 291 | ).OnElements("td", "th") | ||
| 292 | |||
| 293 | // "tbody" "tfoot" | ||
| 294 | p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot") | ||
| 295 | p.AllowAttrs("valign").Matching( | ||
| 296 | CellVerticalAlign, | ||
| 297 | ).OnElements("tbody", "tfoot") | ||
| 298 | } | ||
| 299 | |||
| 300 | func (p *Policy) AllowIFrames(vals ...SandboxValue) { | ||
| 301 | p.AllowAttrs("sandbox").OnElements("iframe") | ||
| 302 | |||
| 303 | p.RequireSandboxOnIFrame(vals...) | ||
| 304 | } | ||
