aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/net/html
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/net/html')
-rw-r--r--vendor/golang.org/x/net/html/atom/atom.go78
-rw-r--r--vendor/golang.org/x/net/html/atom/table.go783
-rw-r--r--vendor/golang.org/x/net/html/const.go111
-rw-r--r--vendor/golang.org/x/net/html/doc.go127
-rw-r--r--vendor/golang.org/x/net/html/doctype.go156
-rw-r--r--vendor/golang.org/x/net/html/entity.go2253
-rw-r--r--vendor/golang.org/x/net/html/escape.go339
-rw-r--r--vendor/golang.org/x/net/html/foreign.go222
-rw-r--r--vendor/golang.org/x/net/html/node.go225
-rw-r--r--vendor/golang.org/x/net/html/parse.go2460
-rw-r--r--vendor/golang.org/x/net/html/render.go273
-rw-r--r--vendor/golang.org/x/net/html/token.go1261
12 files changed, 8288 insertions, 0 deletions
diff --git a/vendor/golang.org/x/net/html/atom/atom.go b/vendor/golang.org/x/net/html/atom/atom.go
new file mode 100644
index 0000000..cd0a8ac
--- /dev/null
+++ b/vendor/golang.org/x/net/html/atom/atom.go
@@ -0,0 +1,78 @@
1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package atom provides integer codes (also known as atoms) for a fixed set of
6// frequently occurring HTML strings: tag names and attribute keys such as "p"
7// and "id".
8//
9// Sharing an atom's name between all elements with the same tag can result in
10// fewer string allocations when tokenizing and parsing HTML. Integer
11// comparisons are also generally faster than string comparisons.
12//
13// The value of an atom's particular code is not guaranteed to stay the same
14// between versions of this package. Neither is any ordering guaranteed:
15// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
16// be dense. The only guarantees are that e.g. looking up "div" will yield
17// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
18package atom // import "golang.org/x/net/html/atom"
19
20// Atom is an integer code for a string. The zero value maps to "".
21type Atom uint32
22
23// String returns the atom's name.
24func (a Atom) String() string {
25 start := uint32(a >> 8)
26 n := uint32(a & 0xff)
27 if start+n > uint32(len(atomText)) {
28 return ""
29 }
30 return atomText[start : start+n]
31}
32
33func (a Atom) string() string {
34 return atomText[a>>8 : a>>8+a&0xff]
35}
36
// fnv computes the FNV hash of s, starting from the arbitrary seed h.
// Each byte is xor-ed into the running hash, which is then multiplied by
// the 32-bit FNV prime; the arithmetic wraps modulo 2^32.
func fnv(h uint32, s []byte) uint32 {
	const prime = 16777619
	for _, b := range s {
		h = (h ^ uint32(b)) * prime
	}
	return h
}
45
// match reports whether the first len(t) bytes of s are equal to t.
// s must be at least as long as t; a shorter s makes the indexing panic.
func match(s string, t []byte) bool {
	for i := 0; i < len(t); i++ {
		if s[i] != t[i] {
			return false
		}
	}
	return true
}
54
55// Lookup returns the atom whose name is s. It returns zero if there is no
56// such atom. The lookup is case sensitive.
57func Lookup(s []byte) Atom {
58 if len(s) == 0 || len(s) > maxAtomLen {
59 return 0
60 }
61 h := fnv(hash0, s)
62 if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
63 return a
64 }
65 if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
66 return a
67 }
68 return 0
69}
70
71// String returns a string whose contents are equal to s. In that sense, it is
72// equivalent to string(s) but may be more efficient.
73func String(s []byte) string {
74 if a := Lookup(s); a != 0 {
75 return a.String()
76 }
77 return string(s)
78}
diff --git a/vendor/golang.org/x/net/html/atom/table.go b/vendor/golang.org/x/net/html/atom/table.go
new file mode 100644
index 0000000..2a93886
--- /dev/null
+++ b/vendor/golang.org/x/net/html/atom/table.go
@@ -0,0 +1,783 @@
1// Code generated by go generate gen.go; DO NOT EDIT.
2
3//go:generate go run gen.go
4
5package atom
6
7const (
8 A Atom = 0x1
9 Abbr Atom = 0x4
10 Accept Atom = 0x1a06
11 AcceptCharset Atom = 0x1a0e
12 Accesskey Atom = 0x2c09
13 Acronym Atom = 0xaa07
14 Action Atom = 0x27206
15 Address Atom = 0x6f307
16 Align Atom = 0xb105
17 Allowfullscreen Atom = 0x2080f
18 Allowpaymentrequest Atom = 0xc113
19 Allowusermedia Atom = 0xdd0e
20 Alt Atom = 0xf303
21 Annotation Atom = 0x1c90a
22 AnnotationXml Atom = 0x1c90e
23 Applet Atom = 0x31906
24 Area Atom = 0x35604
25 Article Atom = 0x3fc07
26 As Atom = 0x3c02
27 Aside Atom = 0x10705
28 Async Atom = 0xff05
29 Audio Atom = 0x11505
30 Autocomplete Atom = 0x2780c
31 Autofocus Atom = 0x12109
32 Autoplay Atom = 0x13c08
33 B Atom = 0x101
34 Base Atom = 0x3b04
35 Basefont Atom = 0x3b08
36 Bdi Atom = 0xba03
37 Bdo Atom = 0x14b03
38 Bgsound Atom = 0x15e07
39 Big Atom = 0x17003
40 Blink Atom = 0x17305
41 Blockquote Atom = 0x1870a
42 Body Atom = 0x2804
43 Br Atom = 0x202
44 Button Atom = 0x19106
45 Canvas Atom = 0x10306
46 Caption Atom = 0x23107
47 Center Atom = 0x22006
48 Challenge Atom = 0x29b09
49 Charset Atom = 0x2107
50 Checked Atom = 0x47907
51 Cite Atom = 0x19c04
52 Class Atom = 0x56405
53 Code Atom = 0x5c504
54 Col Atom = 0x1ab03
55 Colgroup Atom = 0x1ab08
56 Color Atom = 0x1bf05
57 Cols Atom = 0x1c404
58 Colspan Atom = 0x1c407
59 Command Atom = 0x1d707
60 Content Atom = 0x58b07
61 Contenteditable Atom = 0x58b0f
62 Contextmenu Atom = 0x3800b
63 Controls Atom = 0x1de08
64 Coords Atom = 0x1ea06
65 Crossorigin Atom = 0x1fb0b
66 Data Atom = 0x4a504
67 Datalist Atom = 0x4a508
68 Datetime Atom = 0x2b808
69 Dd Atom = 0x2d702
70 Default Atom = 0x10a07
71 Defer Atom = 0x5c705
72 Del Atom = 0x45203
73 Desc Atom = 0x56104
74 Details Atom = 0x7207
75 Dfn Atom = 0x8703
76 Dialog Atom = 0xbb06
77 Dir Atom = 0x9303
78 Dirname Atom = 0x9307
79 Disabled Atom = 0x16408
80 Div Atom = 0x16b03
81 Dl Atom = 0x5e602
82 Download Atom = 0x46308
83 Draggable Atom = 0x17a09
84 Dropzone Atom = 0x40508
85 Dt Atom = 0x64b02
86 Em Atom = 0x6e02
87 Embed Atom = 0x6e05
88 Enctype Atom = 0x28d07
89 Face Atom = 0x21e04
90 Fieldset Atom = 0x22608
91 Figcaption Atom = 0x22e0a
92 Figure Atom = 0x24806
93 Font Atom = 0x3f04
94 Footer Atom = 0xf606
95 For Atom = 0x25403
96 ForeignObject Atom = 0x2540d
97 Foreignobject Atom = 0x2610d
98 Form Atom = 0x26e04
99 Formaction Atom = 0x26e0a
100 Formenctype Atom = 0x2890b
101 Formmethod Atom = 0x2a40a
102 Formnovalidate Atom = 0x2ae0e
103 Formtarget Atom = 0x2c00a
104 Frame Atom = 0x8b05
105 Frameset Atom = 0x8b08
106 H1 Atom = 0x15c02
107 H2 Atom = 0x2de02
108 H3 Atom = 0x30d02
109 H4 Atom = 0x34502
110 H5 Atom = 0x34f02
111 H6 Atom = 0x64d02
112 Head Atom = 0x33104
113 Header Atom = 0x33106
114 Headers Atom = 0x33107
115 Height Atom = 0x5206
116 Hgroup Atom = 0x2ca06
117 Hidden Atom = 0x2d506
118 High Atom = 0x2db04
119 Hr Atom = 0x15702
120 Href Atom = 0x2e004
121 Hreflang Atom = 0x2e008
122 Html Atom = 0x5604
123 HttpEquiv Atom = 0x2e80a
124 I Atom = 0x601
125 Icon Atom = 0x58a04
126 Id Atom = 0x10902
127 Iframe Atom = 0x2fc06
128 Image Atom = 0x30205
129 Img Atom = 0x30703
130 Input Atom = 0x44b05
131 Inputmode Atom = 0x44b09
132 Ins Atom = 0x20403
133 Integrity Atom = 0x23f09
134 Is Atom = 0x16502
135 Isindex Atom = 0x30f07
136 Ismap Atom = 0x31605
137 Itemid Atom = 0x38b06
138 Itemprop Atom = 0x19d08
139 Itemref Atom = 0x3cd07
140 Itemscope Atom = 0x67109
141 Itemtype Atom = 0x31f08
142 Kbd Atom = 0xb903
143 Keygen Atom = 0x3206
144 Keytype Atom = 0xd607
145 Kind Atom = 0x17704
146 Label Atom = 0x5905
147 Lang Atom = 0x2e404
148 Legend Atom = 0x18106
149 Li Atom = 0xb202
150 Link Atom = 0x17404
151 List Atom = 0x4a904
152 Listing Atom = 0x4a907
153 Loop Atom = 0x5d04
154 Low Atom = 0xc303
155 Main Atom = 0x1004
156 Malignmark Atom = 0xb00a
157 Manifest Atom = 0x6d708
158 Map Atom = 0x31803
159 Mark Atom = 0xb604
160 Marquee Atom = 0x32707
161 Math Atom = 0x32e04
162 Max Atom = 0x33d03
163 Maxlength Atom = 0x33d09
164 Media Atom = 0xe605
165 Mediagroup Atom = 0xe60a
166 Menu Atom = 0x38704
167 Menuitem Atom = 0x38708
168 Meta Atom = 0x4b804
169 Meter Atom = 0x9805
170 Method Atom = 0x2a806
171 Mglyph Atom = 0x30806
172 Mi Atom = 0x34702
173 Min Atom = 0x34703
174 Minlength Atom = 0x34709
175 Mn Atom = 0x2b102
176 Mo Atom = 0xa402
177 Ms Atom = 0x67402
178 Mtext Atom = 0x35105
179 Multiple Atom = 0x35f08
180 Muted Atom = 0x36705
181 Name Atom = 0x9604
182 Nav Atom = 0x1303
183 Nobr Atom = 0x3704
184 Noembed Atom = 0x6c07
185 Noframes Atom = 0x8908
186 Nomodule Atom = 0xa208
187 Nonce Atom = 0x1a605
188 Noscript Atom = 0x21608
189 Novalidate Atom = 0x2b20a
190 Object Atom = 0x26806
191 Ol Atom = 0x13702
192 Onabort Atom = 0x19507
193 Onafterprint Atom = 0x2360c
194 Onautocomplete Atom = 0x2760e
195 Onautocompleteerror Atom = 0x27613
196 Onauxclick Atom = 0x61f0a
197 Onbeforeprint Atom = 0x69e0d
198 Onbeforeunload Atom = 0x6e70e
199 Onblur Atom = 0x56d06
200 Oncancel Atom = 0x11908
201 Oncanplay Atom = 0x14d09
202 Oncanplaythrough Atom = 0x14d10
203 Onchange Atom = 0x41b08
204 Onclick Atom = 0x2f507
205 Onclose Atom = 0x36c07
206 Oncontextmenu Atom = 0x37e0d
207 Oncopy Atom = 0x39106
208 Oncuechange Atom = 0x3970b
209 Oncut Atom = 0x3a205
210 Ondblclick Atom = 0x3a70a
211 Ondrag Atom = 0x3b106
212 Ondragend Atom = 0x3b109
213 Ondragenter Atom = 0x3ba0b
214 Ondragexit Atom = 0x3c50a
215 Ondragleave Atom = 0x3df0b
216 Ondragover Atom = 0x3ea0a
217 Ondragstart Atom = 0x3f40b
218 Ondrop Atom = 0x40306
219 Ondurationchange Atom = 0x41310
220 Onemptied Atom = 0x40a09
221 Onended Atom = 0x42307
222 Onerror Atom = 0x42a07
223 Onfocus Atom = 0x43107
224 Onhashchange Atom = 0x43d0c
225 Oninput Atom = 0x44907
226 Oninvalid Atom = 0x45509
227 Onkeydown Atom = 0x45e09
228 Onkeypress Atom = 0x46b0a
229 Onkeyup Atom = 0x48007
230 Onlanguagechange Atom = 0x48d10
231 Onload Atom = 0x49d06
232 Onloadeddata Atom = 0x49d0c
233 Onloadedmetadata Atom = 0x4b010
234 Onloadend Atom = 0x4c609
235 Onloadstart Atom = 0x4cf0b
236 Onmessage Atom = 0x4da09
237 Onmessageerror Atom = 0x4da0e
238 Onmousedown Atom = 0x4e80b
239 Onmouseenter Atom = 0x4f30c
240 Onmouseleave Atom = 0x4ff0c
241 Onmousemove Atom = 0x50b0b
242 Onmouseout Atom = 0x5160a
243 Onmouseover Atom = 0x5230b
244 Onmouseup Atom = 0x52e09
245 Onmousewheel Atom = 0x53c0c
246 Onoffline Atom = 0x54809
247 Ononline Atom = 0x55108
248 Onpagehide Atom = 0x5590a
249 Onpageshow Atom = 0x5730a
250 Onpaste Atom = 0x57f07
251 Onpause Atom = 0x59a07
252 Onplay Atom = 0x5a406
253 Onplaying Atom = 0x5a409
254 Onpopstate Atom = 0x5ad0a
255 Onprogress Atom = 0x5b70a
256 Onratechange Atom = 0x5cc0c
257 Onrejectionhandled Atom = 0x5d812
258 Onreset Atom = 0x5ea07
259 Onresize Atom = 0x5f108
260 Onscroll Atom = 0x60008
261 Onsecuritypolicyviolation Atom = 0x60819
262 Onseeked Atom = 0x62908
263 Onseeking Atom = 0x63109
264 Onselect Atom = 0x63a08
265 Onshow Atom = 0x64406
266 Onsort Atom = 0x64f06
267 Onstalled Atom = 0x65909
268 Onstorage Atom = 0x66209
269 Onsubmit Atom = 0x66b08
270 Onsuspend Atom = 0x67b09
271 Ontimeupdate Atom = 0x400c
272 Ontoggle Atom = 0x68408
273 Onunhandledrejection Atom = 0x68c14
274 Onunload Atom = 0x6ab08
275 Onvolumechange Atom = 0x6b30e
276 Onwaiting Atom = 0x6c109
277 Onwheel Atom = 0x6ca07
278 Open Atom = 0x1a304
279 Optgroup Atom = 0x5f08
280 Optimum Atom = 0x6d107
281 Option Atom = 0x6e306
282 Output Atom = 0x51d06
283 P Atom = 0xc01
284 Param Atom = 0xc05
285 Pattern Atom = 0x6607
286 Picture Atom = 0x7b07
287 Ping Atom = 0xef04
288 Placeholder Atom = 0x1310b
289 Plaintext Atom = 0x1b209
290 Playsinline Atom = 0x1400b
291 Poster Atom = 0x2cf06
292 Pre Atom = 0x47003
293 Preload Atom = 0x48607
294 Progress Atom = 0x5b908
295 Prompt Atom = 0x53606
296 Public Atom = 0x58606
297 Q Atom = 0xcf01
298 Radiogroup Atom = 0x30a
299 Rb Atom = 0x3a02
300 Readonly Atom = 0x35708
301 Referrerpolicy Atom = 0x3d10e
302 Rel Atom = 0x48703
303 Required Atom = 0x24c08
304 Reversed Atom = 0x8008
305 Rows Atom = 0x9c04
306 Rowspan Atom = 0x9c07
307 Rp Atom = 0x23c02
308 Rt Atom = 0x19a02
309 Rtc Atom = 0x19a03
310 Ruby Atom = 0xfb04
311 S Atom = 0x2501
312 Samp Atom = 0x7804
313 Sandbox Atom = 0x12907
314 Scope Atom = 0x67505
315 Scoped Atom = 0x67506
316 Script Atom = 0x21806
317 Seamless Atom = 0x37108
318 Section Atom = 0x56807
319 Select Atom = 0x63c06
320 Selected Atom = 0x63c08
321 Shape Atom = 0x1e505
322 Size Atom = 0x5f504
323 Sizes Atom = 0x5f505
324 Slot Atom = 0x1ef04
325 Small Atom = 0x20605
326 Sortable Atom = 0x65108
327 Sorted Atom = 0x33706
328 Source Atom = 0x37806
329 Spacer Atom = 0x43706
330 Span Atom = 0x9f04
331 Spellcheck Atom = 0x4740a
332 Src Atom = 0x5c003
333 Srcdoc Atom = 0x5c006
334 Srclang Atom = 0x5f907
335 Srcset Atom = 0x6f906
336 Start Atom = 0x3fa05
337 Step Atom = 0x58304
338 Strike Atom = 0xd206
339 Strong Atom = 0x6dd06
340 Style Atom = 0x6ff05
341 Sub Atom = 0x66d03
342 Summary Atom = 0x70407
343 Sup Atom = 0x70b03
344 Svg Atom = 0x70e03
345 System Atom = 0x71106
346 Tabindex Atom = 0x4be08
347 Table Atom = 0x59505
348 Target Atom = 0x2c406
349 Tbody Atom = 0x2705
350 Td Atom = 0x9202
351 Template Atom = 0x71408
352 Textarea Atom = 0x35208
353 Tfoot Atom = 0xf505
354 Th Atom = 0x15602
355 Thead Atom = 0x33005
356 Time Atom = 0x4204
357 Title Atom = 0x11005
358 Tr Atom = 0xcc02
359 Track Atom = 0x1ba05
360 Translate Atom = 0x1f209
361 Tt Atom = 0x6802
362 Type Atom = 0xd904
363 Typemustmatch Atom = 0x2900d
364 U Atom = 0xb01
365 Ul Atom = 0xa702
366 Updateviacache Atom = 0x460e
367 Usemap Atom = 0x59e06
368 Value Atom = 0x1505
369 Var Atom = 0x16d03
370 Video Atom = 0x2f105
371 Wbr Atom = 0x57c03
372 Width Atom = 0x64905
373 Workertype Atom = 0x71c0a
374 Wrap Atom = 0x72604
375 Xmp Atom = 0x12f03
376)
377
378const hash0 = 0x81cdf10e
379
380const maxAtomLen = 25
381
382var table = [1 << 9]Atom{
383 0x1: 0xe60a, // mediagroup
384 0x2: 0x2e404, // lang
385 0x4: 0x2c09, // accesskey
386 0x5: 0x8b08, // frameset
387 0x7: 0x63a08, // onselect
388 0x8: 0x71106, // system
389 0xa: 0x64905, // width
390 0xc: 0x2890b, // formenctype
391 0xd: 0x13702, // ol
392 0xe: 0x3970b, // oncuechange
393 0x10: 0x14b03, // bdo
394 0x11: 0x11505, // audio
395 0x12: 0x17a09, // draggable
396 0x14: 0x2f105, // video
397 0x15: 0x2b102, // mn
398 0x16: 0x38704, // menu
399 0x17: 0x2cf06, // poster
400 0x19: 0xf606, // footer
401 0x1a: 0x2a806, // method
402 0x1b: 0x2b808, // datetime
403 0x1c: 0x19507, // onabort
404 0x1d: 0x460e, // updateviacache
405 0x1e: 0xff05, // async
406 0x1f: 0x49d06, // onload
407 0x21: 0x11908, // oncancel
408 0x22: 0x62908, // onseeked
409 0x23: 0x30205, // image
410 0x24: 0x5d812, // onrejectionhandled
411 0x26: 0x17404, // link
412 0x27: 0x51d06, // output
413 0x28: 0x33104, // head
414 0x29: 0x4ff0c, // onmouseleave
415 0x2a: 0x57f07, // onpaste
416 0x2b: 0x5a409, // onplaying
417 0x2c: 0x1c407, // colspan
418 0x2f: 0x1bf05, // color
419 0x30: 0x5f504, // size
420 0x31: 0x2e80a, // http-equiv
421 0x33: 0x601, // i
422 0x34: 0x5590a, // onpagehide
423 0x35: 0x68c14, // onunhandledrejection
424 0x37: 0x42a07, // onerror
425 0x3a: 0x3b08, // basefont
426 0x3f: 0x1303, // nav
427 0x40: 0x17704, // kind
428 0x41: 0x35708, // readonly
429 0x42: 0x30806, // mglyph
430 0x44: 0xb202, // li
431 0x46: 0x2d506, // hidden
432 0x47: 0x70e03, // svg
433 0x48: 0x58304, // step
434 0x49: 0x23f09, // integrity
435 0x4a: 0x58606, // public
436 0x4c: 0x1ab03, // col
437 0x4d: 0x1870a, // blockquote
438 0x4e: 0x34f02, // h5
439 0x50: 0x5b908, // progress
440 0x51: 0x5f505, // sizes
441 0x52: 0x34502, // h4
442 0x56: 0x33005, // thead
443 0x57: 0xd607, // keytype
444 0x58: 0x5b70a, // onprogress
445 0x59: 0x44b09, // inputmode
446 0x5a: 0x3b109, // ondragend
447 0x5d: 0x3a205, // oncut
448 0x5e: 0x43706, // spacer
449 0x5f: 0x1ab08, // colgroup
450 0x62: 0x16502, // is
451 0x65: 0x3c02, // as
452 0x66: 0x54809, // onoffline
453 0x67: 0x33706, // sorted
454 0x69: 0x48d10, // onlanguagechange
455 0x6c: 0x43d0c, // onhashchange
456 0x6d: 0x9604, // name
457 0x6e: 0xf505, // tfoot
458 0x6f: 0x56104, // desc
459 0x70: 0x33d03, // max
460 0x72: 0x1ea06, // coords
461 0x73: 0x30d02, // h3
462 0x74: 0x6e70e, // onbeforeunload
463 0x75: 0x9c04, // rows
464 0x76: 0x63c06, // select
465 0x77: 0x9805, // meter
466 0x78: 0x38b06, // itemid
467 0x79: 0x53c0c, // onmousewheel
468 0x7a: 0x5c006, // srcdoc
469 0x7d: 0x1ba05, // track
470 0x7f: 0x31f08, // itemtype
471 0x82: 0xa402, // mo
472 0x83: 0x41b08, // onchange
473 0x84: 0x33107, // headers
474 0x85: 0x5cc0c, // onratechange
475 0x86: 0x60819, // onsecuritypolicyviolation
476 0x88: 0x4a508, // datalist
477 0x89: 0x4e80b, // onmousedown
478 0x8a: 0x1ef04, // slot
479 0x8b: 0x4b010, // onloadedmetadata
480 0x8c: 0x1a06, // accept
481 0x8d: 0x26806, // object
482 0x91: 0x6b30e, // onvolumechange
483 0x92: 0x2107, // charset
484 0x93: 0x27613, // onautocompleteerror
485 0x94: 0xc113, // allowpaymentrequest
486 0x95: 0x2804, // body
487 0x96: 0x10a07, // default
488 0x97: 0x63c08, // selected
489 0x98: 0x21e04, // face
490 0x99: 0x1e505, // shape
491 0x9b: 0x68408, // ontoggle
492 0x9e: 0x64b02, // dt
493 0x9f: 0xb604, // mark
494 0xa1: 0xb01, // u
495 0xa4: 0x6ab08, // onunload
496 0xa5: 0x5d04, // loop
497 0xa6: 0x16408, // disabled
498 0xaa: 0x42307, // onended
499 0xab: 0xb00a, // malignmark
500 0xad: 0x67b09, // onsuspend
501 0xae: 0x35105, // mtext
502 0xaf: 0x64f06, // onsort
503 0xb0: 0x19d08, // itemprop
504 0xb3: 0x67109, // itemscope
505 0xb4: 0x17305, // blink
506 0xb6: 0x3b106, // ondrag
507 0xb7: 0xa702, // ul
508 0xb8: 0x26e04, // form
509 0xb9: 0x12907, // sandbox
510 0xba: 0x8b05, // frame
511 0xbb: 0x1505, // value
512 0xbc: 0x66209, // onstorage
513 0xbf: 0xaa07, // acronym
514 0xc0: 0x19a02, // rt
515 0xc2: 0x202, // br
516 0xc3: 0x22608, // fieldset
517 0xc4: 0x2900d, // typemustmatch
518 0xc5: 0xa208, // nomodule
519 0xc6: 0x6c07, // noembed
520 0xc7: 0x69e0d, // onbeforeprint
521 0xc8: 0x19106, // button
522 0xc9: 0x2f507, // onclick
523 0xca: 0x70407, // summary
524 0xcd: 0xfb04, // ruby
525 0xce: 0x56405, // class
526 0xcf: 0x3f40b, // ondragstart
527 0xd0: 0x23107, // caption
528 0xd4: 0xdd0e, // allowusermedia
529 0xd5: 0x4cf0b, // onloadstart
530 0xd9: 0x16b03, // div
531 0xda: 0x4a904, // list
532 0xdb: 0x32e04, // math
533 0xdc: 0x44b05, // input
534 0xdf: 0x3ea0a, // ondragover
535 0xe0: 0x2de02, // h2
536 0xe2: 0x1b209, // plaintext
537 0xe4: 0x4f30c, // onmouseenter
538 0xe7: 0x47907, // checked
539 0xe8: 0x47003, // pre
540 0xea: 0x35f08, // multiple
541 0xeb: 0xba03, // bdi
542 0xec: 0x33d09, // maxlength
543 0xed: 0xcf01, // q
544 0xee: 0x61f0a, // onauxclick
545 0xf0: 0x57c03, // wbr
546 0xf2: 0x3b04, // base
547 0xf3: 0x6e306, // option
548 0xf5: 0x41310, // ondurationchange
549 0xf7: 0x8908, // noframes
550 0xf9: 0x40508, // dropzone
551 0xfb: 0x67505, // scope
552 0xfc: 0x8008, // reversed
553 0xfd: 0x3ba0b, // ondragenter
554 0xfe: 0x3fa05, // start
555 0xff: 0x12f03, // xmp
556 0x100: 0x5f907, // srclang
557 0x101: 0x30703, // img
558 0x104: 0x101, // b
559 0x105: 0x25403, // for
560 0x106: 0x10705, // aside
561 0x107: 0x44907, // oninput
562 0x108: 0x35604, // area
563 0x109: 0x2a40a, // formmethod
564 0x10a: 0x72604, // wrap
565 0x10c: 0x23c02, // rp
566 0x10d: 0x46b0a, // onkeypress
567 0x10e: 0x6802, // tt
568 0x110: 0x34702, // mi
569 0x111: 0x36705, // muted
570 0x112: 0xf303, // alt
571 0x113: 0x5c504, // code
572 0x114: 0x6e02, // em
573 0x115: 0x3c50a, // ondragexit
574 0x117: 0x9f04, // span
575 0x119: 0x6d708, // manifest
576 0x11a: 0x38708, // menuitem
577 0x11b: 0x58b07, // content
578 0x11d: 0x6c109, // onwaiting
579 0x11f: 0x4c609, // onloadend
580 0x121: 0x37e0d, // oncontextmenu
581 0x123: 0x56d06, // onblur
582 0x124: 0x3fc07, // article
583 0x125: 0x9303, // dir
584 0x126: 0xef04, // ping
585 0x127: 0x24c08, // required
586 0x128: 0x45509, // oninvalid
587 0x129: 0xb105, // align
588 0x12b: 0x58a04, // icon
589 0x12c: 0x64d02, // h6
590 0x12d: 0x1c404, // cols
591 0x12e: 0x22e0a, // figcaption
592 0x12f: 0x45e09, // onkeydown
593 0x130: 0x66b08, // onsubmit
594 0x131: 0x14d09, // oncanplay
595 0x132: 0x70b03, // sup
596 0x133: 0xc01, // p
597 0x135: 0x40a09, // onemptied
598 0x136: 0x39106, // oncopy
599 0x137: 0x19c04, // cite
600 0x138: 0x3a70a, // ondblclick
601 0x13a: 0x50b0b, // onmousemove
602 0x13c: 0x66d03, // sub
603 0x13d: 0x48703, // rel
604 0x13e: 0x5f08, // optgroup
605 0x142: 0x9c07, // rowspan
606 0x143: 0x37806, // source
607 0x144: 0x21608, // noscript
608 0x145: 0x1a304, // open
609 0x146: 0x20403, // ins
610 0x147: 0x2540d, // foreignObject
611 0x148: 0x5ad0a, // onpopstate
612 0x14a: 0x28d07, // enctype
613 0x14b: 0x2760e, // onautocomplete
614 0x14c: 0x35208, // textarea
615 0x14e: 0x2780c, // autocomplete
616 0x14f: 0x15702, // hr
617 0x150: 0x1de08, // controls
618 0x151: 0x10902, // id
619 0x153: 0x2360c, // onafterprint
620 0x155: 0x2610d, // foreignobject
621 0x156: 0x32707, // marquee
622 0x157: 0x59a07, // onpause
623 0x158: 0x5e602, // dl
624 0x159: 0x5206, // height
625 0x15a: 0x34703, // min
626 0x15b: 0x9307, // dirname
627 0x15c: 0x1f209, // translate
628 0x15d: 0x5604, // html
629 0x15e: 0x34709, // minlength
630 0x15f: 0x48607, // preload
631 0x160: 0x71408, // template
632 0x161: 0x3df0b, // ondragleave
633 0x162: 0x3a02, // rb
634 0x164: 0x5c003, // src
635 0x165: 0x6dd06, // strong
636 0x167: 0x7804, // samp
637 0x168: 0x6f307, // address
638 0x169: 0x55108, // ononline
639 0x16b: 0x1310b, // placeholder
640 0x16c: 0x2c406, // target
641 0x16d: 0x20605, // small
642 0x16e: 0x6ca07, // onwheel
643 0x16f: 0x1c90a, // annotation
644 0x170: 0x4740a, // spellcheck
645 0x171: 0x7207, // details
646 0x172: 0x10306, // canvas
647 0x173: 0x12109, // autofocus
648 0x174: 0xc05, // param
649 0x176: 0x46308, // download
650 0x177: 0x45203, // del
651 0x178: 0x36c07, // onclose
652 0x179: 0xb903, // kbd
653 0x17a: 0x31906, // applet
654 0x17b: 0x2e004, // href
655 0x17c: 0x5f108, // onresize
656 0x17e: 0x49d0c, // onloadeddata
657 0x180: 0xcc02, // tr
658 0x181: 0x2c00a, // formtarget
659 0x182: 0x11005, // title
660 0x183: 0x6ff05, // style
661 0x184: 0xd206, // strike
662 0x185: 0x59e06, // usemap
663 0x186: 0x2fc06, // iframe
664 0x187: 0x1004, // main
665 0x189: 0x7b07, // picture
666 0x18c: 0x31605, // ismap
667 0x18e: 0x4a504, // data
668 0x18f: 0x5905, // label
669 0x191: 0x3d10e, // referrerpolicy
670 0x192: 0x15602, // th
671 0x194: 0x53606, // prompt
672 0x195: 0x56807, // section
673 0x197: 0x6d107, // optimum
674 0x198: 0x2db04, // high
675 0x199: 0x15c02, // h1
676 0x19a: 0x65909, // onstalled
677 0x19b: 0x16d03, // var
678 0x19c: 0x4204, // time
679 0x19e: 0x67402, // ms
680 0x19f: 0x33106, // header
681 0x1a0: 0x4da09, // onmessage
682 0x1a1: 0x1a605, // nonce
683 0x1a2: 0x26e0a, // formaction
684 0x1a3: 0x22006, // center
685 0x1a4: 0x3704, // nobr
686 0x1a5: 0x59505, // table
687 0x1a6: 0x4a907, // listing
688 0x1a7: 0x18106, // legend
689 0x1a9: 0x29b09, // challenge
690 0x1aa: 0x24806, // figure
691 0x1ab: 0xe605, // media
692 0x1ae: 0xd904, // type
693 0x1af: 0x3f04, // font
694 0x1b0: 0x4da0e, // onmessageerror
695 0x1b1: 0x37108, // seamless
696 0x1b2: 0x8703, // dfn
697 0x1b3: 0x5c705, // defer
698 0x1b4: 0xc303, // low
699 0x1b5: 0x19a03, // rtc
700 0x1b6: 0x5230b, // onmouseover
701 0x1b7: 0x2b20a, // novalidate
702 0x1b8: 0x71c0a, // workertype
703 0x1ba: 0x3cd07, // itemref
704 0x1bd: 0x1, // a
705 0x1be: 0x31803, // map
706 0x1bf: 0x400c, // ontimeupdate
707 0x1c0: 0x15e07, // bgsound
708 0x1c1: 0x3206, // keygen
709 0x1c2: 0x2705, // tbody
710 0x1c5: 0x64406, // onshow
711 0x1c7: 0x2501, // s
712 0x1c8: 0x6607, // pattern
713 0x1cc: 0x14d10, // oncanplaythrough
714 0x1ce: 0x2d702, // dd
715 0x1cf: 0x6f906, // srcset
716 0x1d0: 0x17003, // big
717 0x1d2: 0x65108, // sortable
718 0x1d3: 0x48007, // onkeyup
719 0x1d5: 0x5a406, // onplay
720 0x1d7: 0x4b804, // meta
721 0x1d8: 0x40306, // ondrop
722 0x1da: 0x60008, // onscroll
723 0x1db: 0x1fb0b, // crossorigin
724 0x1dc: 0x5730a, // onpageshow
725 0x1dd: 0x4, // abbr
726 0x1de: 0x9202, // td
727 0x1df: 0x58b0f, // contenteditable
728 0x1e0: 0x27206, // action
729 0x1e1: 0x1400b, // playsinline
730 0x1e2: 0x43107, // onfocus
731 0x1e3: 0x2e008, // hreflang
732 0x1e5: 0x5160a, // onmouseout
733 0x1e6: 0x5ea07, // onreset
734 0x1e7: 0x13c08, // autoplay
735 0x1e8: 0x63109, // onseeking
736 0x1ea: 0x67506, // scoped
737 0x1ec: 0x30a, // radiogroup
738 0x1ee: 0x3800b, // contextmenu
739 0x1ef: 0x52e09, // onmouseup
740 0x1f1: 0x2ca06, // hgroup
741 0x1f2: 0x2080f, // allowfullscreen
742 0x1f3: 0x4be08, // tabindex
743 0x1f6: 0x30f07, // isindex
744 0x1f7: 0x1a0e, // accept-charset
745 0x1f8: 0x2ae0e, // formnovalidate
746 0x1fb: 0x1c90e, // annotation-xml
747 0x1fc: 0x6e05, // embed
748 0x1fd: 0x21806, // script
749 0x1fe: 0xbb06, // dialog
750 0x1ff: 0x1d707, // command
751}
752
753const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" +
754 "asefontimeupdateviacacheightmlabelooptgroupatternoembedetail" +
755 "sampictureversedfnoframesetdirnameterowspanomoduleacronymali" +
756 "gnmarkbdialogallowpaymentrequestrikeytypeallowusermediagroup" +
757 "ingaltfooterubyasyncanvasidefaultitleaudioncancelautofocusan" +
758 "dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" +
759 "bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" +
760 "penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" +
761 "ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" +
762 "ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" +
763 "ignObjectforeignobjectformactionautocompleteerrorformenctype" +
764 "mustmatchallengeformmethodformnovalidatetimeformtargethgroup" +
765 "osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" +
766 "ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" +
767 "inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" +
768 "extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" +
769 "enterondragexitemreferrerpolicyondragleaveondragoverondragst" +
770 "articleondropzonemptiedondurationchangeonendedonerroronfocus" +
771 "paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" +
772 "spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" +
773 "onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" +
774 "usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" +
775 "seoveronmouseupromptonmousewheelonofflineononlineonpagehides" +
776 "classectionbluronpageshowbronpastepublicontenteditableonpaus" +
777 "emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" +
778 "jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" +
779 "violationauxclickonseekedonseekingonselectedonshowidth6onsor" +
780 "tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" +
781 "handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" +
782 "wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" +
783 "arysupsvgsystemplateworkertypewrap"
diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go
new file mode 100644
index 0000000..ff7acf2
--- /dev/null
+++ b/vendor/golang.org/x/net/html/const.go
@@ -0,0 +1,111 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7// Section 12.2.4.2 of the HTML5 specification says "The following elements
8// have varying levels of special parsing rules".
9// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
10var isSpecialElementMap = map[string]bool{
11 "address": true,
12 "applet": true,
13 "area": true,
14 "article": true,
15 "aside": true,
16 "base": true,
17 "basefont": true,
18 "bgsound": true,
19 "blockquote": true,
20 "body": true,
21 "br": true,
22 "button": true,
23 "caption": true,
24 "center": true,
25 "col": true,
26 "colgroup": true,
27 "dd": true,
28 "details": true,
29 "dir": true,
30 "div": true,
31 "dl": true,
32 "dt": true,
33 "embed": true,
34 "fieldset": true,
35 "figcaption": true,
36 "figure": true,
37 "footer": true,
38 "form": true,
39 "frame": true,
40 "frameset": true,
41 "h1": true,
42 "h2": true,
43 "h3": true,
44 "h4": true,
45 "h5": true,
46 "h6": true,
47 "head": true,
48 "header": true,
49 "hgroup": true,
50 "hr": true,
51 "html": true,
52 "iframe": true,
53 "img": true,
54 "input": true,
55 "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
56 "li": true,
57 "link": true,
58 "listing": true,
59 "main": true,
60 "marquee": true,
61 "menu": true,
62 "meta": true,
63 "nav": true,
64 "noembed": true,
65 "noframes": true,
66 "noscript": true,
67 "object": true,
68 "ol": true,
69 "p": true,
70 "param": true,
71 "plaintext": true,
72 "pre": true,
73 "script": true,
74 "section": true,
75 "select": true,
76 "source": true,
77 "style": true,
78 "summary": true,
79 "table": true,
80 "tbody": true,
81 "td": true,
82 "template": true,
83 "textarea": true,
84 "tfoot": true,
85 "th": true,
86 "thead": true,
87 "title": true,
88 "tr": true,
89 "track": true,
90 "ul": true,
91 "wbr": true,
92 "xmp": true,
93}
94
95func isSpecialElement(element *Node) bool {
96 switch element.Namespace {
97 case "", "html":
98 return isSpecialElementMap[element.Data]
99 case "math":
100 switch element.Data {
101 case "mi", "mo", "mn", "ms", "mtext", "annotation-xml":
102 return true
103 }
104 case "svg":
105 switch element.Data {
106 case "foreignObject", "desc", "title":
107 return true
108 }
109 }
110 return false
111}
diff --git a/vendor/golang.org/x/net/html/doc.go b/vendor/golang.org/x/net/html/doc.go
new file mode 100644
index 0000000..2466ae3
--- /dev/null
+++ b/vendor/golang.org/x/net/html/doc.go
@@ -0,0 +1,127 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package html implements an HTML5-compliant tokenizer and parser.
7
8Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
9caller's responsibility to ensure that r provides UTF-8 encoded HTML.
10
11 z := html.NewTokenizer(r)
12
13Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
14which parses the next token and returns its type, or an error:
15
16 for {
17 tt := z.Next()
18 if tt == html.ErrorToken {
19 // ...
20 return ...
21 }
22 // Process the current token.
23 }
24
25There are two APIs for retrieving the current token. The high-level API is to
26call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
27allow optionally calling Raw after Next but before Token, Text, TagName, or
28TagAttr. In EBNF notation, the valid call sequence per token is:
29
30 Next {Raw} [ Token | Text | TagName {TagAttr} ]
31
32Token returns an independent data structure that completely describes a token.
33Entities (such as "&lt;") are unescaped, tag names and attribute keys are
34lower-cased, and attributes are collected into a []Attribute. For example:
35
36 for {
37 if z.Next() == html.ErrorToken {
38 // Returning io.EOF indicates success.
39 return z.Err()
40 }
41 emitToken(z.Token())
42 }
43
44The low-level API performs fewer allocations and copies, but the contents of
45the []byte values returned by Text, TagName and TagAttr may change on the next
46call to Next. For example, to extract an HTML page's anchor text:
47
48 depth := 0
49 for {
50 tt := z.Next()
51 switch tt {
52 case html.ErrorToken:
53 return z.Err()
54 case html.TextToken:
55 if depth > 0 {
56 // emitBytes should copy the []byte it receives,
57 // if it doesn't process it immediately.
58 emitBytes(z.Text())
59 }
60 case html.StartTagToken, html.EndTagToken:
61 tn, _ := z.TagName()
62 if len(tn) == 1 && tn[0] == 'a' {
63 if tt == html.StartTagToken {
64 depth++
65 } else {
66 depth--
67 }
68 }
69 }
70 }
71
72Parsing is done by calling Parse with an io.Reader, which returns the root of
73the parse tree (the document element) as a *Node. It is the caller's
74responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
75example, to process each anchor node in depth-first order:
76
77 doc, err := html.Parse(r)
78 if err != nil {
79 // ...
80 }
81 var f func(*html.Node)
82 f = func(n *html.Node) {
83 if n.Type == html.ElementNode && n.Data == "a" {
84 // Do something with n...
85 }
86 for c := n.FirstChild; c != nil; c = c.NextSibling {
87 f(c)
88 }
89 }
90 f(doc)
91
92The relevant specifications include:
93https://html.spec.whatwg.org/multipage/syntax.html and
94https://html.spec.whatwg.org/multipage/syntax.html#tokenization
95
96# Security Considerations
97
98Care should be taken when parsing and interpreting HTML, whether full documents
99or fragments, within the framework of the HTML specification, especially with
100regard to untrusted inputs.
101
102This package provides both a tokenizer and a parser, which implement the
103tokenization, and tokenization and tree construction stages of the WHATWG HTML
104parsing specification respectively. While the tokenizer parses and normalizes
105individual HTML tokens, only the parser constructs the DOM tree from the
106tokenized HTML, as described in the tree construction stage of the
107specification, dynamically modifying or extending the document's DOM tree.
108
109If your use case requires semantically well-formed HTML documents, as defined by
110the WHATWG specification, the parser should be used rather than the tokenizer.
111
112In security contexts, if trust decisions are being made using the tokenized or
113parsed content, the input must be re-serialized (for instance by using Render or
114Token.String) in order for those trust decisions to hold, as the process of
115tokenization or parsing may alter the content.
116*/
117package html // import "golang.org/x/net/html"
118
119// The tokenization algorithm implemented by this package is not a line-by-line
120// transliteration of the relatively verbose state-machine in the WHATWG
121// specification. A more direct approach is used instead, where the program
122// counter implies the state, such as whether it is tokenizing a tag or a text
123// node. Specification compliance is verified by checking expected and actual
124// outputs over a test suite rather than aiming for algorithmic fidelity.
125
126// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
127// TODO(nigeltao): How does parsing interact with a JavaScript engine?
diff --git a/vendor/golang.org/x/net/html/doctype.go b/vendor/golang.org/x/net/html/doctype.go
new file mode 100644
index 0000000..c484e5a
--- /dev/null
+++ b/vendor/golang.org/x/net/html/doctype.go
@@ -0,0 +1,156 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "strings"
9)
10
11// parseDoctype parses the data from a DoctypeToken into a name,
12// public identifier, and system identifier. It returns a Node whose Type
13// is DoctypeNode, whose Data is the name, and which has attributes
14// named "system" and "public" for the two identifiers if they were present.
15// quirks is whether the document should be parsed in "quirks mode".
16func parseDoctype(s string) (n *Node, quirks bool) {
17 n = &Node{Type: DoctypeNode}
18
19 // Find the name.
20 space := strings.IndexAny(s, whitespace)
21 if space == -1 {
22 space = len(s)
23 }
24 n.Data = s[:space]
25 // The comparison to "html" is case-sensitive.
26 if n.Data != "html" {
27 quirks = true
28 }
29 n.Data = strings.ToLower(n.Data)
30 s = strings.TrimLeft(s[space:], whitespace)
31
32 if len(s) < 6 {
33 // It can't start with "PUBLIC" or "SYSTEM".
34 // Ignore the rest of the string.
35 return n, quirks || s != ""
36 }
37
38 key := strings.ToLower(s[:6])
39 s = s[6:]
40 for key == "public" || key == "system" {
41 s = strings.TrimLeft(s, whitespace)
42 if s == "" {
43 break
44 }
45 quote := s[0]
46 if quote != '"' && quote != '\'' {
47 break
48 }
49 s = s[1:]
50 q := strings.IndexRune(s, rune(quote))
51 var id string
52 if q == -1 {
53 id = s
54 s = ""
55 } else {
56 id = s[:q]
57 s = s[q+1:]
58 }
59 n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
60 if key == "public" {
61 key = "system"
62 } else {
63 key = ""
64 }
65 }
66
67 if key != "" || s != "" {
68 quirks = true
69 } else if len(n.Attr) > 0 {
70 if n.Attr[0].Key == "public" {
71 public := strings.ToLower(n.Attr[0].Val)
72 switch public {
73 case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
74 quirks = true
75 default:
76 for _, q := range quirkyIDs {
77 if strings.HasPrefix(public, q) {
78 quirks = true
79 break
80 }
81 }
82 }
83 // The following two public IDs only cause quirks mode if there is no system ID.
84 if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
85 strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
86 quirks = true
87 }
88 }
89 if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
90 strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
91 quirks = true
92 }
93 }
94
95 return n, quirks
96}
97
98// quirkyIDs lists the public doctype identifier prefixes that cause a document
99// to be interpreted in quirks mode. Entries must be lower case: parseDoctype lower-cases the public identifier and tests it with strings.HasPrefix against each entry.
100var quirkyIDs = []string{
101 "+//silmaril//dtd html pro v0r11 19970101//",
102 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
103 "-//as//dtd html 3.0 aswedit + extensions//",
104 "-//ietf//dtd html 2.0 level 1//",
105 "-//ietf//dtd html 2.0 level 2//",
106 "-//ietf//dtd html 2.0 strict level 1//",
107 "-//ietf//dtd html 2.0 strict level 2//",
108 "-//ietf//dtd html 2.0 strict//",
109 "-//ietf//dtd html 2.0//",
110 "-//ietf//dtd html 2.1e//",
111 "-//ietf//dtd html 3.0//",
112 "-//ietf//dtd html 3.2 final//",
113 "-//ietf//dtd html 3.2//",
114 "-//ietf//dtd html 3//",
115 "-//ietf//dtd html level 0//",
116 "-//ietf//dtd html level 1//",
117 "-//ietf//dtd html level 2//",
118 "-//ietf//dtd html level 3//",
119 "-//ietf//dtd html strict level 0//",
120 "-//ietf//dtd html strict level 1//",
121 "-//ietf//dtd html strict level 2//",
122 "-//ietf//dtd html strict level 3//",
123 "-//ietf//dtd html strict//",
124 "-//ietf//dtd html//",
125 "-//metrius//dtd metrius presentational//",
126 "-//microsoft//dtd internet explorer 2.0 html strict//",
127 "-//microsoft//dtd internet explorer 2.0 html//",
128 "-//microsoft//dtd internet explorer 2.0 tables//",
129 "-//microsoft//dtd internet explorer 3.0 html strict//",
130 "-//microsoft//dtd internet explorer 3.0 html//",
131 "-//microsoft//dtd internet explorer 3.0 tables//",
132 "-//netscape comm. corp.//dtd html//",
133 "-//netscape comm. corp.//dtd strict html//",
134 "-//o'reilly and associates//dtd html 2.0//",
135 "-//o'reilly and associates//dtd html extended 1.0//",
136 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
137 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
138 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
139 "-//spyglass//dtd html 2.0 extended//",
140 "-//sq//dtd html 2.0 hotmetal + extensions//",
141 "-//sun microsystems corp.//dtd hotjava html//",
142 "-//sun microsystems corp.//dtd hotjava strict html//",
143 "-//w3c//dtd html 3 1995-03-24//",
144 "-//w3c//dtd html 3.2 draft//",
145 "-//w3c//dtd html 3.2 final//",
146 "-//w3c//dtd html 3.2//",
147 "-//w3c//dtd html 3.2s draft//",
148 "-//w3c//dtd html 4.0 frameset//",
149 "-//w3c//dtd html 4.0 transitional//",
150 "-//w3c//dtd html experimental 19960712//",
151 "-//w3c//dtd html experimental 970421//",
152 "-//w3c//dtd w3 html//",
153 "-//w3o//dtd w3 html 3.0//",
154 "-//webtechs//dtd mozilla html 2.0//",
155 "-//webtechs//dtd mozilla html//",
156}
diff --git a/vendor/golang.org/x/net/html/entity.go b/vendor/golang.org/x/net/html/entity.go
new file mode 100644
index 0000000..b628880
--- /dev/null
+++ b/vendor/golang.org/x/net/html/entity.go
@@ -0,0 +1,2253 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7// longestEntityWithoutSemicolon is an upper bound on entity name length: all entities that do not end with ';' are 6 or fewer bytes long.
8const longestEntityWithoutSemicolon = 6
9
10// entity is a map from HTML entity names to their values. The semicolon matters:
11// https://html.spec.whatwg.org/multipage/syntax.html#named-character-references
12// lists both "amp" and "amp;" as two separate entries.
13//
14// Note that the HTML5 list is larger than the HTML4 list at
15// http://www.w3.org/TR/html4/sgml/entities.html
16var entity = map[string]rune{
17 "AElig;": '\U000000C6',
18 "AMP;": '\U00000026',
19 "Aacute;": '\U000000C1',
20 "Abreve;": '\U00000102',
21 "Acirc;": '\U000000C2',
22 "Acy;": '\U00000410',
23 "Afr;": '\U0001D504',
24 "Agrave;": '\U000000C0',
25 "Alpha;": '\U00000391',
26 "Amacr;": '\U00000100',
27 "And;": '\U00002A53',
28 "Aogon;": '\U00000104',
29 "Aopf;": '\U0001D538',
30 "ApplyFunction;": '\U00002061',
31 "Aring;": '\U000000C5',
32 "Ascr;": '\U0001D49C',
33 "Assign;": '\U00002254',
34 "Atilde;": '\U000000C3',
35 "Auml;": '\U000000C4',
36 "Backslash;": '\U00002216',
37 "Barv;": '\U00002AE7',
38 "Barwed;": '\U00002306',
39 "Bcy;": '\U00000411',
40 "Because;": '\U00002235',
41 "Bernoullis;": '\U0000212C',
42 "Beta;": '\U00000392',
43 "Bfr;": '\U0001D505',
44 "Bopf;": '\U0001D539',
45 "Breve;": '\U000002D8',
46 "Bscr;": '\U0000212C',
47 "Bumpeq;": '\U0000224E',
48 "CHcy;": '\U00000427',
49 "COPY;": '\U000000A9',
50 "Cacute;": '\U00000106',
51 "Cap;": '\U000022D2',
52 "CapitalDifferentialD;": '\U00002145',
53 "Cayleys;": '\U0000212D',
54 "Ccaron;": '\U0000010C',
55 "Ccedil;": '\U000000C7',
56 "Ccirc;": '\U00000108',
57 "Cconint;": '\U00002230',
58 "Cdot;": '\U0000010A',
59 "Cedilla;": '\U000000B8',
60 "CenterDot;": '\U000000B7',
61 "Cfr;": '\U0000212D',
62 "Chi;": '\U000003A7',
63 "CircleDot;": '\U00002299',
64 "CircleMinus;": '\U00002296',
65 "CirclePlus;": '\U00002295',
66 "CircleTimes;": '\U00002297',
67 "ClockwiseContourIntegral;": '\U00002232',
68 "CloseCurlyDoubleQuote;": '\U0000201D',
69 "CloseCurlyQuote;": '\U00002019',
70 "Colon;": '\U00002237',
71 "Colone;": '\U00002A74',
72 "Congruent;": '\U00002261',
73 "Conint;": '\U0000222F',
74 "ContourIntegral;": '\U0000222E',
75 "Copf;": '\U00002102',
76 "Coproduct;": '\U00002210',
77 "CounterClockwiseContourIntegral;": '\U00002233',
78 "Cross;": '\U00002A2F',
79 "Cscr;": '\U0001D49E',
80 "Cup;": '\U000022D3',
81 "CupCap;": '\U0000224D',
82 "DD;": '\U00002145',
83 "DDotrahd;": '\U00002911',
84 "DJcy;": '\U00000402',
85 "DScy;": '\U00000405',
86 "DZcy;": '\U0000040F',
87 "Dagger;": '\U00002021',
88 "Darr;": '\U000021A1',
89 "Dashv;": '\U00002AE4',
90 "Dcaron;": '\U0000010E',
91 "Dcy;": '\U00000414',
92 "Del;": '\U00002207',
93 "Delta;": '\U00000394',
94 "Dfr;": '\U0001D507',
95 "DiacriticalAcute;": '\U000000B4',
96 "DiacriticalDot;": '\U000002D9',
97 "DiacriticalDoubleAcute;": '\U000002DD',
98 "DiacriticalGrave;": '\U00000060',
99 "DiacriticalTilde;": '\U000002DC',
100 "Diamond;": '\U000022C4',
101 "DifferentialD;": '\U00002146',
102 "Dopf;": '\U0001D53B',
103 "Dot;": '\U000000A8',
104 "DotDot;": '\U000020DC',
105 "DotEqual;": '\U00002250',
106 "DoubleContourIntegral;": '\U0000222F',
107 "DoubleDot;": '\U000000A8',
108 "DoubleDownArrow;": '\U000021D3',
109 "DoubleLeftArrow;": '\U000021D0',
110 "DoubleLeftRightArrow;": '\U000021D4',
111 "DoubleLeftTee;": '\U00002AE4',
112 "DoubleLongLeftArrow;": '\U000027F8',
113 "DoubleLongLeftRightArrow;": '\U000027FA',
114 "DoubleLongRightArrow;": '\U000027F9',
115 "DoubleRightArrow;": '\U000021D2',
116 "DoubleRightTee;": '\U000022A8',
117 "DoubleUpArrow;": '\U000021D1',
118 "DoubleUpDownArrow;": '\U000021D5',
119 "DoubleVerticalBar;": '\U00002225',
120 "DownArrow;": '\U00002193',
121 "DownArrowBar;": '\U00002913',
122 "DownArrowUpArrow;": '\U000021F5',
123 "DownBreve;": '\U00000311',
124 "DownLeftRightVector;": '\U00002950',
125 "DownLeftTeeVector;": '\U0000295E',
126 "DownLeftVector;": '\U000021BD',
127 "DownLeftVectorBar;": '\U00002956',
128 "DownRightTeeVector;": '\U0000295F',
129 "DownRightVector;": '\U000021C1',
130 "DownRightVectorBar;": '\U00002957',
131 "DownTee;": '\U000022A4',
132 "DownTeeArrow;": '\U000021A7',
133 "Downarrow;": '\U000021D3',
134 "Dscr;": '\U0001D49F',
135 "Dstrok;": '\U00000110',
136 "ENG;": '\U0000014A',
137 "ETH;": '\U000000D0',
138 "Eacute;": '\U000000C9',
139 "Ecaron;": '\U0000011A',
140 "Ecirc;": '\U000000CA',
141 "Ecy;": '\U0000042D',
142 "Edot;": '\U00000116',
143 "Efr;": '\U0001D508',
144 "Egrave;": '\U000000C8',
145 "Element;": '\U00002208',
146 "Emacr;": '\U00000112',
147 "EmptySmallSquare;": '\U000025FB',
148 "EmptyVerySmallSquare;": '\U000025AB',
149 "Eogon;": '\U00000118',
150 "Eopf;": '\U0001D53C',
151 "Epsilon;": '\U00000395',
152 "Equal;": '\U00002A75',
153 "EqualTilde;": '\U00002242',
154 "Equilibrium;": '\U000021CC',
155 "Escr;": '\U00002130',
156 "Esim;": '\U00002A73',
157 "Eta;": '\U00000397',
158 "Euml;": '\U000000CB',
159 "Exists;": '\U00002203',
160 "ExponentialE;": '\U00002147',
161 "Fcy;": '\U00000424',
162 "Ffr;": '\U0001D509',
163 "FilledSmallSquare;": '\U000025FC',
164 "FilledVerySmallSquare;": '\U000025AA',
165 "Fopf;": '\U0001D53D',
166 "ForAll;": '\U00002200',
167 "Fouriertrf;": '\U00002131',
168 "Fscr;": '\U00002131',
169 "GJcy;": '\U00000403',
170 "GT;": '\U0000003E',
171 "Gamma;": '\U00000393',
172 "Gammad;": '\U000003DC',
173 "Gbreve;": '\U0000011E',
174 "Gcedil;": '\U00000122',
175 "Gcirc;": '\U0000011C',
176 "Gcy;": '\U00000413',
177 "Gdot;": '\U00000120',
178 "Gfr;": '\U0001D50A',
179 "Gg;": '\U000022D9',
180 "Gopf;": '\U0001D53E',
181 "GreaterEqual;": '\U00002265',
182 "GreaterEqualLess;": '\U000022DB',
183 "GreaterFullEqual;": '\U00002267',
184 "GreaterGreater;": '\U00002AA2',
185 "GreaterLess;": '\U00002277',
186 "GreaterSlantEqual;": '\U00002A7E',
187 "GreaterTilde;": '\U00002273',
188 "Gscr;": '\U0001D4A2',
189 "Gt;": '\U0000226B',
190 "HARDcy;": '\U0000042A',
191 "Hacek;": '\U000002C7',
192 "Hat;": '\U0000005E',
193 "Hcirc;": '\U00000124',
194 "Hfr;": '\U0000210C',
195 "HilbertSpace;": '\U0000210B',
196 "Hopf;": '\U0000210D',
197 "HorizontalLine;": '\U00002500',
198 "Hscr;": '\U0000210B',
199 "Hstrok;": '\U00000126',
200 "HumpDownHump;": '\U0000224E',
201 "HumpEqual;": '\U0000224F',
202 "IEcy;": '\U00000415',
203 "IJlig;": '\U00000132',
204 "IOcy;": '\U00000401',
205 "Iacute;": '\U000000CD',
206 "Icirc;": '\U000000CE',
207 "Icy;": '\U00000418',
208 "Idot;": '\U00000130',
209 "Ifr;": '\U00002111',
210 "Igrave;": '\U000000CC',
211 "Im;": '\U00002111',
212 "Imacr;": '\U0000012A',
213 "ImaginaryI;": '\U00002148',
214 "Implies;": '\U000021D2',
215 "Int;": '\U0000222C',
216 "Integral;": '\U0000222B',
217 "Intersection;": '\U000022C2',
218 "InvisibleComma;": '\U00002063',
219 "InvisibleTimes;": '\U00002062',
220 "Iogon;": '\U0000012E',
221 "Iopf;": '\U0001D540',
222 "Iota;": '\U00000399',
223 "Iscr;": '\U00002110',
224 "Itilde;": '\U00000128',
225 "Iukcy;": '\U00000406',
226 "Iuml;": '\U000000CF',
227 "Jcirc;": '\U00000134',
228 "Jcy;": '\U00000419',
229 "Jfr;": '\U0001D50D',
230 "Jopf;": '\U0001D541',
231 "Jscr;": '\U0001D4A5',
232 "Jsercy;": '\U00000408',
233 "Jukcy;": '\U00000404',
234 "KHcy;": '\U00000425',
235 "KJcy;": '\U0000040C',
236 "Kappa;": '\U0000039A',
237 "Kcedil;": '\U00000136',
238 "Kcy;": '\U0000041A',
239 "Kfr;": '\U0001D50E',
240 "Kopf;": '\U0001D542',
241 "Kscr;": '\U0001D4A6',
242 "LJcy;": '\U00000409',
243 "LT;": '\U0000003C',
244 "Lacute;": '\U00000139',
245 "Lambda;": '\U0000039B',
246 "Lang;": '\U000027EA',
247 "Laplacetrf;": '\U00002112',
248 "Larr;": '\U0000219E',
249 "Lcaron;": '\U0000013D',
250 "Lcedil;": '\U0000013B',
251 "Lcy;": '\U0000041B',
252 "LeftAngleBracket;": '\U000027E8',
253 "LeftArrow;": '\U00002190',
254 "LeftArrowBar;": '\U000021E4',
255 "LeftArrowRightArrow;": '\U000021C6',
256 "LeftCeiling;": '\U00002308',
257 "LeftDoubleBracket;": '\U000027E6',
258 "LeftDownTeeVector;": '\U00002961',
259 "LeftDownVector;": '\U000021C3',
260 "LeftDownVectorBar;": '\U00002959',
261 "LeftFloor;": '\U0000230A',
262 "LeftRightArrow;": '\U00002194',
263 "LeftRightVector;": '\U0000294E',
264 "LeftTee;": '\U000022A3',
265 "LeftTeeArrow;": '\U000021A4',
266 "LeftTeeVector;": '\U0000295A',
267 "LeftTriangle;": '\U000022B2',
268 "LeftTriangleBar;": '\U000029CF',
269 "LeftTriangleEqual;": '\U000022B4',
270 "LeftUpDownVector;": '\U00002951',
271 "LeftUpTeeVector;": '\U00002960',
272 "LeftUpVector;": '\U000021BF',
273 "LeftUpVectorBar;": '\U00002958',
274 "LeftVector;": '\U000021BC',
275 "LeftVectorBar;": '\U00002952',
276 "Leftarrow;": '\U000021D0',
277 "Leftrightarrow;": '\U000021D4',
278 "LessEqualGreater;": '\U000022DA',
279 "LessFullEqual;": '\U00002266',
280 "LessGreater;": '\U00002276',
281 "LessLess;": '\U00002AA1',
282 "LessSlantEqual;": '\U00002A7D',
283 "LessTilde;": '\U00002272',
284 "Lfr;": '\U0001D50F',
285 "Ll;": '\U000022D8',
286 "Lleftarrow;": '\U000021DA',
287 "Lmidot;": '\U0000013F',
288 "LongLeftArrow;": '\U000027F5',
289 "LongLeftRightArrow;": '\U000027F7',
290 "LongRightArrow;": '\U000027F6',
291 "Longleftarrow;": '\U000027F8',
292 "Longleftrightarrow;": '\U000027FA',
293 "Longrightarrow;": '\U000027F9',
294 "Lopf;": '\U0001D543',
295 "LowerLeftArrow;": '\U00002199',
296 "LowerRightArrow;": '\U00002198',
297 "Lscr;": '\U00002112',
298 "Lsh;": '\U000021B0',
299 "Lstrok;": '\U00000141',
300 "Lt;": '\U0000226A',
301 "Map;": '\U00002905',
302 "Mcy;": '\U0000041C',
303 "MediumSpace;": '\U0000205F',
304 "Mellintrf;": '\U00002133',
305 "Mfr;": '\U0001D510',
306 "MinusPlus;": '\U00002213',
307 "Mopf;": '\U0001D544',
308 "Mscr;": '\U00002133',
309 "Mu;": '\U0000039C',
310 "NJcy;": '\U0000040A',
311 "Nacute;": '\U00000143',
312 "Ncaron;": '\U00000147',
313 "Ncedil;": '\U00000145',
314 "Ncy;": '\U0000041D',
315 "NegativeMediumSpace;": '\U0000200B',
316 "NegativeThickSpace;": '\U0000200B',
317 "NegativeThinSpace;": '\U0000200B',
318 "NegativeVeryThinSpace;": '\U0000200B',
319 "NestedGreaterGreater;": '\U0000226B',
320 "NestedLessLess;": '\U0000226A',
321 "NewLine;": '\U0000000A',
322 "Nfr;": '\U0001D511',
323 "NoBreak;": '\U00002060',
324 "NonBreakingSpace;": '\U000000A0',
325 "Nopf;": '\U00002115',
326 "Not;": '\U00002AEC',
327 "NotCongruent;": '\U00002262',
328 "NotCupCap;": '\U0000226D',
329 "NotDoubleVerticalBar;": '\U00002226',
330 "NotElement;": '\U00002209',
331 "NotEqual;": '\U00002260',
332 "NotExists;": '\U00002204',
333 "NotGreater;": '\U0000226F',
334 "NotGreaterEqual;": '\U00002271',
335 "NotGreaterLess;": '\U00002279',
336 "NotGreaterTilde;": '\U00002275',
337 "NotLeftTriangle;": '\U000022EA',
338 "NotLeftTriangleEqual;": '\U000022EC',
339 "NotLess;": '\U0000226E',
340 "NotLessEqual;": '\U00002270',
341 "NotLessGreater;": '\U00002278',
342 "NotLessTilde;": '\U00002274',
343 "NotPrecedes;": '\U00002280',
344 "NotPrecedesSlantEqual;": '\U000022E0',
345 "NotReverseElement;": '\U0000220C',
346 "NotRightTriangle;": '\U000022EB',
347 "NotRightTriangleEqual;": '\U000022ED',
348 "NotSquareSubsetEqual;": '\U000022E2',
349 "NotSquareSupersetEqual;": '\U000022E3',
350 "NotSubsetEqual;": '\U00002288',
351 "NotSucceeds;": '\U00002281',
352 "NotSucceedsSlantEqual;": '\U000022E1',
353 "NotSupersetEqual;": '\U00002289',
354 "NotTilde;": '\U00002241',
355 "NotTildeEqual;": '\U00002244',
356 "NotTildeFullEqual;": '\U00002247',
357 "NotTildeTilde;": '\U00002249',
358 "NotVerticalBar;": '\U00002224',
359 "Nscr;": '\U0001D4A9',
360 "Ntilde;": '\U000000D1',
361 "Nu;": '\U0000039D',
362 "OElig;": '\U00000152',
363 "Oacute;": '\U000000D3',
364 "Ocirc;": '\U000000D4',
365 "Ocy;": '\U0000041E',
366 "Odblac;": '\U00000150',
367 "Ofr;": '\U0001D512',
368 "Ograve;": '\U000000D2',
369 "Omacr;": '\U0000014C',
370 "Omega;": '\U000003A9',
371 "Omicron;": '\U0000039F',
372 "Oopf;": '\U0001D546',
373 "OpenCurlyDoubleQuote;": '\U0000201C',
374 "OpenCurlyQuote;": '\U00002018',
375 "Or;": '\U00002A54',
376 "Oscr;": '\U0001D4AA',
377 "Oslash;": '\U000000D8',
378 "Otilde;": '\U000000D5',
379 "Otimes;": '\U00002A37',
380 "Ouml;": '\U000000D6',
381 "OverBar;": '\U0000203E',
382 "OverBrace;": '\U000023DE',
383 "OverBracket;": '\U000023B4',
384 "OverParenthesis;": '\U000023DC',
385 "PartialD;": '\U00002202',
386 "Pcy;": '\U0000041F',
387 "Pfr;": '\U0001D513',
388 "Phi;": '\U000003A6',
389 "Pi;": '\U000003A0',
390 "PlusMinus;": '\U000000B1',
391 "Poincareplane;": '\U0000210C',
392 "Popf;": '\U00002119',
393 "Pr;": '\U00002ABB',
394 "Precedes;": '\U0000227A',
395 "PrecedesEqual;": '\U00002AAF',
396 "PrecedesSlantEqual;": '\U0000227C',
397 "PrecedesTilde;": '\U0000227E',
398 "Prime;": '\U00002033',
399 "Product;": '\U0000220F',
400 "Proportion;": '\U00002237',
401 "Proportional;": '\U0000221D',
402 "Pscr;": '\U0001D4AB',
403 "Psi;": '\U000003A8',
404 "QUOT;": '\U00000022',
405 "Qfr;": '\U0001D514',
406 "Qopf;": '\U0000211A',
407 "Qscr;": '\U0001D4AC',
408 "RBarr;": '\U00002910',
409 "REG;": '\U000000AE',
410 "Racute;": '\U00000154',
411 "Rang;": '\U000027EB',
412 "Rarr;": '\U000021A0',
413 "Rarrtl;": '\U00002916',
414 "Rcaron;": '\U00000158',
415 "Rcedil;": '\U00000156',
416 "Rcy;": '\U00000420',
417 "Re;": '\U0000211C',
418 "ReverseElement;": '\U0000220B',
419 "ReverseEquilibrium;": '\U000021CB',
420 "ReverseUpEquilibrium;": '\U0000296F',
421 "Rfr;": '\U0000211C',
422 "Rho;": '\U000003A1',
423 "RightAngleBracket;": '\U000027E9',
424 "RightArrow;": '\U00002192',
425 "RightArrowBar;": '\U000021E5',
426 "RightArrowLeftArrow;": '\U000021C4',
427 "RightCeiling;": '\U00002309',
428 "RightDoubleBracket;": '\U000027E7',
429 "RightDownTeeVector;": '\U0000295D',
430 "RightDownVector;": '\U000021C2',
431 "RightDownVectorBar;": '\U00002955',
432 "RightFloor;": '\U0000230B',
433 "RightTee;": '\U000022A2',
434 "RightTeeArrow;": '\U000021A6',
435 "RightTeeVector;": '\U0000295B',
436 "RightTriangle;": '\U000022B3',
437 "RightTriangleBar;": '\U000029D0',
438 "RightTriangleEqual;": '\U000022B5',
439 "RightUpDownVector;": '\U0000294F',
440 "RightUpTeeVector;": '\U0000295C',
441 "RightUpVector;": '\U000021BE',
442 "RightUpVectorBar;": '\U00002954',
443 "RightVector;": '\U000021C0',
444 "RightVectorBar;": '\U00002953',
445 "Rightarrow;": '\U000021D2',
446 "Ropf;": '\U0000211D',
447 "RoundImplies;": '\U00002970',
448 "Rrightarrow;": '\U000021DB',
449 "Rscr;": '\U0000211B',
450 "Rsh;": '\U000021B1',
451 "RuleDelayed;": '\U000029F4',
452 "SHCHcy;": '\U00000429',
453 "SHcy;": '\U00000428',
454 "SOFTcy;": '\U0000042C',
455 "Sacute;": '\U0000015A',
456 "Sc;": '\U00002ABC',
457 "Scaron;": '\U00000160',
458 "Scedil;": '\U0000015E',
459 "Scirc;": '\U0000015C',
460 "Scy;": '\U00000421',
461 "Sfr;": '\U0001D516',
462 "ShortDownArrow;": '\U00002193',
463 "ShortLeftArrow;": '\U00002190',
464 "ShortRightArrow;": '\U00002192',
465 "ShortUpArrow;": '\U00002191',
466 "Sigma;": '\U000003A3',
467 "SmallCircle;": '\U00002218',
468 "Sopf;": '\U0001D54A',
469 "Sqrt;": '\U0000221A',
470 "Square;": '\U000025A1',
471 "SquareIntersection;": '\U00002293',
472 "SquareSubset;": '\U0000228F',
473 "SquareSubsetEqual;": '\U00002291',
474 "SquareSuperset;": '\U00002290',
475 "SquareSupersetEqual;": '\U00002292',
476 "SquareUnion;": '\U00002294',
477 "Sscr;": '\U0001D4AE',
478 "Star;": '\U000022C6',
479 "Sub;": '\U000022D0',
480 "Subset;": '\U000022D0',
481 "SubsetEqual;": '\U00002286',
482 "Succeeds;": '\U0000227B',
483 "SucceedsEqual;": '\U00002AB0',
484 "SucceedsSlantEqual;": '\U0000227D',
485 "SucceedsTilde;": '\U0000227F',
486 "SuchThat;": '\U0000220B',
487 "Sum;": '\U00002211',
488 "Sup;": '\U000022D1',
489 "Superset;": '\U00002283',
490 "SupersetEqual;": '\U00002287',
491 "Supset;": '\U000022D1',
492 "THORN;": '\U000000DE',
493 "TRADE;": '\U00002122',
494 "TSHcy;": '\U0000040B',
495 "TScy;": '\U00000426',
496 "Tab;": '\U00000009',
497 "Tau;": '\U000003A4',
498 "Tcaron;": '\U00000164',
499 "Tcedil;": '\U00000162',
500 "Tcy;": '\U00000422',
501 "Tfr;": '\U0001D517',
502 "Therefore;": '\U00002234',
503 "Theta;": '\U00000398',
504 "ThinSpace;": '\U00002009',
505 "Tilde;": '\U0000223C',
506 "TildeEqual;": '\U00002243',
507 "TildeFullEqual;": '\U00002245',
508 "TildeTilde;": '\U00002248',
509 "Topf;": '\U0001D54B',
510 "TripleDot;": '\U000020DB',
511 "Tscr;": '\U0001D4AF',
512 "Tstrok;": '\U00000166',
513 "Uacute;": '\U000000DA',
514 "Uarr;": '\U0000219F',
515 "Uarrocir;": '\U00002949',
516 "Ubrcy;": '\U0000040E',
517 "Ubreve;": '\U0000016C',
518 "Ucirc;": '\U000000DB',
519 "Ucy;": '\U00000423',
520 "Udblac;": '\U00000170',
521 "Ufr;": '\U0001D518',
522 "Ugrave;": '\U000000D9',
523 "Umacr;": '\U0000016A',
524 "UnderBar;": '\U0000005F',
525 "UnderBrace;": '\U000023DF',
526 "UnderBracket;": '\U000023B5',
527 "UnderParenthesis;": '\U000023DD',
528 "Union;": '\U000022C3',
529 "UnionPlus;": '\U0000228E',
530 "Uogon;": '\U00000172',
531 "Uopf;": '\U0001D54C',
532 "UpArrow;": '\U00002191',
533 "UpArrowBar;": '\U00002912',
534 "UpArrowDownArrow;": '\U000021C5',
535 "UpDownArrow;": '\U00002195',
536 "UpEquilibrium;": '\U0000296E',
537 "UpTee;": '\U000022A5',
538 "UpTeeArrow;": '\U000021A5',
539 "Uparrow;": '\U000021D1',
540 "Updownarrow;": '\U000021D5',
541 "UpperLeftArrow;": '\U00002196',
542 "UpperRightArrow;": '\U00002197',
543 "Upsi;": '\U000003D2',
544 "Upsilon;": '\U000003A5',
545 "Uring;": '\U0000016E',
546 "Uscr;": '\U0001D4B0',
547 "Utilde;": '\U00000168',
548 "Uuml;": '\U000000DC',
549 "VDash;": '\U000022AB',
550 "Vbar;": '\U00002AEB',
551 "Vcy;": '\U00000412',
552 "Vdash;": '\U000022A9',
553 "Vdashl;": '\U00002AE6',
554 "Vee;": '\U000022C1',
555 "Verbar;": '\U00002016',
556 "Vert;": '\U00002016',
557 "VerticalBar;": '\U00002223',
558 "VerticalLine;": '\U0000007C',
559 "VerticalSeparator;": '\U00002758',
560 "VerticalTilde;": '\U00002240',
561 "VeryThinSpace;": '\U0000200A',
562 "Vfr;": '\U0001D519',
563 "Vopf;": '\U0001D54D',
564 "Vscr;": '\U0001D4B1',
565 "Vvdash;": '\U000022AA',
566 "Wcirc;": '\U00000174',
567 "Wedge;": '\U000022C0',
568 "Wfr;": '\U0001D51A',
569 "Wopf;": '\U0001D54E',
570 "Wscr;": '\U0001D4B2',
571 "Xfr;": '\U0001D51B',
572 "Xi;": '\U0000039E',
573 "Xopf;": '\U0001D54F',
574 "Xscr;": '\U0001D4B3',
575 "YAcy;": '\U0000042F',
576 "YIcy;": '\U00000407',
577 "YUcy;": '\U0000042E',
578 "Yacute;": '\U000000DD',
579 "Ycirc;": '\U00000176',
580 "Ycy;": '\U0000042B',
581 "Yfr;": '\U0001D51C',
582 "Yopf;": '\U0001D550',
583 "Yscr;": '\U0001D4B4',
584 "Yuml;": '\U00000178',
585 "ZHcy;": '\U00000416',
586 "Zacute;": '\U00000179',
587 "Zcaron;": '\U0000017D',
588 "Zcy;": '\U00000417',
589 "Zdot;": '\U0000017B',
590 "ZeroWidthSpace;": '\U0000200B',
591 "Zeta;": '\U00000396',
592 "Zfr;": '\U00002128',
593 "Zopf;": '\U00002124',
594 "Zscr;": '\U0001D4B5',
595 "aacute;": '\U000000E1',
596 "abreve;": '\U00000103',
597 "ac;": '\U0000223E',
598 "acd;": '\U0000223F',
599 "acirc;": '\U000000E2',
600 "acute;": '\U000000B4',
601 "acy;": '\U00000430',
602 "aelig;": '\U000000E6',
603 "af;": '\U00002061',
604 "afr;": '\U0001D51E',
605 "agrave;": '\U000000E0',
606 "alefsym;": '\U00002135',
607 "aleph;": '\U00002135',
608 "alpha;": '\U000003B1',
609 "amacr;": '\U00000101',
610 "amalg;": '\U00002A3F',
611 "amp;": '\U00000026',
612 "and;": '\U00002227',
613 "andand;": '\U00002A55',
614 "andd;": '\U00002A5C',
615 "andslope;": '\U00002A58',
616 "andv;": '\U00002A5A',
617 "ang;": '\U00002220',
618 "ange;": '\U000029A4',
619 "angle;": '\U00002220',
620 "angmsd;": '\U00002221',
621 "angmsdaa;": '\U000029A8',
622 "angmsdab;": '\U000029A9',
623 "angmsdac;": '\U000029AA',
624 "angmsdad;": '\U000029AB',
625 "angmsdae;": '\U000029AC',
626 "angmsdaf;": '\U000029AD',
627 "angmsdag;": '\U000029AE',
628 "angmsdah;": '\U000029AF',
629 "angrt;": '\U0000221F',
630 "angrtvb;": '\U000022BE',
631 "angrtvbd;": '\U0000299D',
632 "angsph;": '\U00002222',
633 "angst;": '\U000000C5',
634 "angzarr;": '\U0000237C',
635 "aogon;": '\U00000105',
636 "aopf;": '\U0001D552',
637 "ap;": '\U00002248',
638 "apE;": '\U00002A70',
639 "apacir;": '\U00002A6F',
640 "ape;": '\U0000224A',
641 "apid;": '\U0000224B',
642 "apos;": '\U00000027',
643 "approx;": '\U00002248',
644 "approxeq;": '\U0000224A',
645 "aring;": '\U000000E5',
646 "ascr;": '\U0001D4B6',
647 "ast;": '\U0000002A',
648 "asymp;": '\U00002248',
649 "asympeq;": '\U0000224D',
650 "atilde;": '\U000000E3',
651 "auml;": '\U000000E4',
652 "awconint;": '\U00002233',
653 "awint;": '\U00002A11',
654 "bNot;": '\U00002AED',
655 "backcong;": '\U0000224C',
656 "backepsilon;": '\U000003F6',
657 "backprime;": '\U00002035',
658 "backsim;": '\U0000223D',
659 "backsimeq;": '\U000022CD',
660 "barvee;": '\U000022BD',
661 "barwed;": '\U00002305',
662 "barwedge;": '\U00002305',
663 "bbrk;": '\U000023B5',
664 "bbrktbrk;": '\U000023B6',
665 "bcong;": '\U0000224C',
666 "bcy;": '\U00000431',
667 "bdquo;": '\U0000201E',
668 "becaus;": '\U00002235',
669 "because;": '\U00002235',
670 "bemptyv;": '\U000029B0',
671 "bepsi;": '\U000003F6',
672 "bernou;": '\U0000212C',
673 "beta;": '\U000003B2',
674 "beth;": '\U00002136',
675 "between;": '\U0000226C',
676 "bfr;": '\U0001D51F',
677 "bigcap;": '\U000022C2',
678 "bigcirc;": '\U000025EF',
679 "bigcup;": '\U000022C3',
680 "bigodot;": '\U00002A00',
681 "bigoplus;": '\U00002A01',
682 "bigotimes;": '\U00002A02',
683 "bigsqcup;": '\U00002A06',
684 "bigstar;": '\U00002605',
685 "bigtriangledown;": '\U000025BD',
686 "bigtriangleup;": '\U000025B3',
687 "biguplus;": '\U00002A04',
688 "bigvee;": '\U000022C1',
689 "bigwedge;": '\U000022C0',
690 "bkarow;": '\U0000290D',
691 "blacklozenge;": '\U000029EB',
692 "blacksquare;": '\U000025AA',
693 "blacktriangle;": '\U000025B4',
694 "blacktriangledown;": '\U000025BE',
695 "blacktriangleleft;": '\U000025C2',
696 "blacktriangleright;": '\U000025B8',
697 "blank;": '\U00002423',
698 "blk12;": '\U00002592',
699 "blk14;": '\U00002591',
700 "blk34;": '\U00002593',
701 "block;": '\U00002588',
702 "bnot;": '\U00002310',
703 "bopf;": '\U0001D553',
704 "bot;": '\U000022A5',
705 "bottom;": '\U000022A5',
706 "bowtie;": '\U000022C8',
707 "boxDL;": '\U00002557',
708 "boxDR;": '\U00002554',
709 "boxDl;": '\U00002556',
710 "boxDr;": '\U00002553',
711 "boxH;": '\U00002550',
712 "boxHD;": '\U00002566',
713 "boxHU;": '\U00002569',
714 "boxHd;": '\U00002564',
715 "boxHu;": '\U00002567',
716 "boxUL;": '\U0000255D',
717 "boxUR;": '\U0000255A',
718 "boxUl;": '\U0000255C',
719 "boxUr;": '\U00002559',
720 "boxV;": '\U00002551',
721 "boxVH;": '\U0000256C',
722 "boxVL;": '\U00002563',
723 "boxVR;": '\U00002560',
724 "boxVh;": '\U0000256B',
725 "boxVl;": '\U00002562',
726 "boxVr;": '\U0000255F',
727 "boxbox;": '\U000029C9',
728 "boxdL;": '\U00002555',
729 "boxdR;": '\U00002552',
730 "boxdl;": '\U00002510',
731 "boxdr;": '\U0000250C',
732 "boxh;": '\U00002500',
733 "boxhD;": '\U00002565',
734 "boxhU;": '\U00002568',
735 "boxhd;": '\U0000252C',
736 "boxhu;": '\U00002534',
737 "boxminus;": '\U0000229F',
738 "boxplus;": '\U0000229E',
739 "boxtimes;": '\U000022A0',
740 "boxuL;": '\U0000255B',
741 "boxuR;": '\U00002558',
742 "boxul;": '\U00002518',
743 "boxur;": '\U00002514',
744 "boxv;": '\U00002502',
745 "boxvH;": '\U0000256A',
746 "boxvL;": '\U00002561',
747 "boxvR;": '\U0000255E',
748 "boxvh;": '\U0000253C',
749 "boxvl;": '\U00002524',
750 "boxvr;": '\U0000251C',
751 "bprime;": '\U00002035',
752 "breve;": '\U000002D8',
753 "brvbar;": '\U000000A6',
754 "bscr;": '\U0001D4B7',
755 "bsemi;": '\U0000204F',
756 "bsim;": '\U0000223D',
757 "bsime;": '\U000022CD',
758 "bsol;": '\U0000005C',
759 "bsolb;": '\U000029C5',
760 "bsolhsub;": '\U000027C8',
761 "bull;": '\U00002022',
762 "bullet;": '\U00002022',
763 "bump;": '\U0000224E',
764 "bumpE;": '\U00002AAE',
765 "bumpe;": '\U0000224F',
766 "bumpeq;": '\U0000224F',
767 "cacute;": '\U00000107',
768 "cap;": '\U00002229',
769 "capand;": '\U00002A44',
770 "capbrcup;": '\U00002A49',
771 "capcap;": '\U00002A4B',
772 "capcup;": '\U00002A47',
773 "capdot;": '\U00002A40',
774 "caret;": '\U00002041',
775 "caron;": '\U000002C7',
776 "ccaps;": '\U00002A4D',
777 "ccaron;": '\U0000010D',
778 "ccedil;": '\U000000E7',
779 "ccirc;": '\U00000109',
780 "ccups;": '\U00002A4C',
781 "ccupssm;": '\U00002A50',
782 "cdot;": '\U0000010B',
783 "cedil;": '\U000000B8',
784 "cemptyv;": '\U000029B2',
785 "cent;": '\U000000A2',
786 "centerdot;": '\U000000B7',
787 "cfr;": '\U0001D520',
788 "chcy;": '\U00000447',
789 "check;": '\U00002713',
790 "checkmark;": '\U00002713',
791 "chi;": '\U000003C7',
792 "cir;": '\U000025CB',
793 "cirE;": '\U000029C3',
794 "circ;": '\U000002C6',
795 "circeq;": '\U00002257',
796 "circlearrowleft;": '\U000021BA',
797 "circlearrowright;": '\U000021BB',
798 "circledR;": '\U000000AE',
799 "circledS;": '\U000024C8',
800 "circledast;": '\U0000229B',
801 "circledcirc;": '\U0000229A',
802 "circleddash;": '\U0000229D',
803 "cire;": '\U00002257',
804 "cirfnint;": '\U00002A10',
805 "cirmid;": '\U00002AEF',
806 "cirscir;": '\U000029C2',
807 "clubs;": '\U00002663',
808 "clubsuit;": '\U00002663',
809 "colon;": '\U0000003A',
810 "colone;": '\U00002254',
811 "coloneq;": '\U00002254',
812 "comma;": '\U0000002C',
813 "commat;": '\U00000040',
814 "comp;": '\U00002201',
815 "compfn;": '\U00002218',
816 "complement;": '\U00002201',
817 "complexes;": '\U00002102',
818 "cong;": '\U00002245',
819 "congdot;": '\U00002A6D',
820 "conint;": '\U0000222E',
821 "copf;": '\U0001D554',
822 "coprod;": '\U00002210',
823 "copy;": '\U000000A9',
824 "copysr;": '\U00002117',
825 "crarr;": '\U000021B5',
826 "cross;": '\U00002717',
827 "cscr;": '\U0001D4B8',
828 "csub;": '\U00002ACF',
829 "csube;": '\U00002AD1',
830 "csup;": '\U00002AD0',
831 "csupe;": '\U00002AD2',
832 "ctdot;": '\U000022EF',
833 "cudarrl;": '\U00002938',
834 "cudarrr;": '\U00002935',
835 "cuepr;": '\U000022DE',
836 "cuesc;": '\U000022DF',
837 "cularr;": '\U000021B6',
838 "cularrp;": '\U0000293D',
839 "cup;": '\U0000222A',
840 "cupbrcap;": '\U00002A48',
841 "cupcap;": '\U00002A46',
842 "cupcup;": '\U00002A4A',
843 "cupdot;": '\U0000228D',
844 "cupor;": '\U00002A45',
845 "curarr;": '\U000021B7',
846 "curarrm;": '\U0000293C',
847 "curlyeqprec;": '\U000022DE',
848 "curlyeqsucc;": '\U000022DF',
849 "curlyvee;": '\U000022CE',
850 "curlywedge;": '\U000022CF',
851 "curren;": '\U000000A4',
852 "curvearrowleft;": '\U000021B6',
853 "curvearrowright;": '\U000021B7',
854 "cuvee;": '\U000022CE',
855 "cuwed;": '\U000022CF',
856 "cwconint;": '\U00002232',
857 "cwint;": '\U00002231',
858 "cylcty;": '\U0000232D',
859 "dArr;": '\U000021D3',
860 "dHar;": '\U00002965',
861 "dagger;": '\U00002020',
862 "daleth;": '\U00002138',
863 "darr;": '\U00002193',
864 "dash;": '\U00002010',
865 "dashv;": '\U000022A3',
866 "dbkarow;": '\U0000290F',
867 "dblac;": '\U000002DD',
868 "dcaron;": '\U0000010F',
869 "dcy;": '\U00000434',
870 "dd;": '\U00002146',
871 "ddagger;": '\U00002021',
872 "ddarr;": '\U000021CA',
873 "ddotseq;": '\U00002A77',
874 "deg;": '\U000000B0',
875 "delta;": '\U000003B4',
876 "demptyv;": '\U000029B1',
877 "dfisht;": '\U0000297F',
878 "dfr;": '\U0001D521',
879 "dharl;": '\U000021C3',
880 "dharr;": '\U000021C2',
881 "diam;": '\U000022C4',
882 "diamond;": '\U000022C4',
883 "diamondsuit;": '\U00002666',
884 "diams;": '\U00002666',
885 "die;": '\U000000A8',
886 "digamma;": '\U000003DD',
887 "disin;": '\U000022F2',
888 "div;": '\U000000F7',
889 "divide;": '\U000000F7',
890 "divideontimes;": '\U000022C7',
891 "divonx;": '\U000022C7',
892 "djcy;": '\U00000452',
893 "dlcorn;": '\U0000231E',
894 "dlcrop;": '\U0000230D',
895 "dollar;": '\U00000024',
896 "dopf;": '\U0001D555',
897 "dot;": '\U000002D9',
898 "doteq;": '\U00002250',
899 "doteqdot;": '\U00002251',
900 "dotminus;": '\U00002238',
901 "dotplus;": '\U00002214',
902 "dotsquare;": '\U000022A1',
903 "doublebarwedge;": '\U00002306',
904 "downarrow;": '\U00002193',
905 "downdownarrows;": '\U000021CA',
906 "downharpoonleft;": '\U000021C3',
907 "downharpoonright;": '\U000021C2',
908 "drbkarow;": '\U00002910',
909 "drcorn;": '\U0000231F',
910 "drcrop;": '\U0000230C',
911 "dscr;": '\U0001D4B9',
912 "dscy;": '\U00000455',
913 "dsol;": '\U000029F6',
914 "dstrok;": '\U00000111',
915 "dtdot;": '\U000022F1',
916 "dtri;": '\U000025BF',
917 "dtrif;": '\U000025BE',
918 "duarr;": '\U000021F5',
919 "duhar;": '\U0000296F',
920 "dwangle;": '\U000029A6',
921 "dzcy;": '\U0000045F',
922 "dzigrarr;": '\U000027FF',
923 "eDDot;": '\U00002A77',
924 "eDot;": '\U00002251',
925 "eacute;": '\U000000E9',
926 "easter;": '\U00002A6E',
927 "ecaron;": '\U0000011B',
928 "ecir;": '\U00002256',
929 "ecirc;": '\U000000EA',
930 "ecolon;": '\U00002255',
931 "ecy;": '\U0000044D',
932 "edot;": '\U00000117',
933 "ee;": '\U00002147',
934 "efDot;": '\U00002252',
935 "efr;": '\U0001D522',
936 "eg;": '\U00002A9A',
937 "egrave;": '\U000000E8',
938 "egs;": '\U00002A96',
939 "egsdot;": '\U00002A98',
940 "el;": '\U00002A99',
941 "elinters;": '\U000023E7',
942 "ell;": '\U00002113',
943 "els;": '\U00002A95',
944 "elsdot;": '\U00002A97',
945 "emacr;": '\U00000113',
946 "empty;": '\U00002205',
947 "emptyset;": '\U00002205',
948 "emptyv;": '\U00002205',
949 "emsp;": '\U00002003',
950 "emsp13;": '\U00002004',
951 "emsp14;": '\U00002005',
952 "eng;": '\U0000014B',
953 "ensp;": '\U00002002',
954 "eogon;": '\U00000119',
955 "eopf;": '\U0001D556',
956 "epar;": '\U000022D5',
957 "eparsl;": '\U000029E3',
958 "eplus;": '\U00002A71',
959 "epsi;": '\U000003B5',
960 "epsilon;": '\U000003B5',
961 "epsiv;": '\U000003F5',
962 "eqcirc;": '\U00002256',
963 "eqcolon;": '\U00002255',
964 "eqsim;": '\U00002242',
965 "eqslantgtr;": '\U00002A96',
966 "eqslantless;": '\U00002A95',
967 "equals;": '\U0000003D',
968 "equest;": '\U0000225F',
969 "equiv;": '\U00002261',
970 "equivDD;": '\U00002A78',
971 "eqvparsl;": '\U000029E5',
972 "erDot;": '\U00002253',
973 "erarr;": '\U00002971',
974 "escr;": '\U0000212F',
975 "esdot;": '\U00002250',
976 "esim;": '\U00002242',
977 "eta;": '\U000003B7',
978 "eth;": '\U000000F0',
979 "euml;": '\U000000EB',
980 "euro;": '\U000020AC',
981 "excl;": '\U00000021',
982 "exist;": '\U00002203',
983 "expectation;": '\U00002130',
984 "exponentiale;": '\U00002147',
985 "fallingdotseq;": '\U00002252',
986 "fcy;": '\U00000444',
987 "female;": '\U00002640',
988 "ffilig;": '\U0000FB03',
989 "fflig;": '\U0000FB00',
990 "ffllig;": '\U0000FB04',
991 "ffr;": '\U0001D523',
992 "filig;": '\U0000FB01',
993 "flat;": '\U0000266D',
994 "fllig;": '\U0000FB02',
995 "fltns;": '\U000025B1',
996 "fnof;": '\U00000192',
997 "fopf;": '\U0001D557',
998 "forall;": '\U00002200',
999 "fork;": '\U000022D4',
1000 "forkv;": '\U00002AD9',
1001 "fpartint;": '\U00002A0D',
1002 "frac12;": '\U000000BD',
1003 "frac13;": '\U00002153',
1004 "frac14;": '\U000000BC',
1005 "frac15;": '\U00002155',
1006 "frac16;": '\U00002159',
1007 "frac18;": '\U0000215B',
1008 "frac23;": '\U00002154',
1009 "frac25;": '\U00002156',
1010 "frac34;": '\U000000BE',
1011 "frac35;": '\U00002157',
1012 "frac38;": '\U0000215C',
1013 "frac45;": '\U00002158',
1014 "frac56;": '\U0000215A',
1015 "frac58;": '\U0000215D',
1016 "frac78;": '\U0000215E',
1017 "frasl;": '\U00002044',
1018 "frown;": '\U00002322',
1019 "fscr;": '\U0001D4BB',
1020 "gE;": '\U00002267',
1021 "gEl;": '\U00002A8C',
1022 "gacute;": '\U000001F5',
1023 "gamma;": '\U000003B3',
1024 "gammad;": '\U000003DD',
1025 "gap;": '\U00002A86',
1026 "gbreve;": '\U0000011F',
1027 "gcirc;": '\U0000011D',
1028 "gcy;": '\U00000433',
1029 "gdot;": '\U00000121',
1030 "ge;": '\U00002265',
1031 "gel;": '\U000022DB',
1032 "geq;": '\U00002265',
1033 "geqq;": '\U00002267',
1034 "geqslant;": '\U00002A7E',
1035 "ges;": '\U00002A7E',
1036 "gescc;": '\U00002AA9',
1037 "gesdot;": '\U00002A80',
1038 "gesdoto;": '\U00002A82',
1039 "gesdotol;": '\U00002A84',
1040 "gesles;": '\U00002A94',
1041 "gfr;": '\U0001D524',
1042 "gg;": '\U0000226B',
1043 "ggg;": '\U000022D9',
1044 "gimel;": '\U00002137',
1045 "gjcy;": '\U00000453',
1046 "gl;": '\U00002277',
1047 "glE;": '\U00002A92',
1048 "gla;": '\U00002AA5',
1049 "glj;": '\U00002AA4',
1050 "gnE;": '\U00002269',
1051 "gnap;": '\U00002A8A',
1052 "gnapprox;": '\U00002A8A',
1053 "gne;": '\U00002A88',
1054 "gneq;": '\U00002A88',
1055 "gneqq;": '\U00002269',
1056 "gnsim;": '\U000022E7',
1057 "gopf;": '\U0001D558',
1058 "grave;": '\U00000060',
1059 "gscr;": '\U0000210A',
1060 "gsim;": '\U00002273',
1061 "gsime;": '\U00002A8E',
1062 "gsiml;": '\U00002A90',
1063 "gt;": '\U0000003E',
1064 "gtcc;": '\U00002AA7',
1065 "gtcir;": '\U00002A7A',
1066 "gtdot;": '\U000022D7',
1067 "gtlPar;": '\U00002995',
1068 "gtquest;": '\U00002A7C',
1069 "gtrapprox;": '\U00002A86',
1070 "gtrarr;": '\U00002978',
1071 "gtrdot;": '\U000022D7',
1072 "gtreqless;": '\U000022DB',
1073 "gtreqqless;": '\U00002A8C',
1074 "gtrless;": '\U00002277',
1075 "gtrsim;": '\U00002273',
1076 "hArr;": '\U000021D4',
1077 "hairsp;": '\U0000200A',
1078 "half;": '\U000000BD',
1079 "hamilt;": '\U0000210B',
1080 "hardcy;": '\U0000044A',
1081 "harr;": '\U00002194',
1082 "harrcir;": '\U00002948',
1083 "harrw;": '\U000021AD',
1084 "hbar;": '\U0000210F',
1085 "hcirc;": '\U00000125',
1086 "hearts;": '\U00002665',
1087 "heartsuit;": '\U00002665',
1088 "hellip;": '\U00002026',
1089 "hercon;": '\U000022B9',
1090 "hfr;": '\U0001D525',
1091 "hksearow;": '\U00002925',
1092 "hkswarow;": '\U00002926',
1093 "hoarr;": '\U000021FF',
1094 "homtht;": '\U0000223B',
1095 "hookleftarrow;": '\U000021A9',
1096 "hookrightarrow;": '\U000021AA',
1097 "hopf;": '\U0001D559',
1098 "horbar;": '\U00002015',
1099 "hscr;": '\U0001D4BD',
1100 "hslash;": '\U0000210F',
1101 "hstrok;": '\U00000127',
1102 "hybull;": '\U00002043',
1103 "hyphen;": '\U00002010',
1104 "iacute;": '\U000000ED',
1105 "ic;": '\U00002063',
1106 "icirc;": '\U000000EE',
1107 "icy;": '\U00000438',
1108 "iecy;": '\U00000435',
1109 "iexcl;": '\U000000A1',
1110 "iff;": '\U000021D4',
1111 "ifr;": '\U0001D526',
1112 "igrave;": '\U000000EC',
1113 "ii;": '\U00002148',
1114 "iiiint;": '\U00002A0C',
1115 "iiint;": '\U0000222D',
1116 "iinfin;": '\U000029DC',
1117 "iiota;": '\U00002129',
1118 "ijlig;": '\U00000133',
1119 "imacr;": '\U0000012B',
1120 "image;": '\U00002111',
1121 "imagline;": '\U00002110',
1122 "imagpart;": '\U00002111',
1123 "imath;": '\U00000131',
1124 "imof;": '\U000022B7',
1125 "imped;": '\U000001B5',
1126 "in;": '\U00002208',
1127 "incare;": '\U00002105',
1128 "infin;": '\U0000221E',
1129 "infintie;": '\U000029DD',
1130 "inodot;": '\U00000131',
1131 "int;": '\U0000222B',
1132 "intcal;": '\U000022BA',
1133 "integers;": '\U00002124',
1134 "intercal;": '\U000022BA',
1135 "intlarhk;": '\U00002A17',
1136 "intprod;": '\U00002A3C',
1137 "iocy;": '\U00000451',
1138 "iogon;": '\U0000012F',
1139 "iopf;": '\U0001D55A',
1140 "iota;": '\U000003B9',
1141 "iprod;": '\U00002A3C',
1142 "iquest;": '\U000000BF',
1143 "iscr;": '\U0001D4BE',
1144 "isin;": '\U00002208',
1145 "isinE;": '\U000022F9',
1146 "isindot;": '\U000022F5',
1147 "isins;": '\U000022F4',
1148 "isinsv;": '\U000022F3',
1149 "isinv;": '\U00002208',
1150 "it;": '\U00002062',
1151 "itilde;": '\U00000129',
1152 "iukcy;": '\U00000456',
1153 "iuml;": '\U000000EF',
1154 "jcirc;": '\U00000135',
1155 "jcy;": '\U00000439',
1156 "jfr;": '\U0001D527',
1157 "jmath;": '\U00000237',
1158 "jopf;": '\U0001D55B',
1159 "jscr;": '\U0001D4BF',
1160 "jsercy;": '\U00000458',
1161 "jukcy;": '\U00000454',
1162 "kappa;": '\U000003BA',
1163 "kappav;": '\U000003F0',
1164 "kcedil;": '\U00000137',
1165 "kcy;": '\U0000043A',
1166 "kfr;": '\U0001D528',
1167 "kgreen;": '\U00000138',
1168 "khcy;": '\U00000445',
1169 "kjcy;": '\U0000045C',
1170 "kopf;": '\U0001D55C',
1171 "kscr;": '\U0001D4C0',
1172 "lAarr;": '\U000021DA',
1173 "lArr;": '\U000021D0',
1174 "lAtail;": '\U0000291B',
1175 "lBarr;": '\U0000290E',
1176 "lE;": '\U00002266',
1177 "lEg;": '\U00002A8B',
1178 "lHar;": '\U00002962',
1179 "lacute;": '\U0000013A',
1180 "laemptyv;": '\U000029B4',
1181 "lagran;": '\U00002112',
1182 "lambda;": '\U000003BB',
1183 "lang;": '\U000027E8',
1184 "langd;": '\U00002991',
1185 "langle;": '\U000027E8',
1186 "lap;": '\U00002A85',
1187 "laquo;": '\U000000AB',
1188 "larr;": '\U00002190',
1189 "larrb;": '\U000021E4',
1190 "larrbfs;": '\U0000291F',
1191 "larrfs;": '\U0000291D',
1192 "larrhk;": '\U000021A9',
1193 "larrlp;": '\U000021AB',
1194 "larrpl;": '\U00002939',
1195 "larrsim;": '\U00002973',
1196 "larrtl;": '\U000021A2',
1197 "lat;": '\U00002AAB',
1198 "latail;": '\U00002919',
1199 "late;": '\U00002AAD',
1200 "lbarr;": '\U0000290C',
1201 "lbbrk;": '\U00002772',
1202 "lbrace;": '\U0000007B',
1203 "lbrack;": '\U0000005B',
1204 "lbrke;": '\U0000298B',
1205 "lbrksld;": '\U0000298F',
1206 "lbrkslu;": '\U0000298D',
1207 "lcaron;": '\U0000013E',
1208 "lcedil;": '\U0000013C',
1209 "lceil;": '\U00002308',
1210 "lcub;": '\U0000007B',
1211 "lcy;": '\U0000043B',
1212 "ldca;": '\U00002936',
1213 "ldquo;": '\U0000201C',
1214 "ldquor;": '\U0000201E',
1215 "ldrdhar;": '\U00002967',
1216 "ldrushar;": '\U0000294B',
1217 "ldsh;": '\U000021B2',
1218 "le;": '\U00002264',
1219 "leftarrow;": '\U00002190',
1220 "leftarrowtail;": '\U000021A2',
1221 "leftharpoondown;": '\U000021BD',
1222 "leftharpoonup;": '\U000021BC',
1223 "leftleftarrows;": '\U000021C7',
1224 "leftrightarrow;": '\U00002194',
1225 "leftrightarrows;": '\U000021C6',
1226 "leftrightharpoons;": '\U000021CB',
1227 "leftrightsquigarrow;": '\U000021AD',
1228 "leftthreetimes;": '\U000022CB',
1229 "leg;": '\U000022DA',
1230 "leq;": '\U00002264',
1231 "leqq;": '\U00002266',
1232 "leqslant;": '\U00002A7D',
1233 "les;": '\U00002A7D',
1234 "lescc;": '\U00002AA8',
1235 "lesdot;": '\U00002A7F',
1236 "lesdoto;": '\U00002A81',
1237 "lesdotor;": '\U00002A83',
1238 "lesges;": '\U00002A93',
1239 "lessapprox;": '\U00002A85',
1240 "lessdot;": '\U000022D6',
1241 "lesseqgtr;": '\U000022DA',
1242 "lesseqqgtr;": '\U00002A8B',
1243 "lessgtr;": '\U00002276',
1244 "lesssim;": '\U00002272',
1245 "lfisht;": '\U0000297C',
1246 "lfloor;": '\U0000230A',
1247 "lfr;": '\U0001D529',
1248 "lg;": '\U00002276',
1249 "lgE;": '\U00002A91',
1250 "lhard;": '\U000021BD',
1251 "lharu;": '\U000021BC',
1252 "lharul;": '\U0000296A',
1253 "lhblk;": '\U00002584',
1254 "ljcy;": '\U00000459',
1255 "ll;": '\U0000226A',
1256 "llarr;": '\U000021C7',
1257 "llcorner;": '\U0000231E',
1258 "llhard;": '\U0000296B',
1259 "lltri;": '\U000025FA',
1260 "lmidot;": '\U00000140',
1261 "lmoust;": '\U000023B0',
1262 "lmoustache;": '\U000023B0',
1263 "lnE;": '\U00002268',
1264 "lnap;": '\U00002A89',
1265 "lnapprox;": '\U00002A89',
1266 "lne;": '\U00002A87',
1267 "lneq;": '\U00002A87',
1268 "lneqq;": '\U00002268',
1269 "lnsim;": '\U000022E6',
1270 "loang;": '\U000027EC',
1271 "loarr;": '\U000021FD',
1272 "lobrk;": '\U000027E6',
1273 "longleftarrow;": '\U000027F5',
1274 "longleftrightarrow;": '\U000027F7',
1275 "longmapsto;": '\U000027FC',
1276 "longrightarrow;": '\U000027F6',
1277 "looparrowleft;": '\U000021AB',
1278 "looparrowright;": '\U000021AC',
1279 "lopar;": '\U00002985',
1280 "lopf;": '\U0001D55D',
1281 "loplus;": '\U00002A2D',
1282 "lotimes;": '\U00002A34',
1283 "lowast;": '\U00002217',
1284 "lowbar;": '\U0000005F',
1285 "loz;": '\U000025CA',
1286 "lozenge;": '\U000025CA',
1287 "lozf;": '\U000029EB',
1288 "lpar;": '\U00000028',
1289 "lparlt;": '\U00002993',
1290 "lrarr;": '\U000021C6',
1291 "lrcorner;": '\U0000231F',
1292 "lrhar;": '\U000021CB',
1293 "lrhard;": '\U0000296D',
1294 "lrm;": '\U0000200E',
1295 "lrtri;": '\U000022BF',
1296 "lsaquo;": '\U00002039',
1297 "lscr;": '\U0001D4C1',
1298 "lsh;": '\U000021B0',
1299 "lsim;": '\U00002272',
1300 "lsime;": '\U00002A8D',
1301 "lsimg;": '\U00002A8F',
1302 "lsqb;": '\U0000005B',
1303 "lsquo;": '\U00002018',
1304 "lsquor;": '\U0000201A',
1305 "lstrok;": '\U00000142',
1306 "lt;": '\U0000003C',
1307 "ltcc;": '\U00002AA6',
1308 "ltcir;": '\U00002A79',
1309 "ltdot;": '\U000022D6',
1310 "lthree;": '\U000022CB',
1311 "ltimes;": '\U000022C9',
1312 "ltlarr;": '\U00002976',
1313 "ltquest;": '\U00002A7B',
1314 "ltrPar;": '\U00002996',
1315 "ltri;": '\U000025C3',
1316 "ltrie;": '\U000022B4',
1317 "ltrif;": '\U000025C2',
1318 "lurdshar;": '\U0000294A',
1319 "luruhar;": '\U00002966',
1320 "mDDot;": '\U0000223A',
1321 "macr;": '\U000000AF',
1322 "male;": '\U00002642',
1323 "malt;": '\U00002720',
1324 "maltese;": '\U00002720',
1325 "map;": '\U000021A6',
1326 "mapsto;": '\U000021A6',
1327 "mapstodown;": '\U000021A7',
1328 "mapstoleft;": '\U000021A4',
1329 "mapstoup;": '\U000021A5',
1330 "marker;": '\U000025AE',
1331 "mcomma;": '\U00002A29',
1332 "mcy;": '\U0000043C',
1333 "mdash;": '\U00002014',
1334 "measuredangle;": '\U00002221',
1335 "mfr;": '\U0001D52A',
1336 "mho;": '\U00002127',
1337 "micro;": '\U000000B5',
1338 "mid;": '\U00002223',
1339 "midast;": '\U0000002A',
1340 "midcir;": '\U00002AF0',
1341 "middot;": '\U000000B7',
1342 "minus;": '\U00002212',
1343 "minusb;": '\U0000229F',
1344 "minusd;": '\U00002238',
1345 "minusdu;": '\U00002A2A',
1346 "mlcp;": '\U00002ADB',
1347 "mldr;": '\U00002026',
1348 "mnplus;": '\U00002213',
1349 "models;": '\U000022A7',
1350 "mopf;": '\U0001D55E',
1351 "mp;": '\U00002213',
1352 "mscr;": '\U0001D4C2',
1353 "mstpos;": '\U0000223E',
1354 "mu;": '\U000003BC',
1355 "multimap;": '\U000022B8',
1356 "mumap;": '\U000022B8',
1357 "nLeftarrow;": '\U000021CD',
1358 "nLeftrightarrow;": '\U000021CE',
1359 "nRightarrow;": '\U000021CF',
1360 "nVDash;": '\U000022AF',
1361 "nVdash;": '\U000022AE',
1362 "nabla;": '\U00002207',
1363 "nacute;": '\U00000144',
1364 "nap;": '\U00002249',
1365 "napos;": '\U00000149',
1366 "napprox;": '\U00002249',
1367 "natur;": '\U0000266E',
1368 "natural;": '\U0000266E',
1369 "naturals;": '\U00002115',
1370 "nbsp;": '\U000000A0',
1371 "ncap;": '\U00002A43',
1372 "ncaron;": '\U00000148',
1373 "ncedil;": '\U00000146',
1374 "ncong;": '\U00002247',
1375 "ncup;": '\U00002A42',
1376 "ncy;": '\U0000043D',
1377 "ndash;": '\U00002013',
1378 "ne;": '\U00002260',
1379 "neArr;": '\U000021D7',
1380 "nearhk;": '\U00002924',
1381 "nearr;": '\U00002197',
1382 "nearrow;": '\U00002197',
1383 "nequiv;": '\U00002262',
1384 "nesear;": '\U00002928',
1385 "nexist;": '\U00002204',
1386 "nexists;": '\U00002204',
1387 "nfr;": '\U0001D52B',
1388 "nge;": '\U00002271',
1389 "ngeq;": '\U00002271',
1390 "ngsim;": '\U00002275',
1391 "ngt;": '\U0000226F',
1392 "ngtr;": '\U0000226F',
1393 "nhArr;": '\U000021CE',
1394 "nharr;": '\U000021AE',
1395 "nhpar;": '\U00002AF2',
1396 "ni;": '\U0000220B',
1397 "nis;": '\U000022FC',
1398 "nisd;": '\U000022FA',
1399 "niv;": '\U0000220B',
1400 "njcy;": '\U0000045A',
1401 "nlArr;": '\U000021CD',
1402 "nlarr;": '\U0000219A',
1403 "nldr;": '\U00002025',
1404 "nle;": '\U00002270',
1405 "nleftarrow;": '\U0000219A',
1406 "nleftrightarrow;": '\U000021AE',
1407 "nleq;": '\U00002270',
1408 "nless;": '\U0000226E',
1409 "nlsim;": '\U00002274',
1410 "nlt;": '\U0000226E',
1411 "nltri;": '\U000022EA',
1412 "nltrie;": '\U000022EC',
1413 "nmid;": '\U00002224',
1414 "nopf;": '\U0001D55F',
1415 "not;": '\U000000AC',
1416 "notin;": '\U00002209',
1417 "notinva;": '\U00002209',
1418 "notinvb;": '\U000022F7',
1419 "notinvc;": '\U000022F6',
1420 "notni;": '\U0000220C',
1421 "notniva;": '\U0000220C',
1422 "notnivb;": '\U000022FE',
1423 "notnivc;": '\U000022FD',
1424 "npar;": '\U00002226',
1425 "nparallel;": '\U00002226',
1426 "npolint;": '\U00002A14',
1427 "npr;": '\U00002280',
1428 "nprcue;": '\U000022E0',
1429 "nprec;": '\U00002280',
1430 "nrArr;": '\U000021CF',
1431 "nrarr;": '\U0000219B',
1432 "nrightarrow;": '\U0000219B',
1433 "nrtri;": '\U000022EB',
1434 "nrtrie;": '\U000022ED',
1435 "nsc;": '\U00002281',
1436 "nsccue;": '\U000022E1',
1437 "nscr;": '\U0001D4C3',
1438 "nshortmid;": '\U00002224',
1439 "nshortparallel;": '\U00002226',
1440 "nsim;": '\U00002241',
1441 "nsime;": '\U00002244',
1442 "nsimeq;": '\U00002244',
1443 "nsmid;": '\U00002224',
1444 "nspar;": '\U00002226',
1445 "nsqsube;": '\U000022E2',
1446 "nsqsupe;": '\U000022E3',
1447 "nsub;": '\U00002284',
1448 "nsube;": '\U00002288',
1449 "nsubseteq;": '\U00002288',
1450 "nsucc;": '\U00002281',
1451 "nsup;": '\U00002285',
1452 "nsupe;": '\U00002289',
1453 "nsupseteq;": '\U00002289',
1454 "ntgl;": '\U00002279',
1455 "ntilde;": '\U000000F1',
1456 "ntlg;": '\U00002278',
1457 "ntriangleleft;": '\U000022EA',
1458 "ntrianglelefteq;": '\U000022EC',
1459 "ntriangleright;": '\U000022EB',
1460 "ntrianglerighteq;": '\U000022ED',
1461 "nu;": '\U000003BD',
1462 "num;": '\U00000023',
1463 "numero;": '\U00002116',
1464 "numsp;": '\U00002007',
1465 "nvDash;": '\U000022AD',
1466 "nvHarr;": '\U00002904',
1467 "nvdash;": '\U000022AC',
1468 "nvinfin;": '\U000029DE',
1469 "nvlArr;": '\U00002902',
1470 "nvrArr;": '\U00002903',
1471 "nwArr;": '\U000021D6',
1472 "nwarhk;": '\U00002923',
1473 "nwarr;": '\U00002196',
1474 "nwarrow;": '\U00002196',
1475 "nwnear;": '\U00002927',
1476 "oS;": '\U000024C8',
1477 "oacute;": '\U000000F3',
1478 "oast;": '\U0000229B',
1479 "ocir;": '\U0000229A',
1480 "ocirc;": '\U000000F4',
1481 "ocy;": '\U0000043E',
1482 "odash;": '\U0000229D',
1483 "odblac;": '\U00000151',
1484 "odiv;": '\U00002A38',
1485 "odot;": '\U00002299',
1486 "odsold;": '\U000029BC',
1487 "oelig;": '\U00000153',
1488 "ofcir;": '\U000029BF',
1489 "ofr;": '\U0001D52C',
1490 "ogon;": '\U000002DB',
1491 "ograve;": '\U000000F2',
1492 "ogt;": '\U000029C1',
1493 "ohbar;": '\U000029B5',
1494 "ohm;": '\U000003A9',
1495 "oint;": '\U0000222E',
1496 "olarr;": '\U000021BA',
1497 "olcir;": '\U000029BE',
1498 "olcross;": '\U000029BB',
1499 "oline;": '\U0000203E',
1500 "olt;": '\U000029C0',
1501 "omacr;": '\U0000014D',
1502 "omega;": '\U000003C9',
1503 "omicron;": '\U000003BF',
1504 "omid;": '\U000029B6',
1505 "ominus;": '\U00002296',
1506 "oopf;": '\U0001D560',
1507 "opar;": '\U000029B7',
1508 "operp;": '\U000029B9',
1509 "oplus;": '\U00002295',
1510 "or;": '\U00002228',
1511 "orarr;": '\U000021BB',
1512 "ord;": '\U00002A5D',
1513 "order;": '\U00002134',
1514 "orderof;": '\U00002134',
1515 "ordf;": '\U000000AA',
1516 "ordm;": '\U000000BA',
1517 "origof;": '\U000022B6',
1518 "oror;": '\U00002A56',
1519 "orslope;": '\U00002A57',
1520 "orv;": '\U00002A5B',
1521 "oscr;": '\U00002134',
1522 "oslash;": '\U000000F8',
1523 "osol;": '\U00002298',
1524 "otilde;": '\U000000F5',
1525 "otimes;": '\U00002297',
1526 "otimesas;": '\U00002A36',
1527 "ouml;": '\U000000F6',
1528 "ovbar;": '\U0000233D',
1529 "par;": '\U00002225',
1530 "para;": '\U000000B6',
1531 "parallel;": '\U00002225',
1532 "parsim;": '\U00002AF3',
1533 "parsl;": '\U00002AFD',
1534 "part;": '\U00002202',
1535 "pcy;": '\U0000043F',
1536 "percnt;": '\U00000025',
1537 "period;": '\U0000002E',
1538 "permil;": '\U00002030',
1539 "perp;": '\U000022A5',
1540 "pertenk;": '\U00002031',
1541 "pfr;": '\U0001D52D',
1542 "phi;": '\U000003C6',
1543 "phiv;": '\U000003D5',
1544 "phmmat;": '\U00002133',
1545 "phone;": '\U0000260E',
1546 "pi;": '\U000003C0',
1547 "pitchfork;": '\U000022D4',
1548 "piv;": '\U000003D6',
1549 "planck;": '\U0000210F',
1550 "planckh;": '\U0000210E',
1551 "plankv;": '\U0000210F',
1552 "plus;": '\U0000002B',
1553 "plusacir;": '\U00002A23',
1554 "plusb;": '\U0000229E',
1555 "pluscir;": '\U00002A22',
1556 "plusdo;": '\U00002214',
1557 "plusdu;": '\U00002A25',
1558 "pluse;": '\U00002A72',
1559 "plusmn;": '\U000000B1',
1560 "plussim;": '\U00002A26',
1561 "plustwo;": '\U00002A27',
1562 "pm;": '\U000000B1',
1563 "pointint;": '\U00002A15',
1564 "popf;": '\U0001D561',
1565 "pound;": '\U000000A3',
1566 "pr;": '\U0000227A',
1567 "prE;": '\U00002AB3',
1568 "prap;": '\U00002AB7',
1569 "prcue;": '\U0000227C',
1570 "pre;": '\U00002AAF',
1571 "prec;": '\U0000227A',
1572 "precapprox;": '\U00002AB7',
1573 "preccurlyeq;": '\U0000227C',
1574 "preceq;": '\U00002AAF',
1575 "precnapprox;": '\U00002AB9',
1576 "precneqq;": '\U00002AB5',
1577 "precnsim;": '\U000022E8',
1578 "precsim;": '\U0000227E',
1579 "prime;": '\U00002032',
1580 "primes;": '\U00002119',
1581 "prnE;": '\U00002AB5',
1582 "prnap;": '\U00002AB9',
1583 "prnsim;": '\U000022E8',
1584 "prod;": '\U0000220F',
1585 "profalar;": '\U0000232E',
1586 "profline;": '\U00002312',
1587 "profsurf;": '\U00002313',
1588 "prop;": '\U0000221D',
1589 "propto;": '\U0000221D',
1590 "prsim;": '\U0000227E',
1591 "prurel;": '\U000022B0',
1592 "pscr;": '\U0001D4C5',
1593 "psi;": '\U000003C8',
1594 "puncsp;": '\U00002008',
1595 "qfr;": '\U0001D52E',
1596 "qint;": '\U00002A0C',
1597 "qopf;": '\U0001D562',
1598 "qprime;": '\U00002057',
1599 "qscr;": '\U0001D4C6',
1600 "quaternions;": '\U0000210D',
1601 "quatint;": '\U00002A16',
1602 "quest;": '\U0000003F',
1603 "questeq;": '\U0000225F',
1604 "quot;": '\U00000022',
1605 "rAarr;": '\U000021DB',
1606 "rArr;": '\U000021D2',
1607 "rAtail;": '\U0000291C',
1608 "rBarr;": '\U0000290F',
1609 "rHar;": '\U00002964',
1610 "racute;": '\U00000155',
1611 "radic;": '\U0000221A',
1612 "raemptyv;": '\U000029B3',
1613 "rang;": '\U000027E9',
1614 "rangd;": '\U00002992',
1615 "range;": '\U000029A5',
1616 "rangle;": '\U000027E9',
1617 "raquo;": '\U000000BB',
1618 "rarr;": '\U00002192',
1619 "rarrap;": '\U00002975',
1620 "rarrb;": '\U000021E5',
1621 "rarrbfs;": '\U00002920',
1622 "rarrc;": '\U00002933',
1623 "rarrfs;": '\U0000291E',
1624 "rarrhk;": '\U000021AA',
1625 "rarrlp;": '\U000021AC',
1626 "rarrpl;": '\U00002945',
1627 "rarrsim;": '\U00002974',
1628 "rarrtl;": '\U000021A3',
1629 "rarrw;": '\U0000219D',
1630 "ratail;": '\U0000291A',
1631 "ratio;": '\U00002236',
1632 "rationals;": '\U0000211A',
1633 "rbarr;": '\U0000290D',
1634 "rbbrk;": '\U00002773',
1635 "rbrace;": '\U0000007D',
1636 "rbrack;": '\U0000005D',
1637 "rbrke;": '\U0000298C',
1638 "rbrksld;": '\U0000298E',
1639 "rbrkslu;": '\U00002990',
1640 "rcaron;": '\U00000159',
1641 "rcedil;": '\U00000157',
1642 "rceil;": '\U00002309',
1643 "rcub;": '\U0000007D',
1644 "rcy;": '\U00000440',
1645 "rdca;": '\U00002937',
1646 "rdldhar;": '\U00002969',
1647 "rdquo;": '\U0000201D',
1648 "rdquor;": '\U0000201D',
1649 "rdsh;": '\U000021B3',
1650 "real;": '\U0000211C',
1651 "realine;": '\U0000211B',
1652 "realpart;": '\U0000211C',
1653 "reals;": '\U0000211D',
1654 "rect;": '\U000025AD',
1655 "reg;": '\U000000AE',
1656 "rfisht;": '\U0000297D',
1657 "rfloor;": '\U0000230B',
1658 "rfr;": '\U0001D52F',
1659 "rhard;": '\U000021C1',
1660 "rharu;": '\U000021C0',
1661 "rharul;": '\U0000296C',
1662 "rho;": '\U000003C1',
1663 "rhov;": '\U000003F1',
1664 "rightarrow;": '\U00002192',
1665 "rightarrowtail;": '\U000021A3',
1666 "rightharpoondown;": '\U000021C1',
1667 "rightharpoonup;": '\U000021C0',
1668 "rightleftarrows;": '\U000021C4',
1669 "rightleftharpoons;": '\U000021CC',
1670 "rightrightarrows;": '\U000021C9',
1671 "rightsquigarrow;": '\U0000219D',
1672 "rightthreetimes;": '\U000022CC',
1673 "ring;": '\U000002DA',
1674 "risingdotseq;": '\U00002253',
1675 "rlarr;": '\U000021C4',
1676 "rlhar;": '\U000021CC',
1677 "rlm;": '\U0000200F',
1678 "rmoust;": '\U000023B1',
1679 "rmoustache;": '\U000023B1',
1680 "rnmid;": '\U00002AEE',
1681 "roang;": '\U000027ED',
1682 "roarr;": '\U000021FE',
1683 "robrk;": '\U000027E7',
1684 "ropar;": '\U00002986',
1685 "ropf;": '\U0001D563',
1686 "roplus;": '\U00002A2E',
1687 "rotimes;": '\U00002A35',
1688 "rpar;": '\U00000029',
1689 "rpargt;": '\U00002994',
1690 "rppolint;": '\U00002A12',
1691 "rrarr;": '\U000021C9',
1692 "rsaquo;": '\U0000203A',
1693 "rscr;": '\U0001D4C7',
1694 "rsh;": '\U000021B1',
1695 "rsqb;": '\U0000005D',
1696 "rsquo;": '\U00002019',
1697 "rsquor;": '\U00002019',
1698 "rthree;": '\U000022CC',
1699 "rtimes;": '\U000022CA',
1700 "rtri;": '\U000025B9',
1701 "rtrie;": '\U000022B5',
1702 "rtrif;": '\U000025B8',
1703 "rtriltri;": '\U000029CE',
1704 "ruluhar;": '\U00002968',
1705 "rx;": '\U0000211E',
1706 "sacute;": '\U0000015B',
1707 "sbquo;": '\U0000201A',
1708 "sc;": '\U0000227B',
1709 "scE;": '\U00002AB4',
1710 "scap;": '\U00002AB8',
1711 "scaron;": '\U00000161',
1712 "sccue;": '\U0000227D',
1713 "sce;": '\U00002AB0',
1714 "scedil;": '\U0000015F',
1715 "scirc;": '\U0000015D',
1716 "scnE;": '\U00002AB6',
1717 "scnap;": '\U00002ABA',
1718 "scnsim;": '\U000022E9',
1719 "scpolint;": '\U00002A13',
1720 "scsim;": '\U0000227F',
1721 "scy;": '\U00000441',
1722 "sdot;": '\U000022C5',
1723 "sdotb;": '\U000022A1',
1724 "sdote;": '\U00002A66',
1725 "seArr;": '\U000021D8',
1726 "searhk;": '\U00002925',
1727 "searr;": '\U00002198',
1728 "searrow;": '\U00002198',
1729 "sect;": '\U000000A7',
1730 "semi;": '\U0000003B',
1731 "seswar;": '\U00002929',
1732 "setminus;": '\U00002216',
1733 "setmn;": '\U00002216',
1734 "sext;": '\U00002736',
1735 "sfr;": '\U0001D530',
1736 "sfrown;": '\U00002322',
1737 "sharp;": '\U0000266F',
1738 "shchcy;": '\U00000449',
1739 "shcy;": '\U00000448',
1740 "shortmid;": '\U00002223',
1741 "shortparallel;": '\U00002225',
1742 "shy;": '\U000000AD',
1743 "sigma;": '\U000003C3',
1744 "sigmaf;": '\U000003C2',
1745 "sigmav;": '\U000003C2',
1746 "sim;": '\U0000223C',
1747 "simdot;": '\U00002A6A',
1748 "sime;": '\U00002243',
1749 "simeq;": '\U00002243',
1750 "simg;": '\U00002A9E',
1751 "simgE;": '\U00002AA0',
1752 "siml;": '\U00002A9D',
1753 "simlE;": '\U00002A9F',
1754 "simne;": '\U00002246',
1755 "simplus;": '\U00002A24',
1756 "simrarr;": '\U00002972',
1757 "slarr;": '\U00002190',
1758 "smallsetminus;": '\U00002216',
1759 "smashp;": '\U00002A33',
1760 "smeparsl;": '\U000029E4',
1761 "smid;": '\U00002223',
1762 "smile;": '\U00002323',
1763 "smt;": '\U00002AAA',
1764 "smte;": '\U00002AAC',
1765 "softcy;": '\U0000044C',
1766 "sol;": '\U0000002F',
1767 "solb;": '\U000029C4',
1768 "solbar;": '\U0000233F',
1769 "sopf;": '\U0001D564',
1770 "spades;": '\U00002660',
1771 "spadesuit;": '\U00002660',
1772 "spar;": '\U00002225',
1773 "sqcap;": '\U00002293',
1774 "sqcup;": '\U00002294',
1775 "sqsub;": '\U0000228F',
1776 "sqsube;": '\U00002291',
1777 "sqsubset;": '\U0000228F',
1778 "sqsubseteq;": '\U00002291',
1779 "sqsup;": '\U00002290',
1780 "sqsupe;": '\U00002292',
1781 "sqsupset;": '\U00002290',
1782 "sqsupseteq;": '\U00002292',
1783 "squ;": '\U000025A1',
1784 "square;": '\U000025A1',
1785 "squarf;": '\U000025AA',
1786 "squf;": '\U000025AA',
1787 "srarr;": '\U00002192',
1788 "sscr;": '\U0001D4C8',
1789 "ssetmn;": '\U00002216',
1790 "ssmile;": '\U00002323',
1791 "sstarf;": '\U000022C6',
1792 "star;": '\U00002606',
1793 "starf;": '\U00002605',
1794 "straightepsilon;": '\U000003F5',
1795 "straightphi;": '\U000003D5',
1796 "strns;": '\U000000AF',
1797 "sub;": '\U00002282',
1798 "subE;": '\U00002AC5',
1799 "subdot;": '\U00002ABD',
1800 "sube;": '\U00002286',
1801 "subedot;": '\U00002AC3',
1802 "submult;": '\U00002AC1',
1803 "subnE;": '\U00002ACB',
1804 "subne;": '\U0000228A',
1805 "subplus;": '\U00002ABF',
1806 "subrarr;": '\U00002979',
1807 "subset;": '\U00002282',
1808 "subseteq;": '\U00002286',
1809 "subseteqq;": '\U00002AC5',
1810 "subsetneq;": '\U0000228A',
1811 "subsetneqq;": '\U00002ACB',
1812 "subsim;": '\U00002AC7',
1813 "subsub;": '\U00002AD5',
1814 "subsup;": '\U00002AD3',
1815 "succ;": '\U0000227B',
1816 "succapprox;": '\U00002AB8',
1817 "succcurlyeq;": '\U0000227D',
1818 "succeq;": '\U00002AB0',
1819 "succnapprox;": '\U00002ABA',
1820 "succneqq;": '\U00002AB6',
1821 "succnsim;": '\U000022E9',
1822 "succsim;": '\U0000227F',
1823 "sum;": '\U00002211',
1824 "sung;": '\U0000266A',
1825 "sup;": '\U00002283',
1826 "sup1;": '\U000000B9',
1827 "sup2;": '\U000000B2',
1828 "sup3;": '\U000000B3',
1829 "supE;": '\U00002AC6',
1830 "supdot;": '\U00002ABE',
1831 "supdsub;": '\U00002AD8',
1832 "supe;": '\U00002287',
1833 "supedot;": '\U00002AC4',
1834 "suphsol;": '\U000027C9',
1835 "suphsub;": '\U00002AD7',
1836 "suplarr;": '\U0000297B',
1837 "supmult;": '\U00002AC2',
1838 "supnE;": '\U00002ACC',
1839 "supne;": '\U0000228B',
1840 "supplus;": '\U00002AC0',
1841 "supset;": '\U00002283',
1842 "supseteq;": '\U00002287',
1843 "supseteqq;": '\U00002AC6',
1844 "supsetneq;": '\U0000228B',
1845 "supsetneqq;": '\U00002ACC',
1846 "supsim;": '\U00002AC8',
1847 "supsub;": '\U00002AD4',
1848 "supsup;": '\U00002AD6',
1849 "swArr;": '\U000021D9',
1850 "swarhk;": '\U00002926',
1851 "swarr;": '\U00002199',
1852 "swarrow;": '\U00002199',
1853 "swnwar;": '\U0000292A',
1854 "szlig;": '\U000000DF',
1855 "target;": '\U00002316',
1856 "tau;": '\U000003C4',
1857 "tbrk;": '\U000023B4',
1858 "tcaron;": '\U00000165',
1859 "tcedil;": '\U00000163',
1860 "tcy;": '\U00000442',
1861 "tdot;": '\U000020DB',
1862 "telrec;": '\U00002315',
1863 "tfr;": '\U0001D531',
1864 "there4;": '\U00002234',
1865 "therefore;": '\U00002234',
1866 "theta;": '\U000003B8',
1867 "thetasym;": '\U000003D1',
1868 "thetav;": '\U000003D1',
1869 "thickapprox;": '\U00002248',
1870 "thicksim;": '\U0000223C',
1871 "thinsp;": '\U00002009',
1872 "thkap;": '\U00002248',
1873 "thksim;": '\U0000223C',
1874 "thorn;": '\U000000FE',
1875 "tilde;": '\U000002DC',
1876 "times;": '\U000000D7',
1877 "timesb;": '\U000022A0',
1878 "timesbar;": '\U00002A31',
1879 "timesd;": '\U00002A30',
1880 "tint;": '\U0000222D',
1881 "toea;": '\U00002928',
1882 "top;": '\U000022A4',
1883 "topbot;": '\U00002336',
1884 "topcir;": '\U00002AF1',
1885 "topf;": '\U0001D565',
1886 "topfork;": '\U00002ADA',
1887 "tosa;": '\U00002929',
1888 "tprime;": '\U00002034',
1889 "trade;": '\U00002122',
1890 "triangle;": '\U000025B5',
1891 "triangledown;": '\U000025BF',
1892 "triangleleft;": '\U000025C3',
1893 "trianglelefteq;": '\U000022B4',
1894 "triangleq;": '\U0000225C',
1895 "triangleright;": '\U000025B9',
1896 "trianglerighteq;": '\U000022B5',
1897 "tridot;": '\U000025EC',
1898 "trie;": '\U0000225C',
1899 "triminus;": '\U00002A3A',
1900 "triplus;": '\U00002A39',
1901 "trisb;": '\U000029CD',
1902 "tritime;": '\U00002A3B',
1903 "trpezium;": '\U000023E2',
1904 "tscr;": '\U0001D4C9',
1905 "tscy;": '\U00000446',
1906 "tshcy;": '\U0000045B',
1907 "tstrok;": '\U00000167',
1908 "twixt;": '\U0000226C',
1909 "twoheadleftarrow;": '\U0000219E',
1910 "twoheadrightarrow;": '\U000021A0',
1911 "uArr;": '\U000021D1',
1912 "uHar;": '\U00002963',
1913 "uacute;": '\U000000FA',
1914 "uarr;": '\U00002191',
1915 "ubrcy;": '\U0000045E',
1916 "ubreve;": '\U0000016D',
1917 "ucirc;": '\U000000FB',
1918 "ucy;": '\U00000443',
1919 "udarr;": '\U000021C5',
1920 "udblac;": '\U00000171',
1921 "udhar;": '\U0000296E',
1922 "ufisht;": '\U0000297E',
1923 "ufr;": '\U0001D532',
1924 "ugrave;": '\U000000F9',
1925 "uharl;": '\U000021BF',
1926 "uharr;": '\U000021BE',
1927 "uhblk;": '\U00002580',
1928 "ulcorn;": '\U0000231C',
1929 "ulcorner;": '\U0000231C',
1930 "ulcrop;": '\U0000230F',
1931 "ultri;": '\U000025F8',
1932 "umacr;": '\U0000016B',
1933 "uml;": '\U000000A8',
1934 "uogon;": '\U00000173',
1935 "uopf;": '\U0001D566',
1936 "uparrow;": '\U00002191',
1937 "updownarrow;": '\U00002195',
1938 "upharpoonleft;": '\U000021BF',
1939 "upharpoonright;": '\U000021BE',
1940 "uplus;": '\U0000228E',
1941 "upsi;": '\U000003C5',
1942 "upsih;": '\U000003D2',
1943 "upsilon;": '\U000003C5',
1944 "upuparrows;": '\U000021C8',
1945 "urcorn;": '\U0000231D',
1946 "urcorner;": '\U0000231D',
1947 "urcrop;": '\U0000230E',
1948 "uring;": '\U0000016F',
1949 "urtri;": '\U000025F9',
1950 "uscr;": '\U0001D4CA',
1951 "utdot;": '\U000022F0',
1952 "utilde;": '\U00000169',
1953 "utri;": '\U000025B5',
1954 "utrif;": '\U000025B4',
1955 "uuarr;": '\U000021C8',
1956 "uuml;": '\U000000FC',
1957 "uwangle;": '\U000029A7',
1958 "vArr;": '\U000021D5',
1959 "vBar;": '\U00002AE8',
1960 "vBarv;": '\U00002AE9',
1961 "vDash;": '\U000022A8',
1962 "vangrt;": '\U0000299C',
1963 "varepsilon;": '\U000003F5',
1964 "varkappa;": '\U000003F0',
1965 "varnothing;": '\U00002205',
1966 "varphi;": '\U000003D5',
1967 "varpi;": '\U000003D6',
1968 "varpropto;": '\U0000221D',
1969 "varr;": '\U00002195',
1970 "varrho;": '\U000003F1',
1971 "varsigma;": '\U000003C2',
1972 "vartheta;": '\U000003D1',
1973 "vartriangleleft;": '\U000022B2',
1974 "vartriangleright;": '\U000022B3',
1975 "vcy;": '\U00000432',
1976 "vdash;": '\U000022A2',
1977 "vee;": '\U00002228',
1978 "veebar;": '\U000022BB',
1979 "veeeq;": '\U0000225A',
1980 "vellip;": '\U000022EE',
1981 "verbar;": '\U0000007C',
1982 "vert;": '\U0000007C',
1983 "vfr;": '\U0001D533',
1984 "vltri;": '\U000022B2',
1985 "vopf;": '\U0001D567',
1986 "vprop;": '\U0000221D',
1987 "vrtri;": '\U000022B3',
1988 "vscr;": '\U0001D4CB',
1989 "vzigzag;": '\U0000299A',
1990 "wcirc;": '\U00000175',
1991 "wedbar;": '\U00002A5F',
1992 "wedge;": '\U00002227',
1993 "wedgeq;": '\U00002259',
1994 "weierp;": '\U00002118',
1995 "wfr;": '\U0001D534',
1996 "wopf;": '\U0001D568',
1997 "wp;": '\U00002118',
1998 "wr;": '\U00002240',
1999 "wreath;": '\U00002240',
2000 "wscr;": '\U0001D4CC',
2001 "xcap;": '\U000022C2',
2002 "xcirc;": '\U000025EF',
2003 "xcup;": '\U000022C3',
2004 "xdtri;": '\U000025BD',
2005 "xfr;": '\U0001D535',
2006 "xhArr;": '\U000027FA',
2007 "xharr;": '\U000027F7',
2008 "xi;": '\U000003BE',
2009 "xlArr;": '\U000027F8',
2010 "xlarr;": '\U000027F5',
2011 "xmap;": '\U000027FC',
2012 "xnis;": '\U000022FB',
2013 "xodot;": '\U00002A00',
2014 "xopf;": '\U0001D569',
2015 "xoplus;": '\U00002A01',
2016 "xotime;": '\U00002A02',
2017 "xrArr;": '\U000027F9',
2018 "xrarr;": '\U000027F6',
2019 "xscr;": '\U0001D4CD',
2020 "xsqcup;": '\U00002A06',
2021 "xuplus;": '\U00002A04',
2022 "xutri;": '\U000025B3',
2023 "xvee;": '\U000022C1',
2024 "xwedge;": '\U000022C0',
2025 "yacute;": '\U000000FD',
2026 "yacy;": '\U0000044F',
2027 "ycirc;": '\U00000177',
2028 "ycy;": '\U0000044B',
2029 "yen;": '\U000000A5',
2030 "yfr;": '\U0001D536',
2031 "yicy;": '\U00000457',
2032 "yopf;": '\U0001D56A',
2033 "yscr;": '\U0001D4CE',
2034 "yucy;": '\U0000044E',
2035 "yuml;": '\U000000FF',
2036 "zacute;": '\U0000017A',
2037 "zcaron;": '\U0000017E',
2038 "zcy;": '\U00000437',
2039 "zdot;": '\U0000017C',
2040 "zeetrf;": '\U00002128',
2041 "zeta;": '\U000003B6',
2042 "zfr;": '\U0001D537',
2043 "zhcy;": '\U00000436',
2044 "zigrarr;": '\U000021DD',
2045 "zopf;": '\U0001D56B',
2046 "zscr;": '\U0001D4CF',
2047 "zwj;": '\U0000200D',
2048 "zwnj;": '\U0000200C',
2049 "AElig": '\U000000C6',
2050 "AMP": '\U00000026',
2051 "Aacute": '\U000000C1',
2052 "Acirc": '\U000000C2',
2053 "Agrave": '\U000000C0',
2054 "Aring": '\U000000C5',
2055 "Atilde": '\U000000C3',
2056 "Auml": '\U000000C4',
2057 "COPY": '\U000000A9',
2058 "Ccedil": '\U000000C7',
2059 "ETH": '\U000000D0',
2060 "Eacute": '\U000000C9',
2061 "Ecirc": '\U000000CA',
2062 "Egrave": '\U000000C8',
2063 "Euml": '\U000000CB',
2064 "GT": '\U0000003E',
2065 "Iacute": '\U000000CD',
2066 "Icirc": '\U000000CE',
2067 "Igrave": '\U000000CC',
2068 "Iuml": '\U000000CF',
2069 "LT": '\U0000003C',
2070 "Ntilde": '\U000000D1',
2071 "Oacute": '\U000000D3',
2072 "Ocirc": '\U000000D4',
2073 "Ograve": '\U000000D2',
2074 "Oslash": '\U000000D8',
2075 "Otilde": '\U000000D5',
2076 "Ouml": '\U000000D6',
2077 "QUOT": '\U00000022',
2078 "REG": '\U000000AE',
2079 "THORN": '\U000000DE',
2080 "Uacute": '\U000000DA',
2081 "Ucirc": '\U000000DB',
2082 "Ugrave": '\U000000D9',
2083 "Uuml": '\U000000DC',
2084 "Yacute": '\U000000DD',
2085 "aacute": '\U000000E1',
2086 "acirc": '\U000000E2',
2087 "acute": '\U000000B4',
2088 "aelig": '\U000000E6',
2089 "agrave": '\U000000E0',
2090 "amp": '\U00000026',
2091 "aring": '\U000000E5',
2092 "atilde": '\U000000E3',
2093 "auml": '\U000000E4',
2094 "brvbar": '\U000000A6',
2095 "ccedil": '\U000000E7',
2096 "cedil": '\U000000B8',
2097 "cent": '\U000000A2',
2098 "copy": '\U000000A9',
2099 "curren": '\U000000A4',
2100 "deg": '\U000000B0',
2101 "divide": '\U000000F7',
2102 "eacute": '\U000000E9',
2103 "ecirc": '\U000000EA',
2104 "egrave": '\U000000E8',
2105 "eth": '\U000000F0',
2106 "euml": '\U000000EB',
2107 "frac12": '\U000000BD',
2108 "frac14": '\U000000BC',
2109 "frac34": '\U000000BE',
2110 "gt": '\U0000003E',
2111 "iacute": '\U000000ED',
2112 "icirc": '\U000000EE',
2113 "iexcl": '\U000000A1',
2114 "igrave": '\U000000EC',
2115 "iquest": '\U000000BF',
2116 "iuml": '\U000000EF',
2117 "laquo": '\U000000AB',
2118 "lt": '\U0000003C',
2119 "macr": '\U000000AF',
2120 "micro": '\U000000B5',
2121 "middot": '\U000000B7',
2122 "nbsp": '\U000000A0',
2123 "not": '\U000000AC',
2124 "ntilde": '\U000000F1',
2125 "oacute": '\U000000F3',
2126 "ocirc": '\U000000F4',
2127 "ograve": '\U000000F2',
2128 "ordf": '\U000000AA',
2129 "ordm": '\U000000BA',
2130 "oslash": '\U000000F8',
2131 "otilde": '\U000000F5',
2132 "ouml": '\U000000F6',
2133 "para": '\U000000B6',
2134 "plusmn": '\U000000B1',
2135 "pound": '\U000000A3',
2136 "quot": '\U00000022',
2137 "raquo": '\U000000BB',
2138 "reg": '\U000000AE',
2139 "sect": '\U000000A7',
2140 "shy": '\U000000AD',
2141 "sup1": '\U000000B9',
2142 "sup2": '\U000000B2',
2143 "sup3": '\U000000B3',
2144 "szlig": '\U000000DF',
2145 "thorn": '\U000000FE',
2146 "times": '\U000000D7',
2147 "uacute": '\U000000FA',
2148 "ucirc": '\U000000FB',
2149 "ugrave": '\U000000F9',
2150 "uml": '\U000000A8',
2151 "uuml": '\U000000FC',
2152 "yacute": '\U000000FD',
2153 "yen": '\U000000A5',
2154 "yuml": '\U000000FF',
2155}
2156
// entity2 maps the names of HTML entities that expand to two Unicode code
// points. Every key includes its trailing semicolon. unescapeEntity consults
// this table after the single-rune entity table.
var entity2 = map[string][2]rune{
	// TODO(nigeltao): Handle replacements that are wider than their names.
	// "nLt;": {'\u226A', '\u20D2'},
	// "nGt;": {'\u226B', '\u20D2'},
	"NotEqualTilde;": {'\u2242', '\u0338'},
	"NotGreaterFullEqual;": {'\u2267', '\u0338'},
	"NotGreaterGreater;": {'\u226B', '\u0338'},
	"NotGreaterSlantEqual;": {'\u2A7E', '\u0338'},
	"NotHumpDownHump;": {'\u224E', '\u0338'},
	"NotHumpEqual;": {'\u224F', '\u0338'},
	"NotLeftTriangleBar;": {'\u29CF', '\u0338'},
	"NotLessLess;": {'\u226A', '\u0338'},
	"NotLessSlantEqual;": {'\u2A7D', '\u0338'},
	"NotNestedGreaterGreater;": {'\u2AA2', '\u0338'},
	"NotNestedLessLess;": {'\u2AA1', '\u0338'},
	"NotPrecedesEqual;": {'\u2AAF', '\u0338'},
	"NotRightTriangleBar;": {'\u29D0', '\u0338'},
	"NotSquareSubset;": {'\u228F', '\u0338'},
	"NotSquareSuperset;": {'\u2290', '\u0338'},
	"NotSubset;": {'\u2282', '\u20D2'},
	"NotSucceedsEqual;": {'\u2AB0', '\u0338'},
	"NotSucceedsTilde;": {'\u227F', '\u0338'},
	"NotSuperset;": {'\u2283', '\u20D2'},
	"ThickSpace;": {'\u205F', '\u200A'},
	"acE;": {'\u223E', '\u0333'},
	"bne;": {'\u003D', '\u20E5'},
	"bnequiv;": {'\u2261', '\u20E5'},
	"caps;": {'\u2229', '\uFE00'},
	"cups;": {'\u222A', '\uFE00'},
	"fjlig;": {'\u0066', '\u006A'},
	"gesl;": {'\u22DB', '\uFE00'},
	"gvertneqq;": {'\u2269', '\uFE00'},
	"gvnE;": {'\u2269', '\uFE00'},
	"lates;": {'\u2AAD', '\uFE00'},
	"lesg;": {'\u22DA', '\uFE00'},
	"lvertneqq;": {'\u2268', '\uFE00'},
	"lvnE;": {'\u2268', '\uFE00'},
	"nGg;": {'\u22D9', '\u0338'},
	"nGtv;": {'\u226B', '\u0338'},
	"nLl;": {'\u22D8', '\u0338'},
	"nLtv;": {'\u226A', '\u0338'},
	"nang;": {'\u2220', '\u20D2'},
	"napE;": {'\u2A70', '\u0338'},
	"napid;": {'\u224B', '\u0338'},
	"nbump;": {'\u224E', '\u0338'},
	"nbumpe;": {'\u224F', '\u0338'},
	"ncongdot;": {'\u2A6D', '\u0338'},
	"nedot;": {'\u2250', '\u0338'},
	"nesim;": {'\u2242', '\u0338'},
	"ngE;": {'\u2267', '\u0338'},
	"ngeqq;": {'\u2267', '\u0338'},
	"ngeqslant;": {'\u2A7E', '\u0338'},
	"nges;": {'\u2A7E', '\u0338'},
	"nlE;": {'\u2266', '\u0338'},
	"nleqq;": {'\u2266', '\u0338'},
	"nleqslant;": {'\u2A7D', '\u0338'},
	"nles;": {'\u2A7D', '\u0338'},
	"notinE;": {'\u22F9', '\u0338'},
	"notindot;": {'\u22F5', '\u0338'},
	"nparsl;": {'\u2AFD', '\u20E5'},
	"npart;": {'\u2202', '\u0338'},
	"npre;": {'\u2AAF', '\u0338'},
	"npreceq;": {'\u2AAF', '\u0338'},
	"nrarrc;": {'\u2933', '\u0338'},
	"nrarrw;": {'\u219D', '\u0338'},
	"nsce;": {'\u2AB0', '\u0338'},
	"nsubE;": {'\u2AC5', '\u0338'},
	"nsubset;": {'\u2282', '\u20D2'},
	"nsubseteqq;": {'\u2AC5', '\u0338'},
	"nsucceq;": {'\u2AB0', '\u0338'},
	"nsupE;": {'\u2AC6', '\u0338'},
	"nsupset;": {'\u2283', '\u20D2'},
	"nsupseteqq;": {'\u2AC6', '\u0338'},
	"nvap;": {'\u224D', '\u20D2'},
	"nvge;": {'\u2265', '\u20D2'},
	"nvgt;": {'\u003E', '\u20D2'},
	"nvle;": {'\u2264', '\u20D2'},
	"nvlt;": {'\u003C', '\u20D2'},
	"nvltrie;": {'\u22B4', '\u20D2'},
	"nvrtrie;": {'\u22B5', '\u20D2'},
	"nvsim;": {'\u223C', '\u20D2'},
	"race;": {'\u223D', '\u0331'},
	"smtes;": {'\u2AAC', '\uFE00'},
	"sqcaps;": {'\u2293', '\uFE00'},
	"sqcups;": {'\u2294', '\uFE00'},
	"varsubsetneq;": {'\u228A', '\uFE00'},
	"varsubsetneqq;": {'\u2ACB', '\uFE00'},
	"varsupsetneq;": {'\u228B', '\uFE00'},
	"varsupsetneqq;": {'\u2ACC', '\uFE00'},
	"vnsub;": {'\u2282', '\u20D2'},
	"vnsup;": {'\u2283', '\u20D2'},
	"vsubnE;": {'\u2ACB', '\uFE00'},
	"vsubne;": {'\u228A', '\uFE00'},
	"vsupnE;": {'\u2ACC', '\uFE00'},
	"vsupne;": {'\u228B', '\uFE00'},
}
diff --git a/vendor/golang.org/x/net/html/escape.go b/vendor/golang.org/x/net/html/escape.go
new file mode 100644
index 0000000..04c6bec
--- /dev/null
+++ b/vendor/golang.org/x/net/html/escape.go
@@ -0,0 +1,339 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bytes"
9 "strings"
10 "unicode/utf8"
11)
12
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
//
// The table is indexed by (code point - 0x80): entry k is the rune that a
// numeric character reference with value 0x80+k is replaced with.
var replacementTable = [...]rune{
	'\u20AC', // First entry is what 0x80 should be replaced with.
	'\u0081',
	'\u201A',
	'\u0192',
	'\u201E',
	'\u2026',
	'\u2020',
	'\u2021',
	'\u02C6',
	'\u2030',
	'\u0160',
	'\u2039',
	'\u0152',
	'\u008D',
	'\u017D',
	'\u008F',
	'\u0090',
	'\u2018',
	'\u2019',
	'\u201C',
	'\u201D',
	'\u2022',
	'\u2013',
	'\u2014',
	'\u02DC',
	'\u2122',
	'\u0161',
	'\u203A',
	'\u0153',
	'\u009D',
	'\u017E',
	'\u0178', // Last entry is 0x9F.
	// 0x00->'\uFFFD' is handled programmatically.
	// 0x0D->'\u000D' is a no-op.
}
52
53// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
54// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
55// Precondition: b[src] == '&' && dst <= src.
56// attribute should be true if parsing an attribute value.
57func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
58 // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
59
60 // i starts at 1 because we already know that s[0] == '&'.
61 i, s := 1, b[src:]
62
63 if len(s) <= 1 {
64 b[dst] = b[src]
65 return dst + 1, src + 1
66 }
67
68 if s[i] == '#' {
69 if len(s) <= 3 { // We need to have at least "&#.".
70 b[dst] = b[src]
71 return dst + 1, src + 1
72 }
73 i++
74 c := s[i]
75 hex := false
76 if c == 'x' || c == 'X' {
77 hex = true
78 i++
79 }
80
81 x := '\x00'
82 for i < len(s) {
83 c = s[i]
84 i++
85 if hex {
86 if '0' <= c && c <= '9' {
87 x = 16*x + rune(c) - '0'
88 continue
89 } else if 'a' <= c && c <= 'f' {
90 x = 16*x + rune(c) - 'a' + 10
91 continue
92 } else if 'A' <= c && c <= 'F' {
93 x = 16*x + rune(c) - 'A' + 10
94 continue
95 }
96 } else if '0' <= c && c <= '9' {
97 x = 10*x + rune(c) - '0'
98 continue
99 }
100 if c != ';' {
101 i--
102 }
103 break
104 }
105
106 if i <= 3 { // No characters matched.
107 b[dst] = b[src]
108 return dst + 1, src + 1
109 }
110
111 if 0x80 <= x && x <= 0x9F {
112 // Replace characters from Windows-1252 with UTF-8 equivalents.
113 x = replacementTable[x-0x80]
114 } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
115 // Replace invalid characters with the replacement character.
116 x = '\uFFFD'
117 }
118
119 return dst + utf8.EncodeRune(b[dst:], x), src + i
120 }
121
122 // Consume the maximum number of characters possible, with the
123 // consumed characters matching one of the named references.
124
125 for i < len(s) {
126 c := s[i]
127 i++
128 // Lower-cased characters are more common in entities, so we check for them first.
129 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
130 continue
131 }
132 if c != ';' {
133 i--
134 }
135 break
136 }
137
138 entityName := string(s[1:i])
139 if entityName == "" {
140 // No-op.
141 } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
142 // No-op.
143 } else if x := entity[entityName]; x != 0 {
144 return dst + utf8.EncodeRune(b[dst:], x), src + i
145 } else if x := entity2[entityName]; x[0] != 0 {
146 dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
147 return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
148 } else if !attribute {
149 maxLen := len(entityName) - 1
150 if maxLen > longestEntityWithoutSemicolon {
151 maxLen = longestEntityWithoutSemicolon
152 }
153 for j := maxLen; j > 1; j-- {
154 if x := entity[entityName[:j]]; x != 0 {
155 return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
156 }
157 }
158 }
159
160 dst1, src1 = dst+i, src+i
161 copy(b[dst:dst1], b[src:src1])
162 return dst1, src1
163}
164
165// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
166// attribute should be true if parsing an attribute value.
167func unescape(b []byte, attribute bool) []byte {
168 for i, c := range b {
169 if c == '&' {
170 dst, src := unescapeEntity(b, i, i, attribute)
171 for src < len(b) {
172 c := b[src]
173 if c == '&' {
174 dst, src = unescapeEntity(b, dst, src, attribute)
175 } else {
176 b[dst] = c
177 dst, src = dst+1, src+1
178 }
179 }
180 return b[0:dst]
181 }
182 }
183 return b
184}
185
// lower converts the ASCII upper-case bytes of b to lower case in place, so
// that "aBc" becomes "abc". All other bytes are left untouched. It returns b.
func lower(b []byte) []byte {
	for idx := 0; idx < len(b); idx++ {
		if ch := b[idx]; ch >= 'A' && ch <= 'Z' {
			b[idx] = ch + ('a' - 'A')
		}
	}
	return b
}
195
196// escapeComment is like func escape but escapes its input bytes less often.
197// Per https://github.com/golang/go/issues/58246 some HTML comments are (1)
198// meaningful and (2) contain angle brackets that we'd like to avoid escaping
199// unless we have to.
200//
201// "We have to" includes the '&' byte, since that introduces other escapes.
202//
203// It also includes those bytes (not including EOF) that would otherwise end
204// the comment. Per the summary table at the bottom of comment_test.go, this is
205// the '>' byte that, per above, we'd like to avoid escaping unless we have to.
206//
207// Studying the summary table (and T actions in its '>' column) closely, we
208// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the
209// start of the comment data. State 52 is after a '!'. The other three states
210// are after a '-'.
211//
212// Our algorithm is thus to escape every '&' and to escape '>' if and only if:
213// - The '>' is after a '!' or '-' (in the unescaped data) or
214// - The '>' is at the start of the comment data (after the opening "<!--").
215func escapeComment(w writer, s string) error {
216 // When modifying this function, consider manually increasing the
217 // maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
218 // That increase should only be temporary, not committed, as it
219 // exponentially affects the test running time.
220
221 if len(s) == 0 {
222 return nil
223 }
224
225 // Loop:
226 // - Grow j such that s[i:j] does not need escaping.
227 // - If s[j] does need escaping, output s[i:j] and an escaped s[j],
228 // resetting i and j to point past that s[j] byte.
229 i := 0
230 for j := 0; j < len(s); j++ {
231 escaped := ""
232 switch s[j] {
233 case '&':
234 escaped = "&amp;"
235
236 case '>':
237 if j > 0 {
238 if prev := s[j-1]; (prev != '!') && (prev != '-') {
239 continue
240 }
241 }
242 escaped = "&gt;"
243
244 default:
245 continue
246 }
247
248 if i < j {
249 if _, err := w.WriteString(s[i:j]); err != nil {
250 return err
251 }
252 }
253 if _, err := w.WriteString(escaped); err != nil {
254 return err
255 }
256 i = j + 1
257 }
258
259 if i < len(s) {
260 if _, err := w.WriteString(s[i:]); err != nil {
261 return err
262 }
263 }
264 return nil
265}
266
267// escapeCommentString is to EscapeString as escapeComment is to escape.
268func escapeCommentString(s string) string {
269 if strings.IndexAny(s, "&>") == -1 {
270 return s
271 }
272 var buf bytes.Buffer
273 escapeComment(&buf, s)
274 return buf.String()
275}
276
277const escapedChars = "&'<>\"\r"
278
279func escape(w writer, s string) error {
280 i := strings.IndexAny(s, escapedChars)
281 for i != -1 {
282 if _, err := w.WriteString(s[:i]); err != nil {
283 return err
284 }
285 var esc string
286 switch s[i] {
287 case '&':
288 esc = "&amp;"
289 case '\'':
290 // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
291 esc = "&#39;"
292 case '<':
293 esc = "&lt;"
294 case '>':
295 esc = "&gt;"
296 case '"':
297 // "&#34;" is shorter than "&quot;".
298 esc = "&#34;"
299 case '\r':
300 esc = "&#13;"
301 default:
302 panic("unrecognized escape character")
303 }
304 s = s[i+1:]
305 if _, err := w.WriteString(esc); err != nil {
306 return err
307 }
308 i = strings.IndexAny(s, escapedChars)
309 }
310 _, err := w.WriteString(s)
311 return err
312}
313
314// EscapeString escapes special characters like "<" to become "&lt;". It
315// escapes only five such characters: <, >, &, ' and ".
316// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
317// always true.
318func EscapeString(s string) string {
319 if strings.IndexAny(s, escapedChars) == -1 {
320 return s
321 }
322 var buf bytes.Buffer
323 escape(&buf, s)
324 return buf.String()
325}
326
327// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
328// larger range of entities than EscapeString escapes. For example, "&aacute;"
329// unescapes to "á", as does "&#225;" and "&xE1;".
330// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
331// always true.
332func UnescapeString(s string) string {
333 for _, c := range s {
334 if c == '&' {
335 return string(unescape([]byte(s), false))
336 }
337 }
338 return s
339}
diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go
new file mode 100644
index 0000000..9da9e9d
--- /dev/null
+++ b/vendor/golang.org/x/net/html/foreign.go
@@ -0,0 +1,222 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "strings"
9)
10
11func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
12 for i := range aa {
13 if newName, ok := nameMap[aa[i].Key]; ok {
14 aa[i].Key = newName
15 }
16 }
17}
18
19func adjustForeignAttributes(aa []Attribute) {
20 for i, a := range aa {
21 if a.Key == "" || a.Key[0] != 'x' {
22 continue
23 }
24 switch a.Key {
25 case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
26 "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
27 j := strings.Index(a.Key, ":")
28 aa[i].Namespace = a.Key[:j]
29 aa[i].Key = a.Key[j+1:]
30 }
31 }
32}
33
34func htmlIntegrationPoint(n *Node) bool {
35 if n.Type != ElementNode {
36 return false
37 }
38 switch n.Namespace {
39 case "math":
40 if n.Data == "annotation-xml" {
41 for _, a := range n.Attr {
42 if a.Key == "encoding" {
43 val := strings.ToLower(a.Val)
44 if val == "text/html" || val == "application/xhtml+xml" {
45 return true
46 }
47 }
48 }
49 }
50 case "svg":
51 switch n.Data {
52 case "desc", "foreignObject", "title":
53 return true
54 }
55 }
56 return false
57}
58
59func mathMLTextIntegrationPoint(n *Node) bool {
60 if n.Namespace != "math" {
61 return false
62 }
63 switch n.Data {
64 case "mi", "mo", "mn", "ms", "mtext":
65 return true
66 }
67 return false
68}
69
// Section 12.2.6.5.
//
// breakout is a set of lower-case HTML tag names; a name is a member when it
// maps to true.
// NOTE(review): presumably these are the start tags that break out of
// foreign (SVG/MathML) content; confirm against its use in parse.go.
var breakout = map[string]bool{
	"b": true,
	"big": true,
	"blockquote": true,
	"body": true,
	"br": true,
	"center": true,
	"code": true,
	"dd": true,
	"div": true,
	"dl": true,
	"dt": true,
	"em": true,
	"embed": true,
	"h1": true,
	"h2": true,
	"h3": true,
	"h4": true,
	"h5": true,
	"h6": true,
	"head": true,
	"hr": true,
	"i": true,
	"img": true,
	"li": true,
	"listing": true,
	"menu": true,
	"meta": true,
	"nobr": true,
	"ol": true,
	"p": true,
	"pre": true,
	"ruby": true,
	"s": true,
	"small": true,
	"span": true,
	"strong": true,
	"strike": true,
	"sub": true,
	"sup": true,
	"table": true,
	"tt": true,
	"u": true,
	"ul": true,
	"var": true,
}
117
// Section 12.2.6.5.
//
// svgTagNameAdjustments maps all-lower-case SVG tag names to their
// mixed-case spellings.
var svgTagNameAdjustments = map[string]string{
	"altglyph": "altGlyph",
	"altglyphdef": "altGlyphDef",
	"altglyphitem": "altGlyphItem",
	"animatecolor": "animateColor",
	"animatemotion": "animateMotion",
	"animatetransform": "animateTransform",
	"clippath": "clipPath",
	"feblend": "feBlend",
	"fecolormatrix": "feColorMatrix",
	"fecomponenttransfer": "feComponentTransfer",
	"fecomposite": "feComposite",
	"feconvolvematrix": "feConvolveMatrix",
	"fediffuselighting": "feDiffuseLighting",
	"fedisplacementmap": "feDisplacementMap",
	"fedistantlight": "feDistantLight",
	"feflood": "feFlood",
	"fefunca": "feFuncA",
	"fefuncb": "feFuncB",
	"fefuncg": "feFuncG",
	"fefuncr": "feFuncR",
	"fegaussianblur": "feGaussianBlur",
	"feimage": "feImage",
	"femerge": "feMerge",
	"femergenode": "feMergeNode",
	"femorphology": "feMorphology",
	"feoffset": "feOffset",
	"fepointlight": "fePointLight",
	"fespecularlighting": "feSpecularLighting",
	"fespotlight": "feSpotLight",
	"fetile": "feTile",
	"feturbulence": "feTurbulence",
	"foreignobject": "foreignObject",
	"glyphref": "glyphRef",
	"lineargradient": "linearGradient",
	"radialgradient": "radialGradient",
	"textpath": "textPath",
}
157
// Section 12.2.6.1
//
// mathMLAttributeAdjustments maps all-lower-case MathML attribute names to
// their mixed-case spellings. Its shape matches the nameMap parameter of
// adjustAttributeNames.
var mathMLAttributeAdjustments = map[string]string{
	"definitionurl": "definitionURL",
}
162
// svgAttributeAdjustments maps all-lower-case SVG attribute names to their
// mixed-case spellings. Its shape matches the nameMap parameter of
// adjustAttributeNames.
var svgAttributeAdjustments = map[string]string{
	"attributename": "attributeName",
	"attributetype": "attributeType",
	"basefrequency": "baseFrequency",
	"baseprofile": "baseProfile",
	"calcmode": "calcMode",
	"clippathunits": "clipPathUnits",
	"diffuseconstant": "diffuseConstant",
	"edgemode": "edgeMode",
	"filterunits": "filterUnits",
	"glyphref": "glyphRef",
	"gradienttransform": "gradientTransform",
	"gradientunits": "gradientUnits",
	"kernelmatrix": "kernelMatrix",
	"kernelunitlength": "kernelUnitLength",
	"keypoints": "keyPoints",
	"keysplines": "keySplines",
	"keytimes": "keyTimes",
	"lengthadjust": "lengthAdjust",
	"limitingconeangle": "limitingConeAngle",
	"markerheight": "markerHeight",
	"markerunits": "markerUnits",
	"markerwidth": "markerWidth",
	"maskcontentunits": "maskContentUnits",
	"maskunits": "maskUnits",
	"numoctaves": "numOctaves",
	"pathlength": "pathLength",
	"patterncontentunits": "patternContentUnits",
	"patterntransform": "patternTransform",
	"patternunits": "patternUnits",
	"pointsatx": "pointsAtX",
	"pointsaty": "pointsAtY",
	"pointsatz": "pointsAtZ",
	"preservealpha": "preserveAlpha",
	"preserveaspectratio": "preserveAspectRatio",
	"primitiveunits": "primitiveUnits",
	"refx": "refX",
	"refy": "refY",
	"repeatcount": "repeatCount",
	"repeatdur": "repeatDur",
	"requiredextensions": "requiredExtensions",
	"requiredfeatures": "requiredFeatures",
	"specularconstant": "specularConstant",
	"specularexponent": "specularExponent",
	"spreadmethod": "spreadMethod",
	"startoffset": "startOffset",
	"stddeviation": "stdDeviation",
	"stitchtiles": "stitchTiles",
	"surfacescale": "surfaceScale",
	"systemlanguage": "systemLanguage",
	"tablevalues": "tableValues",
	"targetx": "targetX",
	"targety": "targetY",
	"textlength": "textLength",
	"viewbox": "viewBox",
	"viewtarget": "viewTarget",
	"xchannelselector": "xChannelSelector",
	"ychannelselector": "yChannelSelector",
	"zoomandpan": "zoomAndPan",
}
diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go
new file mode 100644
index 0000000..1350eef
--- /dev/null
+++ b/vendor/golang.org/x/net/html/node.go
@@ -0,0 +1,225 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "golang.org/x/net/html/atom"
9)
10
// A NodeType is the type of a Node.
type NodeType uint32

// The NodeType values. ErrorNode is the zero value of NodeType.
const (
	ErrorNode NodeType = iota
	TextNode
	DocumentNode
	ElementNode
	CommentNode
	DoctypeNode
	// RawNode nodes are not returned by the parser, but can be part of the
	// Node tree passed to func Render to insert raw HTML (without escaping).
	// If so, this package makes no guarantee that the rendered HTML is secure
	// (from e.g. Cross Site Scripting attacks) or well-formed.
	RawNode
	// scopeMarkerNode is unexported; it is the type of the scopeMarker
	// sentinel Node.
	scopeMarkerNode
)
28
// scopeMarker is a sentinel Node whose Type is scopeMarkerNode.
//
// Section 12.2.4.3 says "The markers are inserted when entering applet,
// object, marquee, template, td, th, and caption elements, and are used
// to prevent formatting from "leaking" into applet, object, marquee,
// template, td, th, and caption elements".
var scopeMarker = Node{Type: scopeMarkerNode}
34
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
// is the atom for Data, or zero if Data is not a known tag name.
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
	// Tree links. The children of a node form a doubly linked list running
	// from FirstChild to LastChild via NextSibling/PrevSibling.
	Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node

	Type NodeType
	DataAtom atom.Atom
	Data string
	Namespace string
	Attr []Attribute
}
53
54// InsertBefore inserts newChild as a child of n, immediately before oldChild
55// in the sequence of n's children. oldChild may be nil, in which case newChild
56// is appended to the end of n's children.
57//
58// It will panic if newChild already has a parent or siblings.
59func (n *Node) InsertBefore(newChild, oldChild *Node) {
60 if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
61 panic("html: InsertBefore called for an attached child Node")
62 }
63 var prev, next *Node
64 if oldChild != nil {
65 prev, next = oldChild.PrevSibling, oldChild
66 } else {
67 prev = n.LastChild
68 }
69 if prev != nil {
70 prev.NextSibling = newChild
71 } else {
72 n.FirstChild = newChild
73 }
74 if next != nil {
75 next.PrevSibling = newChild
76 } else {
77 n.LastChild = newChild
78 }
79 newChild.Parent = n
80 newChild.PrevSibling = prev
81 newChild.NextSibling = next
82}
83
84// AppendChild adds a node c as a child of n.
85//
86// It will panic if c already has a parent or siblings.
87func (n *Node) AppendChild(c *Node) {
88 if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
89 panic("html: AppendChild called for an attached child Node")
90 }
91 last := n.LastChild
92 if last != nil {
93 last.NextSibling = c
94 } else {
95 n.FirstChild = c
96 }
97 n.LastChild = c
98 c.Parent = n
99 c.PrevSibling = last
100}
101
102// RemoveChild removes a node c that is a child of n. Afterwards, c will have
103// no parent and no siblings.
104//
105// It will panic if c's parent is not n.
106func (n *Node) RemoveChild(c *Node) {
107 if c.Parent != n {
108 panic("html: RemoveChild called for a non-child Node")
109 }
110 if n.FirstChild == c {
111 n.FirstChild = c.NextSibling
112 }
113 if c.NextSibling != nil {
114 c.NextSibling.PrevSibling = c.PrevSibling
115 }
116 if n.LastChild == c {
117 n.LastChild = c.PrevSibling
118 }
119 if c.PrevSibling != nil {
120 c.PrevSibling.NextSibling = c.NextSibling
121 }
122 c.Parent = nil
123 c.PrevSibling = nil
124 c.NextSibling = nil
125}
126
127// reparentChildren reparents all of src's child nodes to dst.
128func reparentChildren(dst, src *Node) {
129 for {
130 child := src.FirstChild
131 if child == nil {
132 break
133 }
134 src.RemoveChild(child)
135 dst.AppendChild(child)
136 }
137}
138
139// clone returns a new node with the same type, data and attributes.
140// The clone has no parent, no siblings and no children.
141func (n *Node) clone() *Node {
142 m := &Node{
143 Type: n.Type,
144 DataAtom: n.DataAtom,
145 Data: n.Data,
146 Attr: make([]Attribute, len(n.Attr)),
147 }
148 copy(m.Attr, n.Attr)
149 return m
150}
151
152// nodeStack is a stack of nodes.
153type nodeStack []*Node
154
155// pop pops the stack. It will panic if s is empty.
156func (s *nodeStack) pop() *Node {
157 i := len(*s)
158 n := (*s)[i-1]
159 *s = (*s)[:i-1]
160 return n
161}
162
163// top returns the most recently pushed node, or nil if s is empty.
164func (s *nodeStack) top() *Node {
165 if i := len(*s); i > 0 {
166 return (*s)[i-1]
167 }
168 return nil
169}
170
171// index returns the index of the top-most occurrence of n in the stack, or -1
172// if n is not present.
173func (s *nodeStack) index(n *Node) int {
174 for i := len(*s) - 1; i >= 0; i-- {
175 if (*s)[i] == n {
176 return i
177 }
178 }
179 return -1
180}
181
182// contains returns whether a is within s.
183func (s *nodeStack) contains(a atom.Atom) bool {
184 for _, n := range *s {
185 if n.DataAtom == a && n.Namespace == "" {
186 return true
187 }
188 }
189 return false
190}
191
192// insert inserts a node at the given index.
193func (s *nodeStack) insert(i int, n *Node) {
194 (*s) = append(*s, nil)
195 copy((*s)[i+1:], (*s)[i:])
196 (*s)[i] = n
197}
198
199// remove removes a node from the stack. It is a no-op if n is not present.
200func (s *nodeStack) remove(n *Node) {
201 i := s.index(n)
202 if i == -1 {
203 return
204 }
205 copy((*s)[i:], (*s)[i+1:])
206 j := len(*s) - 1
207 (*s)[j] = nil
208 *s = (*s)[:j]
209}
210
211type insertionModeStack []insertionMode
212
213func (s *insertionModeStack) pop() (im insertionMode) {
214 i := len(*s)
215 im = (*s)[i-1]
216 *s = (*s)[:i-1]
217 return im
218}
219
220func (s *insertionModeStack) top() insertionMode {
221 if i := len(*s); i > 0 {
222 return (*s)[i-1]
223 }
224 return nil
225}
diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go
new file mode 100644
index 0000000..46a89ed
--- /dev/null
+++ b/vendor/golang.org/x/net/html/parse.go
@@ -0,0 +1,2460 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "errors"
9 "fmt"
10 "io"
11 "strings"
12
13 a "golang.org/x/net/html/atom"
14)
15
// A parser implements the HTML5 parsing algorithm:
// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
//
// It bundles the tokenizer with the tree-construction state that the
// insertion mode functions read and update.
type parser struct {
	// tokenizer provides the tokens for the parser.
	tokenizer *Tokenizer
	// tok is the most recently read token.
	tok Token
	// Self-closing tags like <hr/> are treated as start tags, except that
	// hasSelfClosingToken is set while they are being processed.
	hasSelfClosingToken bool
	// doc is the document root element.
	doc *Node
	// The stack of open elements (section 12.2.4.2) and active formatting
	// elements (section 12.2.4.3).
	oe, afe nodeStack
	// Element pointers (section 12.2.4.4).
	head, form *Node
	// Other parsing state flags (section 12.2.4.5).
	scripting, framesetOK bool
	// templateStack is the stack of template insertion modes.
	templateStack insertionModeStack
	// im is the current insertion mode.
	im insertionMode
	// originalIM is the insertion mode to go back to after completing a text
	// or inTableText insertion mode.
	originalIM insertionMode
	// fosterParenting is whether new elements should be inserted according to
	// the foster parenting rules (section 12.2.6.1).
	fosterParenting bool
	// quirks is whether the parser is operating in "quirks mode."
	quirks bool
	// fragment is whether the parser is parsing an HTML fragment.
	fragment bool
	// context is the context element when parsing an HTML fragment
	// (section 12.4).
	context *Node
}
53
54func (p *parser) top() *Node {
55 if n := p.oe.top(); n != nil {
56 return n
57 }
58 return p.doc
59}
60
// Stop tags for use in popUntil. These come from section 12.2.4.2.
var (
	// defaultScopeStopTags is keyed by element namespace (the empty string
	// for elements whose Namespace is unset). indexOfElementInScope consults
	// it for defaultScope, listItemScope and buttonScope: reaching an element
	// whose tag is listed under its namespace ends the search.
	defaultScopeStopTags = map[string][]a.Atom{
		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
		"svg":  {a.Desc, a.ForeignObject, a.Title},
	}
)
69
// scope selects which set of boundary elements applies when searching
// (indexOfElementInScope) or clearing (clearStackToContext) the stack of open
// elements.
type scope int

// The scopes understood by the parser. indexOfElementInScope handles
// defaultScope, listItemScope, buttonScope, tableScope and selectScope;
// clearStackToContext handles tableScope, tableRowScope and tableBodyScope.
const (
	defaultScope scope = iota
	listItemScope
	buttonScope
	tableScope
	tableRowScope
	tableBodyScope
	selectScope
)
81
82// popUntil pops the stack of open elements at the highest element whose tag
83// is in matchTags, provided there is no higher element in the scope's stop
84// tags (as defined in section 12.2.4.2). It returns whether or not there was
85// such an element. If there was not, popUntil leaves the stack unchanged.
86//
87// For example, the set of stop tags for table scope is: "html", "table". If
88// the stack was:
89// ["html", "body", "font", "table", "b", "i", "u"]
90// then popUntil(tableScope, "font") would return false, but
91// popUntil(tableScope, "i") would return true and the stack would become:
92// ["html", "body", "font", "table", "b"]
93//
94// If an element's tag is in both the stop tags and matchTags, then the stack
95// will be popped and the function returns true (provided, of course, there was
96// no higher element in the stack that was also in the stop tags). For example,
97// popUntil(tableScope, "table") returns true and leaves:
98// ["html", "body", "font"]
99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100 if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101 p.oe = p.oe[:i]
102 return true
103 }
104 return false
105}
106
107// indexOfElementInScope returns the index in p.oe of the highest element whose
108// tag is in matchTags that is in scope. If no matching element is in scope, it
109// returns -1.
110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111 for i := len(p.oe) - 1; i >= 0; i-- {
112 tagAtom := p.oe[i].DataAtom
113 if p.oe[i].Namespace == "" {
114 for _, t := range matchTags {
115 if t == tagAtom {
116 return i
117 }
118 }
119 switch s {
120 case defaultScope:
121 // No-op.
122 case listItemScope:
123 if tagAtom == a.Ol || tagAtom == a.Ul {
124 return -1
125 }
126 case buttonScope:
127 if tagAtom == a.Button {
128 return -1
129 }
130 case tableScope:
131 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132 return -1
133 }
134 case selectScope:
135 if tagAtom != a.Optgroup && tagAtom != a.Option {
136 return -1
137 }
138 default:
139 panic("unreachable")
140 }
141 }
142 switch s {
143 case defaultScope, listItemScope, buttonScope:
144 for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145 if t == tagAtom {
146 return -1
147 }
148 }
149 }
150 }
151 return -1
152}
153
154// elementInScope is like popUntil, except that it doesn't modify the stack of
155// open elements.
156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157 return p.indexOfElementInScope(s, matchTags...) != -1
158}
159
160// clearStackToContext pops elements off the stack of open elements until a
161// scope-defined element is found.
162func (p *parser) clearStackToContext(s scope) {
163 for i := len(p.oe) - 1; i >= 0; i-- {
164 tagAtom := p.oe[i].DataAtom
165 switch s {
166 case tableScope:
167 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168 p.oe = p.oe[:i+1]
169 return
170 }
171 case tableRowScope:
172 if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173 p.oe = p.oe[:i+1]
174 return
175 }
176 case tableBodyScope:
177 if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178 p.oe = p.oe[:i+1]
179 return
180 }
181 default:
182 panic("unreachable")
183 }
184 }
185}
186
187// parseGenericRawTextElement implements the generic raw text element parsing
188// algorithm defined in 12.2.6.2.
189// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
190// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
191// officially, need to make tokenizer consider both states.
192func (p *parser) parseGenericRawTextElement() {
193 p.addElement()
194 p.originalIM = p.im
195 p.im = textIM
196}
197
198// generateImpliedEndTags pops nodes off the stack of open elements as long as
199// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
200// If exceptions are specified, nodes with that name will not be popped off.
201func (p *parser) generateImpliedEndTags(exceptions ...string) {
202 var i int
203loop:
204 for i = len(p.oe) - 1; i >= 0; i-- {
205 n := p.oe[i]
206 if n.Type != ElementNode {
207 break
208 }
209 switch n.DataAtom {
210 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
211 for _, except := range exceptions {
212 if n.Data == except {
213 break loop
214 }
215 }
216 continue
217 }
218 break
219 }
220
221 p.oe = p.oe[:i+1]
222}
223
224// addChild adds a child node n to the top element, and pushes n onto the stack
225// of open elements if it is an element node.
226func (p *parser) addChild(n *Node) {
227 if p.shouldFosterParent() {
228 p.fosterParent(n)
229 } else {
230 p.top().AppendChild(n)
231 }
232
233 if n.Type == ElementNode {
234 p.oe = append(p.oe, n)
235 }
236}
237
238// shouldFosterParent returns whether the next node to be added should be
239// foster parented.
240func (p *parser) shouldFosterParent() bool {
241 if p.fosterParenting {
242 switch p.top().DataAtom {
243 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
244 return true
245 }
246 }
247 return false
248}
249
250// fosterParent adds a child node according to the foster parenting rules.
251// Section 12.2.6.1, "foster parenting".
252func (p *parser) fosterParent(n *Node) {
253 var table, parent, prev, template *Node
254 var i int
255 for i = len(p.oe) - 1; i >= 0; i-- {
256 if p.oe[i].DataAtom == a.Table {
257 table = p.oe[i]
258 break
259 }
260 }
261
262 var j int
263 for j = len(p.oe) - 1; j >= 0; j-- {
264 if p.oe[j].DataAtom == a.Template {
265 template = p.oe[j]
266 break
267 }
268 }
269
270 if template != nil && (table == nil || j > i) {
271 template.AppendChild(n)
272 return
273 }
274
275 if table == nil {
276 // The foster parent is the html element.
277 parent = p.oe[0]
278 } else {
279 parent = table.Parent
280 }
281 if parent == nil {
282 parent = p.oe[i-1]
283 }
284
285 if table != nil {
286 prev = table.PrevSibling
287 } else {
288 prev = parent.LastChild
289 }
290 if prev != nil && prev.Type == TextNode && n.Type == TextNode {
291 prev.Data += n.Data
292 return
293 }
294
295 parent.InsertBefore(n, table)
296}
297
298// addText adds text to the preceding node if it is a text node, or else it
299// calls addChild with a new text node.
300func (p *parser) addText(text string) {
301 if text == "" {
302 return
303 }
304
305 if p.shouldFosterParent() {
306 p.fosterParent(&Node{
307 Type: TextNode,
308 Data: text,
309 })
310 return
311 }
312
313 t := p.top()
314 if n := t.LastChild; n != nil && n.Type == TextNode {
315 n.Data += text
316 return
317 }
318 p.addChild(&Node{
319 Type: TextNode,
320 Data: text,
321 })
322}
323
324// addElement adds a child element based on the current token.
325func (p *parser) addElement() {
326 p.addChild(&Node{
327 Type: ElementNode,
328 DataAtom: p.tok.DataAtom,
329 Data: p.tok.Data,
330 Attr: p.tok.Attr,
331 })
332}
333
334// Section 12.2.4.3.
335func (p *parser) addFormattingElement() {
336 tagAtom, attr := p.tok.DataAtom, p.tok.Attr
337 p.addElement()
338
339 // Implement the Noah's Ark clause, but with three per family instead of two.
340 identicalElements := 0
341findIdenticalElements:
342 for i := len(p.afe) - 1; i >= 0; i-- {
343 n := p.afe[i]
344 if n.Type == scopeMarkerNode {
345 break
346 }
347 if n.Type != ElementNode {
348 continue
349 }
350 if n.Namespace != "" {
351 continue
352 }
353 if n.DataAtom != tagAtom {
354 continue
355 }
356 if len(n.Attr) != len(attr) {
357 continue
358 }
359 compareAttributes:
360 for _, t0 := range n.Attr {
361 for _, t1 := range attr {
362 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
363 // Found a match for this attribute, continue with the next attribute.
364 continue compareAttributes
365 }
366 }
367 // If we get here, there is no attribute that matches a.
368 // Therefore the element is not identical to the new one.
369 continue findIdenticalElements
370 }
371
372 identicalElements++
373 if identicalElements >= 3 {
374 p.afe.remove(n)
375 }
376 }
377
378 p.afe = append(p.afe, p.top())
379}
380
381// Section 12.2.4.3.
382func (p *parser) clearActiveFormattingElements() {
383 for {
384 if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
385 return
386 }
387 }
388}
389
390// Section 12.2.4.3.
391func (p *parser) reconstructActiveFormattingElements() {
392 n := p.afe.top()
393 if n == nil {
394 return
395 }
396 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
397 return
398 }
399 i := len(p.afe) - 1
400 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
401 if i == 0 {
402 i = -1
403 break
404 }
405 i--
406 n = p.afe[i]
407 }
408 for {
409 i++
410 clone := p.afe[i].clone()
411 p.addChild(clone)
412 p.afe[i] = clone
413 if i == len(p.afe)-1 {
414 break
415 }
416 }
417}
418
// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
// self-closing flag of the token being processed has been dealt with.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
423
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed.
//
// The insertion mode functions in this file return false after changing the
// parser's state (for example p.im or p.tok) when the token has not been
// consumed.
type insertionMode func(*parser) bool
429
430// setOriginalIM sets the insertion mode to return to after completing a text or
431// inTableText insertion mode.
432// Section 12.2.4.1, "using the rules for".
433func (p *parser) setOriginalIM() {
434 if p.originalIM != nil {
435 panic("html: bad parser state: originalIM was set twice")
436 }
437 p.originalIM = p.im
438}
439
440// Section 12.2.4.1, "reset the insertion mode".
441func (p *parser) resetInsertionMode() {
442 for i := len(p.oe) - 1; i >= 0; i-- {
443 n := p.oe[i]
444 last := i == 0
445 if last && p.context != nil {
446 n = p.context
447 }
448
449 switch n.DataAtom {
450 case a.Select:
451 if !last {
452 for ancestor, first := n, p.oe[0]; ancestor != first; {
453 ancestor = p.oe[p.oe.index(ancestor)-1]
454 switch ancestor.DataAtom {
455 case a.Template:
456 p.im = inSelectIM
457 return
458 case a.Table:
459 p.im = inSelectInTableIM
460 return
461 }
462 }
463 }
464 p.im = inSelectIM
465 case a.Td, a.Th:
466 // TODO: remove this divergence from the HTML5 spec.
467 //
468 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
469 p.im = inCellIM
470 case a.Tr:
471 p.im = inRowIM
472 case a.Tbody, a.Thead, a.Tfoot:
473 p.im = inTableBodyIM
474 case a.Caption:
475 p.im = inCaptionIM
476 case a.Colgroup:
477 p.im = inColumnGroupIM
478 case a.Table:
479 p.im = inTableIM
480 case a.Template:
481 // TODO: remove this divergence from the HTML5 spec.
482 if n.Namespace != "" {
483 continue
484 }
485 p.im = p.templateStack.top()
486 case a.Head:
487 // TODO: remove this divergence from the HTML5 spec.
488 //
489 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
490 p.im = inHeadIM
491 case a.Body:
492 p.im = inBodyIM
493 case a.Frameset:
494 p.im = inFramesetIM
495 case a.Html:
496 if p.head == nil {
497 p.im = beforeHeadIM
498 } else {
499 p.im = afterHeadIM
500 }
501 default:
502 if last {
503 p.im = inBodyIM
504 return
505 }
506 continue
507 }
508 return
509 }
510}
511
// whitespace is the cutset (space, tab, carriage return, newline, form feed)
// that the insertion modes pass to strings.TrimLeft to strip leading
// whitespace from text tokens.
const whitespace = " \t\r\n\f"
513
514// Section 12.2.6.4.1.
515func initialIM(p *parser) bool {
516 switch p.tok.Type {
517 case TextToken:
518 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
519 if len(p.tok.Data) == 0 {
520 // It was all whitespace, so ignore it.
521 return true
522 }
523 case CommentToken:
524 p.doc.AppendChild(&Node{
525 Type: CommentNode,
526 Data: p.tok.Data,
527 })
528 return true
529 case DoctypeToken:
530 n, quirks := parseDoctype(p.tok.Data)
531 p.doc.AppendChild(n)
532 p.quirks = quirks
533 p.im = beforeHTMLIM
534 return true
535 }
536 p.quirks = true
537 p.im = beforeHTMLIM
538 return false
539}
540
541// Section 12.2.6.4.2.
542func beforeHTMLIM(p *parser) bool {
543 switch p.tok.Type {
544 case DoctypeToken:
545 // Ignore the token.
546 return true
547 case TextToken:
548 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
549 if len(p.tok.Data) == 0 {
550 // It was all whitespace, so ignore it.
551 return true
552 }
553 case StartTagToken:
554 if p.tok.DataAtom == a.Html {
555 p.addElement()
556 p.im = beforeHeadIM
557 return true
558 }
559 case EndTagToken:
560 switch p.tok.DataAtom {
561 case a.Head, a.Body, a.Html, a.Br:
562 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
563 return false
564 default:
565 // Ignore the token.
566 return true
567 }
568 case CommentToken:
569 p.doc.AppendChild(&Node{
570 Type: CommentNode,
571 Data: p.tok.Data,
572 })
573 return true
574 }
575 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
576 return false
577}
578
579// Section 12.2.6.4.3.
580func beforeHeadIM(p *parser) bool {
581 switch p.tok.Type {
582 case TextToken:
583 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
584 if len(p.tok.Data) == 0 {
585 // It was all whitespace, so ignore it.
586 return true
587 }
588 case StartTagToken:
589 switch p.tok.DataAtom {
590 case a.Head:
591 p.addElement()
592 p.head = p.top()
593 p.im = inHeadIM
594 return true
595 case a.Html:
596 return inBodyIM(p)
597 }
598 case EndTagToken:
599 switch p.tok.DataAtom {
600 case a.Head, a.Body, a.Html, a.Br:
601 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
602 return false
603 default:
604 // Ignore the token.
605 return true
606 }
607 case CommentToken:
608 p.addChild(&Node{
609 Type: CommentNode,
610 Data: p.tok.Data,
611 })
612 return true
613 case DoctypeToken:
614 // Ignore the token.
615 return true
616 }
617
618 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
619 return false
620}
621
// inHeadIM is the "in head" insertion mode. It handles the tokens that may
// appear inside <head>; any token it does not handle causes an implied
// </head> end tag and is left unconsumed. It reports whether the token was
// consumed.
// Section 12.2.6.4.4.
func inHeadIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		s := strings.TrimLeft(p.tok.Data, whitespace)
		if len(s) < len(p.tok.Data) {
			// Add the initial whitespace to the current node.
			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
			if s == "" {
				return true
			}
			// Keep only the non-whitespace remainder in the token.
			p.tok.Data = s
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			return inBodyIM(p)
		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
			// Insert the element and pop it again straight away.
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			return true
		case a.Noscript:
			if p.scripting {
				p.parseGenericRawTextElement()
				return true
			}
			p.addElement()
			p.im = inHeadNoscriptIM
			// Don't let the tokenizer go into raw text mode when scripting is disabled.
			p.tokenizer.NextIsNotRawText()
			return true
		case a.Script, a.Title:
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
			return true
		case a.Noframes, a.Style:
			p.parseGenericRawTextElement()
			return true
		case a.Head:
			// Ignore the token.
			return true
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// We don't handle all of the corner cases when mixing foreign
			// content (i.e. <math> or <svg>) with <template>. Without this
			// early return, we can get into an infinite loop, possibly because
			// of the "TODO... further divergence" a little below.
			//
			// As a workaround, if we are mixing foreign content and templates,
			// just ignore the rest of the HTML. Foreign content is rare and a
			// relatively old HTML feature. Templates are also rare and a
			// relatively new HTML feature. Their combination is very rare.
			for _, e := range p.oe {
				if e.Namespace != "" {
					p.im = ignoreTheRemainingTokens
					return true
				}
			}

			// Open the template: push a scope marker onto the active
			// formatting elements and a matching entry onto the template
			// insertion mode stack.
			p.addElement()
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
			p.im = inTemplateIM
			p.templateStack = append(p.templateStack, inTemplateIM)
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Head:
			p.oe.pop()
			p.im = afterHeadIM
			return true
		case a.Body, a.Html, a.Br:
			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
			return false
		case a.Template:
			if !p.oe.contains(a.Template) {
				// No open template to close: ignore the token.
				return true
			}
			// TODO: remove this further divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.generateImpliedEndTags()
			// Pop up to and including the topmost template element that has
			// an empty Namespace.
			for i := len(p.oe) - 1; i >= 0; i-- {
				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
					p.oe = p.oe[:i]
					break
				}
			}
			p.clearActiveFormattingElements()
			p.templateStack.pop()
			p.resetInsertionMode()
			return true
		default:
			// Ignore the token.
			return true
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	}

	// Anything else: imply </head> and leave the token unconsumed.
	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
	return false
}
736
737// Section 12.2.6.4.5.
738func inHeadNoscriptIM(p *parser) bool {
739 switch p.tok.Type {
740 case DoctypeToken:
741 // Ignore the token.
742 return true
743 case StartTagToken:
744 switch p.tok.DataAtom {
745 case a.Html:
746 return inBodyIM(p)
747 case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
748 return inHeadIM(p)
749 case a.Head:
750 // Ignore the token.
751 return true
752 case a.Noscript:
753 // Don't let the tokenizer go into raw text mode even when a <noscript>
754 // tag is in "in head noscript" insertion mode.
755 p.tokenizer.NextIsNotRawText()
756 // Ignore the token.
757 return true
758 }
759 case EndTagToken:
760 switch p.tok.DataAtom {
761 case a.Noscript, a.Br:
762 default:
763 // Ignore the token.
764 return true
765 }
766 case TextToken:
767 s := strings.TrimLeft(p.tok.Data, whitespace)
768 if len(s) == 0 {
769 // It was all whitespace.
770 return inHeadIM(p)
771 }
772 case CommentToken:
773 return inHeadIM(p)
774 }
775 p.oe.pop()
776 if p.top().DataAtom != a.Head {
777 panic("html: the new current node will be a head element.")
778 }
779 p.im = inHeadIM
780 if p.tok.DataAtom == a.Noscript {
781 return true
782 }
783 return false
784}
785
786// Section 12.2.6.4.6.
787func afterHeadIM(p *parser) bool {
788 switch p.tok.Type {
789 case TextToken:
790 s := strings.TrimLeft(p.tok.Data, whitespace)
791 if len(s) < len(p.tok.Data) {
792 // Add the initial whitespace to the current node.
793 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
794 if s == "" {
795 return true
796 }
797 p.tok.Data = s
798 }
799 case StartTagToken:
800 switch p.tok.DataAtom {
801 case a.Html:
802 return inBodyIM(p)
803 case a.Body:
804 p.addElement()
805 p.framesetOK = false
806 p.im = inBodyIM
807 return true
808 case a.Frameset:
809 p.addElement()
810 p.im = inFramesetIM
811 return true
812 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
813 p.oe = append(p.oe, p.head)
814 defer p.oe.remove(p.head)
815 return inHeadIM(p)
816 case a.Head:
817 // Ignore the token.
818 return true
819 }
820 case EndTagToken:
821 switch p.tok.DataAtom {
822 case a.Body, a.Html, a.Br:
823 // Drop down to creating an implied <body> tag.
824 case a.Template:
825 return inHeadIM(p)
826 default:
827 // Ignore the token.
828 return true
829 }
830 case CommentToken:
831 p.addChild(&Node{
832 Type: CommentNode,
833 Data: p.tok.Data,
834 })
835 return true
836 case DoctypeToken:
837 // Ignore the token.
838 return true
839 }
840
841 p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
842 p.framesetOK = true
843 return false
844}
845
846// copyAttributes copies attributes of src not found on dst to dst.
847func copyAttributes(dst *Node, src Token) {
848 if len(src.Attr) == 0 {
849 return
850 }
851 attr := map[string]string{}
852 for _, t := range dst.Attr {
853 attr[t.Key] = t.Val
854 }
855 for _, t := range src.Attr {
856 if _, ok := attr[t.Key]; !ok {
857 dst.Attr = append(dst.Attr, t)
858 attr[t.Key] = t.Val
859 }
860 }
861}
862
// inBodyIM is the "in body" insertion mode. It dispatches on the type of the
// current token (p.tok) and, for start and end tags, on the tag's atom. It
// reports whether the token was consumed.
// Section 12.2.6.4.7.
func inBodyIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		d := p.tok.Data
		switch n := p.oe.top(); n.DataAtom {
		case a.Pre, a.Listing:
			if n.FirstChild == nil {
				// Ignore a newline at the start of a <pre> block.
				if d != "" && d[0] == '\r' {
					d = d[1:]
				}
				if d != "" && d[0] == '\n' {
					d = d[1:]
				}
			}
		}
		// Drop NUL characters from the text.
		d = strings.Replace(d, "\x00", "", -1)
		if d == "" {
			return true
		}
		p.reconstructActiveFormattingElements()
		p.addText(d)
		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
			// There were non-whitespace characters inserted.
			p.framesetOK = false
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			if p.oe.contains(a.Template) {
				return true
			}
			// Merge any new attributes into the first open element.
			copyAttributes(p.oe[0], p.tok)
		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
			return inHeadIM(p)
		case a.Body:
			if p.oe.contains(a.Template) {
				return true
			}
			if len(p.oe) >= 2 {
				body := p.oe[1]
				if body.Type == ElementNode && body.DataAtom == a.Body {
					p.framesetOK = false
					copyAttributes(body, p.tok)
				}
			}
		case a.Frameset:
			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
				// Ignore the token.
				return true
			}
			// Detach the body element and replace it with the frameset.
			body := p.oe[1]
			if body.Parent != nil {
				body.Parent.RemoveChild(body)
			}
			p.oe = p.oe[:1]
			p.addElement()
			p.im = inFramesetIM
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			p.popUntil(buttonScope, a.P)
			// A heading directly inside another heading closes the outer one.
			switch n := p.top(); n.DataAtom {
			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
				p.oe.pop()
			}
			p.addElement()
		case a.Pre, a.Listing:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			// The newline, if any, will be dealt with by the TextToken case.
			p.framesetOK = false
		case a.Form:
			if p.form != nil && !p.oe.contains(a.Template) {
				// Ignore the token
				return true
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
			if !p.oe.contains(a.Template) {
				p.form = p.top()
			}
		case a.Li:
			p.framesetOK = false
			// Walk down the stack: an open <li> is closed (with everything
			// above it); address, div, p and non-special elements are
			// skipped; any other special element stops the walk.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Li:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Dd, a.Dt:
			p.framesetOK = false
			// Same walk as for <li>, but closing an open <dd> or <dt>.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Dd, a.Dt:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Plaintext:
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Button:
			p.popUntil(defaultScope, a.Button)
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
		case a.A:
			// If an <a> is already active since the last scope marker, close
			// it before opening the new one.
			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
					p.inBodyEndTagFormatting(a.A, "a")
					p.oe.remove(n)
					p.afe.remove(n)
					break
				}
			}
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.Nobr:
			p.reconstructActiveFormattingElements()
			if p.elementInScope(defaultScope, a.Nobr) {
				p.inBodyEndTagFormatting(a.Nobr, "nobr")
				p.reconstructActiveFormattingElements()
			}
			p.addFormattingElement()
		case a.Applet, a.Marquee, a.Object:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
		case a.Table:
			if !p.quirks {
				p.popUntil(buttonScope, a.P)
			}
			p.addElement()
			p.framesetOK = false
			p.im = inTableIM
			return true
		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			if p.tok.DataAtom == a.Input {
				for _, t := range p.tok.Attr {
					if t.Key == "type" {
						if strings.ToLower(t.Val) == "hidden" {
							// Skip setting framesetOK = false
							return true
						}
					}
				}
			}
			p.framesetOK = false
		case a.Param, a.Source, a.Track:
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		case a.Hr:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			p.framesetOK = false
		case a.Image:
			// Rewrite the token as <img> and leave it unconsumed.
			p.tok.DataAtom = a.Img
			p.tok.Data = a.Img.String()
			return false
		case a.Textarea:
			p.addElement()
			p.setOriginalIM()
			p.framesetOK = false
			p.im = textIM
		case a.Xmp:
			p.popUntil(buttonScope, a.P)
			p.reconstructActiveFormattingElements()
			p.framesetOK = false
			p.parseGenericRawTextElement()
		case a.Iframe:
			p.framesetOK = false
			p.parseGenericRawTextElement()
		case a.Noembed:
			p.parseGenericRawTextElement()
		case a.Noscript:
			if p.scripting {
				p.parseGenericRawTextElement()
				return true
			}
			p.reconstructActiveFormattingElements()
			p.addElement()
			// Don't let the tokenizer go into raw text mode when scripting is disabled.
			p.tokenizer.NextIsNotRawText()
		case a.Select:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
			p.im = inSelectIM
			return true
		case a.Optgroup, a.Option:
			if p.top().DataAtom == a.Option {
				p.oe.pop()
			}
			p.reconstructActiveFormattingElements()
			p.addElement()
		case a.Rb, a.Rtc:
			if p.elementInScope(defaultScope, a.Ruby) {
				p.generateImpliedEndTags()
			}
			p.addElement()
		case a.Rp, a.Rt:
			if p.elementInScope(defaultScope, a.Ruby) {
				p.generateImpliedEndTags("rtc")
			}
			p.addElement()
		case a.Math, a.Svg:
			p.reconstructActiveFormattingElements()
			if p.tok.DataAtom == a.Math {
				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
			} else {
				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
			}
			adjustForeignAttributes(p.tok.Attr)
			p.addElement()
			// The new element's namespace is its tag name ("math" or "svg").
			p.top().Namespace = p.tok.Data
			if p.hasSelfClosingToken {
				p.oe.pop()
				p.acknowledgeSelfClosingTag()
			}
			return true
		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
		default:
			p.reconstructActiveFormattingElements()
			p.addElement()
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Body:
			if p.elementInScope(defaultScope, a.Body) {
				p.im = afterBodyIM
			}
		case a.Html:
			if p.elementInScope(defaultScope, a.Body) {
				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
				return false
			}
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.Form:
			if p.oe.contains(a.Template) {
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if i == -1 {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				if p.oe[i].DataAtom != a.Form {
					// Ignore the token.
					return true
				}
				p.popUntil(defaultScope, a.Form)
			} else {
				// Outside templates, the form element pointer is cleared
				// even when the token ends up being ignored.
				node := p.form
				p.form = nil
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if node == nil || i == -1 || p.oe[i] != node {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				p.oe.remove(node)
			}
		case a.P:
			// A </p> with no <p> in button scope first implies a <p> start
			// tag, so that there is an element to close.
			if !p.elementInScope(buttonScope, a.P) {
				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
			}
			p.popUntil(buttonScope, a.P)
		case a.Li:
			p.popUntil(listItemScope, a.Li)
		case a.Dd, a.Dt:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
		case a.Applet, a.Marquee, a.Object:
			if p.popUntil(defaultScope, p.tok.DataAtom) {
				p.clearActiveFormattingElements()
			}
		case a.Br:
			// Turn </br> into a <br> start tag and leave it unconsumed.
			p.tok.Type = StartTagToken
			return false
		case a.Template:
			return inHeadIM(p)
		default:
			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case ErrorToken:
		// TODO: remove this divergence from the HTML5 spec.
		if len(p.templateStack) > 0 {
			p.im = inTemplateIM
			return false
		}
		// NOTE(review): both the early return below and the final return
		// yield true, so this loop does not currently affect the result.
		for _, e := range p.oe {
			switch e.DataAtom {
			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
				a.Thead, a.Tr, a.Body, a.Html:
			default:
				return true
			}
		}
	}

	return true
}
1209
// inBodyEndTagFormatting handles an end tag named tagName (atom tagAtom) by
// running the "adoption agency" algorithm, described at
// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
//
// It operates on the stack of open elements (p.oe) and the list of active
// formatting elements (p.afe). When no matching formatting element is found
// in p.afe it defers to inBodyEndTagOther.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. If the current node has this tag name and is not in the
	// list of active formatting elements, just pop it.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop, bounded to eight iterations.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the last entry of p.afe with
		// this atom, searching backwards and stopping at a scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first special element
		// at or above the formatting element's position in p.oe.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop up to and including the formatting
			// element, then drop it from the active formatting elements.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// NOTE(review): p.oe[feIndex-1] assumes feIndex > 0, i.e. the
		// formatting element is never the bottom of the stack; confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step 14.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone in both lists.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first reparenting, move the bookmark to just
			// after the new node in the active formatting elements.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Move lastNode under node.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone goes
		// immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1362
1363// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1364// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1365// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1366func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1367 for i := len(p.oe) - 1; i >= 0; i-- {
1368 // Two element nodes have the same tag if they have the same Data (a
1369 // string-typed field). As an optimization, for common HTML tags, each
1370 // Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1371 // field), since integer comparison is faster than string comparison.
1372 // Uncommon (custom) tags get a zero DataAtom.
1373 //
1374 // The if condition here is equivalent to (p.oe[i].Data == tagName).
1375 if (p.oe[i].DataAtom == tagAtom) &&
1376 ((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1377 p.oe = p.oe[:i]
1378 break
1379 }
1380 if isSpecialElement(p.oe[i]) {
1381 break
1382 }
1383 }
1384}
1385
1386// Section 12.2.6.4.8.
1387func textIM(p *parser) bool {
1388 switch p.tok.Type {
1389 case ErrorToken:
1390 p.oe.pop()
1391 case TextToken:
1392 d := p.tok.Data
1393 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1394 // Ignore a newline at the start of a <textarea> block.
1395 if d != "" && d[0] == '\r' {
1396 d = d[1:]
1397 }
1398 if d != "" && d[0] == '\n' {
1399 d = d[1:]
1400 }
1401 }
1402 if d == "" {
1403 return true
1404 }
1405 p.addText(d)
1406 return true
1407 case EndTagToken:
1408 p.oe.pop()
1409 }
1410 p.im = p.originalIM
1411 p.originalIM = nil
1412 return p.tok.Type == EndTagToken
1413}
1414
1415// Section 12.2.6.4.9.
1416func inTableIM(p *parser) bool {
1417 switch p.tok.Type {
1418 case TextToken:
1419 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1420 switch p.oe.top().DataAtom {
1421 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1422 if strings.Trim(p.tok.Data, whitespace) == "" {
1423 p.addText(p.tok.Data)
1424 return true
1425 }
1426 }
1427 case StartTagToken:
1428 switch p.tok.DataAtom {
1429 case a.Caption:
1430 p.clearStackToContext(tableScope)
1431 p.afe = append(p.afe, &scopeMarker)
1432 p.addElement()
1433 p.im = inCaptionIM
1434 return true
1435 case a.Colgroup:
1436 p.clearStackToContext(tableScope)
1437 p.addElement()
1438 p.im = inColumnGroupIM
1439 return true
1440 case a.Col:
1441 p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1442 return false
1443 case a.Tbody, a.Tfoot, a.Thead:
1444 p.clearStackToContext(tableScope)
1445 p.addElement()
1446 p.im = inTableBodyIM
1447 return true
1448 case a.Td, a.Th, a.Tr:
1449 p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1450 return false
1451 case a.Table:
1452 if p.popUntil(tableScope, a.Table) {
1453 p.resetInsertionMode()
1454 return false
1455 }
1456 // Ignore the token.
1457 return true
1458 case a.Style, a.Script, a.Template:
1459 return inHeadIM(p)
1460 case a.Input:
1461 for _, t := range p.tok.Attr {
1462 if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1463 p.addElement()
1464 p.oe.pop()
1465 return true
1466 }
1467 }
1468 // Otherwise drop down to the default action.
1469 case a.Form:
1470 if p.oe.contains(a.Template) || p.form != nil {
1471 // Ignore the token.
1472 return true
1473 }
1474 p.addElement()
1475 p.form = p.oe.pop()
1476 case a.Select:
1477 p.reconstructActiveFormattingElements()
1478 switch p.top().DataAtom {
1479 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1480 p.fosterParenting = true
1481 }
1482 p.addElement()
1483 p.fosterParenting = false
1484 p.framesetOK = false
1485 p.im = inSelectInTableIM
1486 return true
1487 }
1488 case EndTagToken:
1489 switch p.tok.DataAtom {
1490 case a.Table:
1491 if p.popUntil(tableScope, a.Table) {
1492 p.resetInsertionMode()
1493 return true
1494 }
1495 // Ignore the token.
1496 return true
1497 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1498 // Ignore the token.
1499 return true
1500 case a.Template:
1501 return inHeadIM(p)
1502 }
1503 case CommentToken:
1504 p.addChild(&Node{
1505 Type: CommentNode,
1506 Data: p.tok.Data,
1507 })
1508 return true
1509 case DoctypeToken:
1510 // Ignore the token.
1511 return true
1512 case ErrorToken:
1513 return inBodyIM(p)
1514 }
1515
1516 p.fosterParenting = true
1517 defer func() { p.fosterParenting = false }()
1518
1519 return inBodyIM(p)
1520}
1521
1522// Section 12.2.6.4.11.
1523func inCaptionIM(p *parser) bool {
1524 switch p.tok.Type {
1525 case StartTagToken:
1526 switch p.tok.DataAtom {
1527 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1528 if !p.popUntil(tableScope, a.Caption) {
1529 // Ignore the token.
1530 return true
1531 }
1532 p.clearActiveFormattingElements()
1533 p.im = inTableIM
1534 return false
1535 case a.Select:
1536 p.reconstructActiveFormattingElements()
1537 p.addElement()
1538 p.framesetOK = false
1539 p.im = inSelectInTableIM
1540 return true
1541 }
1542 case EndTagToken:
1543 switch p.tok.DataAtom {
1544 case a.Caption:
1545 if p.popUntil(tableScope, a.Caption) {
1546 p.clearActiveFormattingElements()
1547 p.im = inTableIM
1548 }
1549 return true
1550 case a.Table:
1551 if !p.popUntil(tableScope, a.Caption) {
1552 // Ignore the token.
1553 return true
1554 }
1555 p.clearActiveFormattingElements()
1556 p.im = inTableIM
1557 return false
1558 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1559 // Ignore the token.
1560 return true
1561 }
1562 }
1563 return inBodyIM(p)
1564}
1565
1566// Section 12.2.6.4.12.
1567func inColumnGroupIM(p *parser) bool {
1568 switch p.tok.Type {
1569 case TextToken:
1570 s := strings.TrimLeft(p.tok.Data, whitespace)
1571 if len(s) < len(p.tok.Data) {
1572 // Add the initial whitespace to the current node.
1573 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1574 if s == "" {
1575 return true
1576 }
1577 p.tok.Data = s
1578 }
1579 case CommentToken:
1580 p.addChild(&Node{
1581 Type: CommentNode,
1582 Data: p.tok.Data,
1583 })
1584 return true
1585 case DoctypeToken:
1586 // Ignore the token.
1587 return true
1588 case StartTagToken:
1589 switch p.tok.DataAtom {
1590 case a.Html:
1591 return inBodyIM(p)
1592 case a.Col:
1593 p.addElement()
1594 p.oe.pop()
1595 p.acknowledgeSelfClosingTag()
1596 return true
1597 case a.Template:
1598 return inHeadIM(p)
1599 }
1600 case EndTagToken:
1601 switch p.tok.DataAtom {
1602 case a.Colgroup:
1603 if p.oe.top().DataAtom == a.Colgroup {
1604 p.oe.pop()
1605 p.im = inTableIM
1606 }
1607 return true
1608 case a.Col:
1609 // Ignore the token.
1610 return true
1611 case a.Template:
1612 return inHeadIM(p)
1613 }
1614 case ErrorToken:
1615 return inBodyIM(p)
1616 }
1617 if p.oe.top().DataAtom != a.Colgroup {
1618 return true
1619 }
1620 p.oe.pop()
1621 p.im = inTableIM
1622 return false
1623}
1624
1625// Section 12.2.6.4.13.
1626func inTableBodyIM(p *parser) bool {
1627 switch p.tok.Type {
1628 case StartTagToken:
1629 switch p.tok.DataAtom {
1630 case a.Tr:
1631 p.clearStackToContext(tableBodyScope)
1632 p.addElement()
1633 p.im = inRowIM
1634 return true
1635 case a.Td, a.Th:
1636 p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1637 return false
1638 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1639 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1640 p.im = inTableIM
1641 return false
1642 }
1643 // Ignore the token.
1644 return true
1645 }
1646 case EndTagToken:
1647 switch p.tok.DataAtom {
1648 case a.Tbody, a.Tfoot, a.Thead:
1649 if p.elementInScope(tableScope, p.tok.DataAtom) {
1650 p.clearStackToContext(tableBodyScope)
1651 p.oe.pop()
1652 p.im = inTableIM
1653 }
1654 return true
1655 case a.Table:
1656 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1657 p.im = inTableIM
1658 return false
1659 }
1660 // Ignore the token.
1661 return true
1662 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1663 // Ignore the token.
1664 return true
1665 }
1666 case CommentToken:
1667 p.addChild(&Node{
1668 Type: CommentNode,
1669 Data: p.tok.Data,
1670 })
1671 return true
1672 }
1673
1674 return inTableIM(p)
1675}
1676
1677// Section 12.2.6.4.14.
1678func inRowIM(p *parser) bool {
1679 switch p.tok.Type {
1680 case StartTagToken:
1681 switch p.tok.DataAtom {
1682 case a.Td, a.Th:
1683 p.clearStackToContext(tableRowScope)
1684 p.addElement()
1685 p.afe = append(p.afe, &scopeMarker)
1686 p.im = inCellIM
1687 return true
1688 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1689 if p.popUntil(tableScope, a.Tr) {
1690 p.im = inTableBodyIM
1691 return false
1692 }
1693 // Ignore the token.
1694 return true
1695 }
1696 case EndTagToken:
1697 switch p.tok.DataAtom {
1698 case a.Tr:
1699 if p.popUntil(tableScope, a.Tr) {
1700 p.im = inTableBodyIM
1701 return true
1702 }
1703 // Ignore the token.
1704 return true
1705 case a.Table:
1706 if p.popUntil(tableScope, a.Tr) {
1707 p.im = inTableBodyIM
1708 return false
1709 }
1710 // Ignore the token.
1711 return true
1712 case a.Tbody, a.Tfoot, a.Thead:
1713 if p.elementInScope(tableScope, p.tok.DataAtom) {
1714 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1715 return false
1716 }
1717 // Ignore the token.
1718 return true
1719 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1720 // Ignore the token.
1721 return true
1722 }
1723 }
1724
1725 return inTableIM(p)
1726}
1727
1728// Section 12.2.6.4.15.
1729func inCellIM(p *parser) bool {
1730 switch p.tok.Type {
1731 case StartTagToken:
1732 switch p.tok.DataAtom {
1733 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1734 if p.popUntil(tableScope, a.Td, a.Th) {
1735 // Close the cell and reprocess.
1736 p.clearActiveFormattingElements()
1737 p.im = inRowIM
1738 return false
1739 }
1740 // Ignore the token.
1741 return true
1742 case a.Select:
1743 p.reconstructActiveFormattingElements()
1744 p.addElement()
1745 p.framesetOK = false
1746 p.im = inSelectInTableIM
1747 return true
1748 }
1749 case EndTagToken:
1750 switch p.tok.DataAtom {
1751 case a.Td, a.Th:
1752 if !p.popUntil(tableScope, p.tok.DataAtom) {
1753 // Ignore the token.
1754 return true
1755 }
1756 p.clearActiveFormattingElements()
1757 p.im = inRowIM
1758 return true
1759 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1760 // Ignore the token.
1761 return true
1762 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1763 if !p.elementInScope(tableScope, p.tok.DataAtom) {
1764 // Ignore the token.
1765 return true
1766 }
1767 // Close the cell and reprocess.
1768 if p.popUntil(tableScope, a.Td, a.Th) {
1769 p.clearActiveFormattingElements()
1770 }
1771 p.im = inRowIM
1772 return false
1773 }
1774 }
1775 return inBodyIM(p)
1776}
1777
1778// Section 12.2.6.4.16.
1779func inSelectIM(p *parser) bool {
1780 switch p.tok.Type {
1781 case TextToken:
1782 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1783 case StartTagToken:
1784 switch p.tok.DataAtom {
1785 case a.Html:
1786 return inBodyIM(p)
1787 case a.Option:
1788 if p.top().DataAtom == a.Option {
1789 p.oe.pop()
1790 }
1791 p.addElement()
1792 case a.Optgroup:
1793 if p.top().DataAtom == a.Option {
1794 p.oe.pop()
1795 }
1796 if p.top().DataAtom == a.Optgroup {
1797 p.oe.pop()
1798 }
1799 p.addElement()
1800 case a.Select:
1801 if !p.popUntil(selectScope, a.Select) {
1802 // Ignore the token.
1803 return true
1804 }
1805 p.resetInsertionMode()
1806 case a.Input, a.Keygen, a.Textarea:
1807 if p.elementInScope(selectScope, a.Select) {
1808 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1809 return false
1810 }
1811 // In order to properly ignore <textarea>, we need to change the tokenizer mode.
1812 p.tokenizer.NextIsNotRawText()
1813 // Ignore the token.
1814 return true
1815 case a.Script, a.Template:
1816 return inHeadIM(p)
1817 case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1818 // Don't let the tokenizer go into raw text mode when there are raw tags
1819 // to be ignored. These tags should be ignored from the tokenizer
1820 // properly.
1821 p.tokenizer.NextIsNotRawText()
1822 // Ignore the token.
1823 return true
1824 }
1825 case EndTagToken:
1826 switch p.tok.DataAtom {
1827 case a.Option:
1828 if p.top().DataAtom == a.Option {
1829 p.oe.pop()
1830 }
1831 case a.Optgroup:
1832 i := len(p.oe) - 1
1833 if p.oe[i].DataAtom == a.Option {
1834 i--
1835 }
1836 if p.oe[i].DataAtom == a.Optgroup {
1837 p.oe = p.oe[:i]
1838 }
1839 case a.Select:
1840 if !p.popUntil(selectScope, a.Select) {
1841 // Ignore the token.
1842 return true
1843 }
1844 p.resetInsertionMode()
1845 case a.Template:
1846 return inHeadIM(p)
1847 }
1848 case CommentToken:
1849 p.addChild(&Node{
1850 Type: CommentNode,
1851 Data: p.tok.Data,
1852 })
1853 case DoctypeToken:
1854 // Ignore the token.
1855 return true
1856 case ErrorToken:
1857 return inBodyIM(p)
1858 }
1859
1860 return true
1861}
1862
1863// Section 12.2.6.4.17.
1864func inSelectInTableIM(p *parser) bool {
1865 switch p.tok.Type {
1866 case StartTagToken, EndTagToken:
1867 switch p.tok.DataAtom {
1868 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1869 if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1870 // Ignore the token.
1871 return true
1872 }
1873 // This is like p.popUntil(selectScope, a.Select), but it also
1874 // matches <math select>, not just <select>. Matching the MathML
1875 // tag is arguably incorrect (conceptually), but it mimics what
1876 // Chromium does.
1877 for i := len(p.oe) - 1; i >= 0; i-- {
1878 if n := p.oe[i]; n.DataAtom == a.Select {
1879 p.oe = p.oe[:i]
1880 break
1881 }
1882 }
1883 p.resetInsertionMode()
1884 return false
1885 }
1886 }
1887 return inSelectIM(p)
1888}
1889
1890// Section 12.2.6.4.18.
1891func inTemplateIM(p *parser) bool {
1892 switch p.tok.Type {
1893 case TextToken, CommentToken, DoctypeToken:
1894 return inBodyIM(p)
1895 case StartTagToken:
1896 switch p.tok.DataAtom {
1897 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1898 return inHeadIM(p)
1899 case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1900 p.templateStack.pop()
1901 p.templateStack = append(p.templateStack, inTableIM)
1902 p.im = inTableIM
1903 return false
1904 case a.Col:
1905 p.templateStack.pop()
1906 p.templateStack = append(p.templateStack, inColumnGroupIM)
1907 p.im = inColumnGroupIM
1908 return false
1909 case a.Tr:
1910 p.templateStack.pop()
1911 p.templateStack = append(p.templateStack, inTableBodyIM)
1912 p.im = inTableBodyIM
1913 return false
1914 case a.Td, a.Th:
1915 p.templateStack.pop()
1916 p.templateStack = append(p.templateStack, inRowIM)
1917 p.im = inRowIM
1918 return false
1919 default:
1920 p.templateStack.pop()
1921 p.templateStack = append(p.templateStack, inBodyIM)
1922 p.im = inBodyIM
1923 return false
1924 }
1925 case EndTagToken:
1926 switch p.tok.DataAtom {
1927 case a.Template:
1928 return inHeadIM(p)
1929 default:
1930 // Ignore the token.
1931 return true
1932 }
1933 case ErrorToken:
1934 if !p.oe.contains(a.Template) {
1935 // Ignore the token.
1936 return true
1937 }
1938 // TODO: remove this divergence from the HTML5 spec.
1939 //
1940 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1941 p.generateImpliedEndTags()
1942 for i := len(p.oe) - 1; i >= 0; i-- {
1943 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1944 p.oe = p.oe[:i]
1945 break
1946 }
1947 }
1948 p.clearActiveFormattingElements()
1949 p.templateStack.pop()
1950 p.resetInsertionMode()
1951 return false
1952 }
1953 return false
1954}
1955
1956// Section 12.2.6.4.19.
1957func afterBodyIM(p *parser) bool {
1958 switch p.tok.Type {
1959 case ErrorToken:
1960 // Stop parsing.
1961 return true
1962 case TextToken:
1963 s := strings.TrimLeft(p.tok.Data, whitespace)
1964 if len(s) == 0 {
1965 // It was all whitespace.
1966 return inBodyIM(p)
1967 }
1968 case StartTagToken:
1969 if p.tok.DataAtom == a.Html {
1970 return inBodyIM(p)
1971 }
1972 case EndTagToken:
1973 if p.tok.DataAtom == a.Html {
1974 if !p.fragment {
1975 p.im = afterAfterBodyIM
1976 }
1977 return true
1978 }
1979 case CommentToken:
1980 // The comment is attached to the <html> element.
1981 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1982 panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1983 }
1984 p.oe[0].AppendChild(&Node{
1985 Type: CommentNode,
1986 Data: p.tok.Data,
1987 })
1988 return true
1989 }
1990 p.im = inBodyIM
1991 return false
1992}
1993
1994// Section 12.2.6.4.20.
1995func inFramesetIM(p *parser) bool {
1996 switch p.tok.Type {
1997 case CommentToken:
1998 p.addChild(&Node{
1999 Type: CommentNode,
2000 Data: p.tok.Data,
2001 })
2002 case TextToken:
2003 // Ignore all text but whitespace.
2004 s := strings.Map(func(c rune) rune {
2005 switch c {
2006 case ' ', '\t', '\n', '\f', '\r':
2007 return c
2008 }
2009 return -1
2010 }, p.tok.Data)
2011 if s != "" {
2012 p.addText(s)
2013 }
2014 case StartTagToken:
2015 switch p.tok.DataAtom {
2016 case a.Html:
2017 return inBodyIM(p)
2018 case a.Frameset:
2019 p.addElement()
2020 case a.Frame:
2021 p.addElement()
2022 p.oe.pop()
2023 p.acknowledgeSelfClosingTag()
2024 case a.Noframes:
2025 return inHeadIM(p)
2026 }
2027 case EndTagToken:
2028 switch p.tok.DataAtom {
2029 case a.Frameset:
2030 if p.oe.top().DataAtom != a.Html {
2031 p.oe.pop()
2032 if p.oe.top().DataAtom != a.Frameset {
2033 p.im = afterFramesetIM
2034 return true
2035 }
2036 }
2037 }
2038 default:
2039 // Ignore the token.
2040 }
2041 return true
2042}
2043
2044// Section 12.2.6.4.21.
2045func afterFramesetIM(p *parser) bool {
2046 switch p.tok.Type {
2047 case CommentToken:
2048 p.addChild(&Node{
2049 Type: CommentNode,
2050 Data: p.tok.Data,
2051 })
2052 case TextToken:
2053 // Ignore all text but whitespace.
2054 s := strings.Map(func(c rune) rune {
2055 switch c {
2056 case ' ', '\t', '\n', '\f', '\r':
2057 return c
2058 }
2059 return -1
2060 }, p.tok.Data)
2061 if s != "" {
2062 p.addText(s)
2063 }
2064 case StartTagToken:
2065 switch p.tok.DataAtom {
2066 case a.Html:
2067 return inBodyIM(p)
2068 case a.Noframes:
2069 return inHeadIM(p)
2070 }
2071 case EndTagToken:
2072 switch p.tok.DataAtom {
2073 case a.Html:
2074 p.im = afterAfterFramesetIM
2075 return true
2076 }
2077 default:
2078 // Ignore the token.
2079 }
2080 return true
2081}
2082
2083// Section 12.2.6.4.22.
2084func afterAfterBodyIM(p *parser) bool {
2085 switch p.tok.Type {
2086 case ErrorToken:
2087 // Stop parsing.
2088 return true
2089 case TextToken:
2090 s := strings.TrimLeft(p.tok.Data, whitespace)
2091 if len(s) == 0 {
2092 // It was all whitespace.
2093 return inBodyIM(p)
2094 }
2095 case StartTagToken:
2096 if p.tok.DataAtom == a.Html {
2097 return inBodyIM(p)
2098 }
2099 case CommentToken:
2100 p.doc.AppendChild(&Node{
2101 Type: CommentNode,
2102 Data: p.tok.Data,
2103 })
2104 return true
2105 case DoctypeToken:
2106 return inBodyIM(p)
2107 }
2108 p.im = inBodyIM
2109 return false
2110}
2111
2112// Section 12.2.6.4.23.
2113func afterAfterFramesetIM(p *parser) bool {
2114 switch p.tok.Type {
2115 case CommentToken:
2116 p.doc.AppendChild(&Node{
2117 Type: CommentNode,
2118 Data: p.tok.Data,
2119 })
2120 case TextToken:
2121 // Ignore all text but whitespace.
2122 s := strings.Map(func(c rune) rune {
2123 switch c {
2124 case ' ', '\t', '\n', '\f', '\r':
2125 return c
2126 }
2127 return -1
2128 }, p.tok.Data)
2129 if s != "" {
2130 p.tok.Data = s
2131 return inBodyIM(p)
2132 }
2133 case StartTagToken:
2134 switch p.tok.DataAtom {
2135 case a.Html:
2136 return inBodyIM(p)
2137 case a.Noframes:
2138 return inHeadIM(p)
2139 }
2140 case DoctypeToken:
2141 return inBodyIM(p)
2142 default:
2143 // Ignore the token.
2144 }
2145 return true
2146}
2147
// ignoreTheRemainingTokens is an insertion mode function that reports every
// token as consumed without acting on it.
func ignoreTheRemainingTokens(p *parser) bool {
	return true
}
2151
// whitespaceOrNUL is the whitespace character set extended with the NUL
// character. parseForeignContent uses it to decide whether a text token
// leaves framesetOK set.
const whitespaceOrNUL = whitespace + "\x00"
2153
2154// Section 12.2.6.5
2155func parseForeignContent(p *parser) bool {
2156 switch p.tok.Type {
2157 case TextToken:
2158 if p.framesetOK {
2159 p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2160 }
2161 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2162 p.addText(p.tok.Data)
2163 case CommentToken:
2164 p.addChild(&Node{
2165 Type: CommentNode,
2166 Data: p.tok.Data,
2167 })
2168 case StartTagToken:
2169 if !p.fragment {
2170 b := breakout[p.tok.Data]
2171 if p.tok.DataAtom == a.Font {
2172 loop:
2173 for _, attr := range p.tok.Attr {
2174 switch attr.Key {
2175 case "color", "face", "size":
2176 b = true
2177 break loop
2178 }
2179 }
2180 }
2181 if b {
2182 for i := len(p.oe) - 1; i >= 0; i-- {
2183 n := p.oe[i]
2184 if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2185 p.oe = p.oe[:i+1]
2186 break
2187 }
2188 }
2189 return false
2190 }
2191 }
2192 current := p.adjustedCurrentNode()
2193 switch current.Namespace {
2194 case "math":
2195 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2196 case "svg":
2197 // Adjust SVG tag names. The tokenizer lower-cases tag names, but
2198 // SVG wants e.g. "foreignObject" with a capital second "O".
2199 if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2200 p.tok.DataAtom = a.Lookup([]byte(x))
2201 p.tok.Data = x
2202 }
2203 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2204 default:
2205 panic("html: bad parser state: unexpected namespace")
2206 }
2207 adjustForeignAttributes(p.tok.Attr)
2208 namespace := current.Namespace
2209 p.addElement()
2210 p.top().Namespace = namespace
2211 if namespace != "" {
2212 // Don't let the tokenizer go into raw text mode in foreign content
2213 // (e.g. in an SVG <title> tag).
2214 p.tokenizer.NextIsNotRawText()
2215 }
2216 if p.hasSelfClosingToken {
2217 p.oe.pop()
2218 p.acknowledgeSelfClosingTag()
2219 }
2220 case EndTagToken:
2221 for i := len(p.oe) - 1; i >= 0; i-- {
2222 if p.oe[i].Namespace == "" {
2223 return p.im(p)
2224 }
2225 if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2226 p.oe = p.oe[:i]
2227 break
2228 }
2229 }
2230 return true
2231 default:
2232 // Ignore the token.
2233 }
2234 return true
2235}
2236
2237// Section 12.2.4.2.
2238func (p *parser) adjustedCurrentNode() *Node {
2239 if len(p.oe) == 1 && p.fragment && p.context != nil {
2240 return p.context
2241 }
2242 return p.oe.top()
2243}
2244
2245// Section 12.2.6.
2246func (p *parser) inForeignContent() bool {
2247 if len(p.oe) == 0 {
2248 return false
2249 }
2250 n := p.adjustedCurrentNode()
2251 if n.Namespace == "" {
2252 return false
2253 }
2254 if mathMLTextIntegrationPoint(n) {
2255 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2256 return false
2257 }
2258 if p.tok.Type == TextToken {
2259 return false
2260 }
2261 }
2262 if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2263 return false
2264 }
2265 if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2266 return false
2267 }
2268 if p.tok.Type == ErrorToken {
2269 return false
2270 }
2271 return true
2272}
2273
2274// parseImpliedToken parses a token as though it had appeared in the parser's
2275// input.
2276func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2277 realToken, selfClosing := p.tok, p.hasSelfClosingToken
2278 p.tok = Token{
2279 Type: t,
2280 DataAtom: dataAtom,
2281 Data: data,
2282 }
2283 p.hasSelfClosingToken = false
2284 p.parseCurrentToken()
2285 p.tok, p.hasSelfClosingToken = realToken, selfClosing
2286}
2287
2288// parseCurrentToken runs the current token through the parsing routines
2289// until it is consumed.
2290func (p *parser) parseCurrentToken() {
2291 if p.tok.Type == SelfClosingTagToken {
2292 p.hasSelfClosingToken = true
2293 p.tok.Type = StartTagToken
2294 }
2295
2296 consumed := false
2297 for !consumed {
2298 if p.inForeignContent() {
2299 consumed = parseForeignContent(p)
2300 } else {
2301 consumed = p.im(p)
2302 }
2303 }
2304
2305 if p.hasSelfClosingToken {
2306 // This is a parse error, but ignore it.
2307 p.hasSelfClosingToken = false
2308 }
2309}
2310
2311func (p *parser) parse() error {
2312 // Iterate until EOF. Any other error will cause an early return.
2313 var err error
2314 for err != io.EOF {
2315 // CDATA sections are allowed only in foreign content.
2316 n := p.oe.top()
2317 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2318 // Read and parse the next token.
2319 p.tokenizer.Next()
2320 p.tok = p.tokenizer.Token()
2321 if p.tok.Type == ErrorToken {
2322 err = p.tokenizer.Err()
2323 if err != nil && err != io.EOF {
2324 return err
2325 }
2326 }
2327 p.parseCurrentToken()
2328 }
2329 return nil
2330}
2331
// Parse returns the parse tree for the HTML from the given Reader.
//
// It implements the HTML5 parsing algorithm
// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
// which is very complicated. The resultant tree can contain implicitly created
// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
// differ from the nesting implied by a naive processing of start and end
// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
// with no corresponding node in the resulting tree.
//
// The input is assumed to be UTF-8 encoded.
//
// Parse is equivalent to calling ParseWithOptions with no options.
func Parse(r io.Reader) (*Node, error) {
	return ParseWithOptions(r)
}
2346
// ParseFragment parses a fragment of HTML and returns the nodes that were
// found. If the fragment is the InnerHTML for an existing element, pass that
// element in context.
//
// It has the same intricacies as Parse.
//
// ParseFragment is equivalent to calling ParseFragmentWithOptions with no
// options.
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
	return ParseFragmentWithOptions(r, context)
}
2355
// ParseOption configures a parser. Options are passed to ParseWithOptions
// and ParseFragmentWithOptions, which apply them in order before parsing
// starts.
type ParseOption func(p *parser)
2358
// ParseOptionEnableScripting configures the scripting flag.
// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
//
// By default, scripting is enabled.
func ParseOptionEnableScripting(enable bool) ParseOption {
	return func(p *parser) {
		// Overrides the scripting default that the Parse*WithOptions
		// functions set before applying options.
		p.scripting = enable
	}
}
2368
2369// ParseWithOptions is like Parse, with options.
2370func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2371 p := &parser{
2372 tokenizer: NewTokenizer(r),
2373 doc: &Node{
2374 Type: DocumentNode,
2375 },
2376 scripting: true,
2377 framesetOK: true,
2378 im: initialIM,
2379 }
2380
2381 for _, f := range opts {
2382 f(p)
2383 }
2384
2385 if err := p.parse(); err != nil {
2386 return nil, err
2387 }
2388 return p.doc, nil
2389}
2390
2391// ParseFragmentWithOptions is like ParseFragment, with options.
2392func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2393 contextTag := ""
2394 if context != nil {
2395 if context.Type != ElementNode {
2396 return nil, errors.New("html: ParseFragment of non-element Node")
2397 }
2398 // The next check isn't just context.DataAtom.String() == context.Data because
2399 // it is valid to pass an element whose tag isn't a known atom. For example,
2400 // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2401 if context.DataAtom != a.Lookup([]byte(context.Data)) {
2402 return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2403 }
2404 contextTag = context.DataAtom.String()
2405 }
2406 p := &parser{
2407 doc: &Node{
2408 Type: DocumentNode,
2409 },
2410 scripting: true,
2411 fragment: true,
2412 context: context,
2413 }
2414 if context != nil && context.Namespace != "" {
2415 p.tokenizer = NewTokenizer(r)
2416 } else {
2417 p.tokenizer = NewTokenizerFragment(r, contextTag)
2418 }
2419
2420 for _, f := range opts {
2421 f(p)
2422 }
2423
2424 root := &Node{
2425 Type: ElementNode,
2426 DataAtom: a.Html,
2427 Data: a.Html.String(),
2428 }
2429 p.doc.AppendChild(root)
2430 p.oe = nodeStack{root}
2431 if context != nil && context.DataAtom == a.Template {
2432 p.templateStack = append(p.templateStack, inTemplateIM)
2433 }
2434 p.resetInsertionMode()
2435
2436 for n := context; n != nil; n = n.Parent {
2437 if n.Type == ElementNode && n.DataAtom == a.Form {
2438 p.form = n
2439 break
2440 }
2441 }
2442
2443 if err := p.parse(); err != nil {
2444 return nil, err
2445 }
2446
2447 parent := p.doc
2448 if context != nil {
2449 parent = root
2450 }
2451
2452 var result []*Node
2453 for c := parent.FirstChild; c != nil; {
2454 next := c.NextSibling
2455 parent.RemoveChild(c)
2456 result = append(result, c)
2457 c = next
2458 }
2459 return result, nil
2460}
diff --git a/vendor/golang.org/x/net/html/render.go b/vendor/golang.org/x/net/html/render.go
new file mode 100644
index 0000000..8b28031
--- /dev/null
+++ b/vendor/golang.org/x/net/html/render.go
@@ -0,0 +1,273 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bufio"
9 "errors"
10 "fmt"
11 "io"
12 "strings"
13)
14
// writer is the set of write methods the renderer relies on: whole byte
// slices, single bytes and strings.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
20
21// Render renders the parse tree n to the given writer.
22//
23// Rendering is done on a 'best effort' basis: calling Parse on the output of
24// Render will always result in something similar to the original tree, but it
25// is not necessarily an exact clone unless the original tree was 'well-formed'.
26// 'Well-formed' is not easily specified; the HTML5 specification is
27// complicated.
28//
29// Calling Parse on arbitrary input typically results in a 'well-formed' parse
30// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
31// For example, in a 'well-formed' parse tree, no <a> element is a child of
32// another <a> element: parsing "<a><a>" results in two sibling elements.
33// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
34// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
35// children; the <a> is reparented to the <table>'s parent. However, calling
36// Parse on "<a><table><a>" does not return an error, but the result has an <a>
37// element with an <a> child, and is therefore not 'well-formed'.
38//
39// Programmatically constructed trees are typically also 'well-formed', but it
40// is possible to construct a tree that looks innocuous but, when rendered and
41// re-parsed, results in a different tree. A simple example is that a solitary
42// text node would become a tree containing <html>, <head> and <body> elements.
43// Another example is that the programmatic equivalent of "a<head>b</head>c"
44// becomes "<html><head><head/><body>abc</body></html>".
45func Render(w io.Writer, n *Node) error {
46 if x, ok := w.(writer); ok {
47 return render(x, n)
48 }
49 buf := bufio.NewWriter(w)
50 if err := render(buf, n); err != nil {
51 return err
52 }
53 return buf.Flush()
54}
55
56// plaintextAbort is returned from render1 when a <plaintext> element
57// has been rendered. No more end tags should be rendered after that.
58var plaintextAbort = errors.New("html: internal error (plaintext abort)")
59
60func render(w writer, n *Node) error {
61 err := render1(w, n)
62 if err == plaintextAbort {
63 err = nil
64 }
65 return err
66}
67
68func render1(w writer, n *Node) error {
69 // Render non-element nodes; these are the easy cases.
70 switch n.Type {
71 case ErrorNode:
72 return errors.New("html: cannot render an ErrorNode node")
73 case TextNode:
74 return escape(w, n.Data)
75 case DocumentNode:
76 for c := n.FirstChild; c != nil; c = c.NextSibling {
77 if err := render1(w, c); err != nil {
78 return err
79 }
80 }
81 return nil
82 case ElementNode:
83 // No-op.
84 case CommentNode:
85 if _, err := w.WriteString("<!--"); err != nil {
86 return err
87 }
88 if err := escapeComment(w, n.Data); err != nil {
89 return err
90 }
91 if _, err := w.WriteString("-->"); err != nil {
92 return err
93 }
94 return nil
95 case DoctypeNode:
96 if _, err := w.WriteString("<!DOCTYPE "); err != nil {
97 return err
98 }
99 if err := escape(w, n.Data); err != nil {
100 return err
101 }
102 if n.Attr != nil {
103 var p, s string
104 for _, a := range n.Attr {
105 switch a.Key {
106 case "public":
107 p = a.Val
108 case "system":
109 s = a.Val
110 }
111 }
112 if p != "" {
113 if _, err := w.WriteString(" PUBLIC "); err != nil {
114 return err
115 }
116 if err := writeQuoted(w, p); err != nil {
117 return err
118 }
119 if s != "" {
120 if err := w.WriteByte(' '); err != nil {
121 return err
122 }
123 if err := writeQuoted(w, s); err != nil {
124 return err
125 }
126 }
127 } else if s != "" {
128 if _, err := w.WriteString(" SYSTEM "); err != nil {
129 return err
130 }
131 if err := writeQuoted(w, s); err != nil {
132 return err
133 }
134 }
135 }
136 return w.WriteByte('>')
137 case RawNode:
138 _, err := w.WriteString(n.Data)
139 return err
140 default:
141 return errors.New("html: unknown node type")
142 }
143
144 // Render the <xxx> opening tag.
145 if err := w.WriteByte('<'); err != nil {
146 return err
147 }
148 if _, err := w.WriteString(n.Data); err != nil {
149 return err
150 }
151 for _, a := range n.Attr {
152 if err := w.WriteByte(' '); err != nil {
153 return err
154 }
155 if a.Namespace != "" {
156 if _, err := w.WriteString(a.Namespace); err != nil {
157 return err
158 }
159 if err := w.WriteByte(':'); err != nil {
160 return err
161 }
162 }
163 if _, err := w.WriteString(a.Key); err != nil {
164 return err
165 }
166 if _, err := w.WriteString(`="`); err != nil {
167 return err
168 }
169 if err := escape(w, a.Val); err != nil {
170 return err
171 }
172 if err := w.WriteByte('"'); err != nil {
173 return err
174 }
175 }
176 if voidElements[n.Data] {
177 if n.FirstChild != nil {
178 return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
179 }
180 _, err := w.WriteString("/>")
181 return err
182 }
183 if err := w.WriteByte('>'); err != nil {
184 return err
185 }
186
187 // Add initial newline where there is danger of a newline beging ignored.
188 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
189 switch n.Data {
190 case "pre", "listing", "textarea":
191 if err := w.WriteByte('\n'); err != nil {
192 return err
193 }
194 }
195 }
196
197 // Render any child nodes.
198 switch n.Data {
199 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
200 for c := n.FirstChild; c != nil; c = c.NextSibling {
201 if c.Type == TextNode {
202 if _, err := w.WriteString(c.Data); err != nil {
203 return err
204 }
205 } else {
206 if err := render1(w, c); err != nil {
207 return err
208 }
209 }
210 }
211 if n.Data == "plaintext" {
212 // Don't render anything else. <plaintext> must be the
213 // last element in the file, with no closing tag.
214 return plaintextAbort
215 }
216 default:
217 for c := n.FirstChild; c != nil; c = c.NextSibling {
218 if err := render1(w, c); err != nil {
219 return err
220 }
221 }
222 }
223
224 // Render the </xxx> closing tag.
225 if _, err := w.WriteString("</"); err != nil {
226 return err
227 }
228 if _, err := w.WriteString(n.Data); err != nil {
229 return err
230 }
231 return w.WriteByte('>')
232}
233
234// writeQuoted writes s to w surrounded by quotes. Normally it will use double
235// quotes, but if s contains a double quote, it will use single quotes.
236// It is used for writing the identifiers in a doctype declaration.
237// In valid HTML, they can't contain both types of quotes.
238func writeQuoted(w writer, s string) error {
239 var q byte = '"'
240 if strings.Contains(s, `"`) {
241 q = '\''
242 }
243 if err := w.WriteByte(q); err != nil {
244 return err
245 }
246 if _, err := w.WriteString(s); err != nil {
247 return err
248 }
249 if err := w.WriteByte(q); err != nil {
250 return err
251 }
252 return nil
253}
254
// voidElements is the set of void elements given by section 12.1.2,
// "Elements". Void elements are those that can't have any contents.
var voidElements = func() map[string]bool {
	names := [...]string{
		"area",
		"base",
		"br",
		"col",
		"embed",
		"hr",
		"img",
		"input",
		// "keygen" has been removed from the spec, but is kept here for
		// backwards compatibility.
		"keygen",
		"link",
		"meta",
		"param",
		"source",
		"track",
		"wbr",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()
diff --git a/vendor/golang.org/x/net/html/token.go b/vendor/golang.org/x/net/html/token.go
new file mode 100644
index 0000000..5c2a1f4
--- /dev/null
+++ b/vendor/golang.org/x/net/html/token.go
@@ -0,0 +1,1261 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bytes"
9 "errors"
10 "io"
11 "strconv"
12 "strings"
13
14 "golang.org/x/net/html/atom"
15)
16
// A TokenType is the type of a Token.
type TokenType uint32

const (
	// ErrorToken means that an error occurred during tokenization.
	ErrorToken TokenType = iota
	// TextToken means a text node.
	TextToken
	// A StartTagToken looks like <a>.
	StartTagToken
	// An EndTagToken looks like </a>.
	EndTagToken
	// A SelfClosingTagToken tag looks like <br/>.
	SelfClosingTagToken
	// A CommentToken looks like <!--x-->.
	CommentToken
	// A DoctypeToken looks like <!DOCTYPE x>
	DoctypeToken
)

// ErrBufferExceeded means that the buffering limit was exceeded.
var ErrBufferExceeded = errors.New("max buffer exceeded")

// tokenTypeNames maps each defined TokenType to the name String reports.
var tokenTypeNames = [...]string{
	ErrorToken:          "Error",
	TextToken:           "Text",
	StartTagToken:       "StartTag",
	EndTagToken:         "EndTag",
	SelfClosingTagToken: "SelfClosingTag",
	CommentToken:        "Comment",
	DoctypeToken:        "Doctype",
}

// String returns a string representation of the TokenType: its name for the
// defined token types, or "Invalid(n)" for any other value n.
func (t TokenType) String() string {
	if t < TokenType(len(tokenTypeNames)) {
		return tokenTypeNames[t]
	}
	return "Invalid(" + strconv.Itoa(int(t)) + ")"
}
60
// An Attribute is an attribute namespace-key-value triple.
//
// Namespace is only used by the parser, not the tokenizer.
type Attribute struct {
	// Namespace is non-empty for foreign attributes like xlink.
	Namespace string
	// Key is alphabetic, and hence does not contain escapable characters
	// like '&', '<' or '>'.
	Key string
	// Val is unescaped: it looks like "a<b" rather than "a&lt;b".
	Val string
}
70
71// A Token consists of a TokenType and some Data (tag name for start and end
72// tags, content for text, comments and doctypes). A tag Token may also contain
73// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
74// rather than "a&lt;b"). For tag Tokens, DataAtom is the atom for Data, or
75// zero if Data is not a known tag name.
76type Token struct {
77 Type TokenType
78 DataAtom atom.Atom
79 Data string
80 Attr []Attribute
81}
82
83// tagString returns a string representation of a tag Token's Data and Attr.
84func (t Token) tagString() string {
85 if len(t.Attr) == 0 {
86 return t.Data
87 }
88 buf := bytes.NewBufferString(t.Data)
89 for _, a := range t.Attr {
90 buf.WriteByte(' ')
91 buf.WriteString(a.Key)
92 buf.WriteString(`="`)
93 escape(buf, a.Val)
94 buf.WriteByte('"')
95 }
96 return buf.String()
97}
98
99// String returns a string representation of the Token.
100func (t Token) String() string {
101 switch t.Type {
102 case ErrorToken:
103 return ""
104 case TextToken:
105 return EscapeString(t.Data)
106 case StartTagToken:
107 return "<" + t.tagString() + ">"
108 case EndTagToken:
109 return "</" + t.tagString() + ">"
110 case SelfClosingTagToken:
111 return "<" + t.tagString() + "/>"
112 case CommentToken:
113 return "<!--" + escapeCommentString(t.Data) + "-->"
114 case DoctypeToken:
115 return "<!DOCTYPE " + EscapeString(t.Data) + ">"
116 }
117 return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
118}
119
// span is a range of bytes in a Tokenizer's buffer.
type span struct {
	// start is the inclusive lower bound.
	start int
	// end is the exclusive upper bound.
	end int
}
125
// A Tokenizer returns a stream of HTML Tokens.
//
// All positions kept in the span fields below are indexes into buf.
type Tokenizer struct {
	// r is the source of the HTML text.
	r io.Reader
	// tt is the TokenType of the current token.
	tt TokenType
	// err is the first error encountered during tokenization. It is possible
	// for tt != ErrorToken && err != nil to hold: this means that Next
	// returned a valid token but the subsequent Next call will return an
	// error token.
	// For example, if the HTML text input was just "plain", then the first
	// Next call would set z.err to io.EOF but return a TextToken, and all
	// subsequent Next calls would return an ErrorToken.
	// err is never reset. Once it becomes non-nil, it stays non-nil.
	err error
	// readErr is the error returned by the io.Reader r. It is separate from
	// err because it is valid for an io.Reader to return (n int, err1 error)
	// such that n > 0 && err1 != nil, and callers should always process the
	// n > 0 bytes before considering the error err1.
	readErr error
	// buf[raw.start:raw.end] holds the raw bytes of the current token.
	// buf[raw.end:] is buffered input that will yield future tokens.
	raw span
	buf []byte
	// maxBuf limits the data buffered in buf. A value of 0 means unlimited.
	maxBuf int
	// buf[data.start:data.end] holds the raw bytes of the current token's data:
	// a text token's text, a tag token's tag name, etc.
	data span
	// pendingAttr is the attribute key (index 0) and value (index 1)
	// currently being tokenized. When complete, pendingAttr is pushed onto
	// attr. nAttrReturned is incremented on each call to TagAttr.
	pendingAttr   [2]span
	attr          [][2]span
	nAttrReturned int
	// rawTag is the "script" in "</script>" that closes the next token. If
	// non-empty, the subsequent call to Next will return a raw or RCDATA text
	// token: one that treats "<p>" as text instead of an element.
	// rawTag's contents are lower-cased.
	rawTag string
	// textIsRaw is whether the current text token's data is not escaped.
	textIsRaw bool
	// convertNUL is whether NUL bytes in the current token's data should
	// be converted into \ufffd replacement characters.
	convertNUL bool
	// allowCDATA is whether CDATA sections are allowed in the current context.
	allowCDATA bool
}
173
// AllowCDATA sets whether or not the tokenizer recognizes <![CDATA[foo]]> as
// the text "foo". The default value is false, which means to recognize it as
// a bogus comment "<!-- [CDATA[foo]] -->" instead.
//
// Strictly speaking, an HTML5 compliant tokenizer should allow CDATA if and
// only if tokenizing foreign content, such as MathML and SVG. However,
// tracking foreign-contentness is difficult to do purely in the tokenizer,
// as opposed to the parser, due to HTML integration points: an <svg> element
// can contain a <foreignObject> that is foreign-to-SVG but not foreign-to-
// HTML. For strict compliance with the HTML5 tokenization algorithm, it is the
// responsibility of the user of a tokenizer to call AllowCDATA as appropriate.
// In practice, if using the tokenizer without caring whether MathML or SVG
// CDATA is text or comments, such as tokenizing HTML to find all the anchor
// text, it is acceptable to ignore this responsibility.
//
// The setting stays in effect until AllowCDATA is called again.
func (z *Tokenizer) AllowCDATA(allowCDATA bool) {
	z.allowCDATA = allowCDATA
}
191
// NextIsNotRawText instructs the tokenizer that the next token should not be
// considered as 'raw text'. Some elements, such as script and title elements,
// normally require the next token after the opening tag to be 'raw text' that
// has no child elements. For example, tokenizing "<title>a<b>c</b>d</title>"
// yields a start tag token for "<title>", a text token for "a<b>c</b>d", and
// an end tag token for "</title>". There are no distinct start tag or end tag
// tokens for the "<b>" and "</b>".
//
// This tokenizer implementation will generally look for raw text at the right
// times. Strictly speaking, an HTML5 compliant tokenizer should not look for
// raw text if in foreign content: <title> generally needs raw text, but a
// <title> inside an <svg> does not. Another example is that a <textarea>
// generally needs raw text, but a <textarea> is not allowed as an immediate
// child of a <select>; in normal parsing, a <textarea> implies </select>, but
// one cannot close the implicit element when parsing a <select>'s InnerHTML.
// Similarly to AllowCDATA, tracking the correct moment to override raw-text-
// ness is difficult to do purely in the tokenizer, as opposed to the parser.
// For strict compliance with the HTML5 tokenization algorithm, it is the
// responsibility of the user of a tokenizer to call NextIsNotRawText as
// appropriate. In practice, like AllowCDATA, it is acceptable to ignore this
// responsibility for basic usage.
//
// Note that this 'raw text' concept is different from the one offered by the
// Tokenizer.Raw method.
func (z *Tokenizer) NextIsNotRawText() {
	// Clearing the pending raw tag name is what turns raw-text mode off.
	z.rawTag = ""
}
219
220// Err returns the error associated with the most recent ErrorToken token.
221// This is typically io.EOF, meaning the end of tokenization.
222func (z *Tokenizer) Err() error {
223 if z.tt != ErrorToken {
224 return nil
225 }
226 return z.err
227}
228
229// readByte returns the next byte from the input stream, doing a buffered read
230// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte
231// slice that holds all the bytes read so far for the current token.
232// It sets z.err if the underlying reader returns an error.
233// Pre-condition: z.err == nil.
234func (z *Tokenizer) readByte() byte {
235 if z.raw.end >= len(z.buf) {
236 // Our buffer is exhausted and we have to read from z.r. Check if the
237 // previous read resulted in an error.
238 if z.readErr != nil {
239 z.err = z.readErr
240 return 0
241 }
242 // We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length
243 // z.raw.end - z.raw.start is more than half the capacity of z.buf, then we
244 // allocate a new buffer before the copy.
245 c := cap(z.buf)
246 d := z.raw.end - z.raw.start
247 var buf1 []byte
248 if 2*d > c {
249 buf1 = make([]byte, d, 2*c)
250 } else {
251 buf1 = z.buf[:d]
252 }
253 copy(buf1, z.buf[z.raw.start:z.raw.end])
254 if x := z.raw.start; x != 0 {
255 // Adjust the data/attr spans to refer to the same contents after the copy.
256 z.data.start -= x
257 z.data.end -= x
258 z.pendingAttr[0].start -= x
259 z.pendingAttr[0].end -= x
260 z.pendingAttr[1].start -= x
261 z.pendingAttr[1].end -= x
262 for i := range z.attr {
263 z.attr[i][0].start -= x
264 z.attr[i][0].end -= x
265 z.attr[i][1].start -= x
266 z.attr[i][1].end -= x
267 }
268 }
269 z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d]
270 // Now that we have copied the live bytes to the start of the buffer,
271 // we read from z.r into the remainder.
272 var n int
273 n, z.readErr = readAtLeastOneByte(z.r, buf1[d:cap(buf1)])
274 if n == 0 {
275 z.err = z.readErr
276 return 0
277 }
278 z.buf = buf1[:d+n]
279 }
280 x := z.buf[z.raw.end]
281 z.raw.end++
282 if z.maxBuf > 0 && z.raw.end-z.raw.start >= z.maxBuf {
283 z.err = ErrBufferExceeded
284 return 0
285 }
286 return x
287}
288
// Buffered returns a slice containing data buffered but not yet tokenized.
// The result is a sub-slice of the tokenizer's internal buffer, not a copy.
func (z *Tokenizer) Buffered() []byte {
	return z.buf[z.raw.end:]
}
293
// readAtLeastOneByte wraps an io.Reader so that reading cannot return (0, nil).
// It calls r.Read until the call yields data or an error, and returns
// io.ErrNoProgress if r.Read returns (0, nil) too many times in succession.
func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
	const maxEmptyReads = 100
	for attempt := 0; attempt < maxEmptyReads; attempt++ {
		n, err := r.Read(b)
		if n == 0 && err == nil {
			continue
		}
		return n, err
	}
	return 0, io.ErrNoProgress
}
305
306// skipWhiteSpace skips past any white space.
307func (z *Tokenizer) skipWhiteSpace() {
308 if z.err != nil {
309 return
310 }
311 for {
312 c := z.readByte()
313 if z.err != nil {
314 return
315 }
316 switch c {
317 case ' ', '\n', '\r', '\t', '\f':
318 // No-op.
319 default:
320 z.raw.end--
321 return
322 }
323 }
324}
325
326// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
327// is typically something like "script" or "textarea".
328func (z *Tokenizer) readRawOrRCDATA() {
329 if z.rawTag == "script" {
330 z.readScript()
331 z.textIsRaw = true
332 z.rawTag = ""
333 return
334 }
335loop:
336 for {
337 c := z.readByte()
338 if z.err != nil {
339 break loop
340 }
341 if c != '<' {
342 continue loop
343 }
344 c = z.readByte()
345 if z.err != nil {
346 break loop
347 }
348 if c != '/' {
349 z.raw.end--
350 continue loop
351 }
352 if z.readRawEndTag() || z.err != nil {
353 break loop
354 }
355 }
356 z.data.end = z.raw.end
357 // A textarea's or title's RCDATA can contain escaped entities.
358 z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title"
359 z.rawTag = ""
360}
361
362// readRawEndTag attempts to read a tag like "</foo>", where "foo" is z.rawTag.
363// If it succeeds, it backs up the input position to reconsume the tag and
364// returns true. Otherwise it returns false. The opening "</" has already been
365// consumed.
366func (z *Tokenizer) readRawEndTag() bool {
367 for i := 0; i < len(z.rawTag); i++ {
368 c := z.readByte()
369 if z.err != nil {
370 return false
371 }
372 if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
373 z.raw.end--
374 return false
375 }
376 }
377 c := z.readByte()
378 if z.err != nil {
379 return false
380 }
381 switch c {
382 case ' ', '\n', '\r', '\t', '\f', '/', '>':
383 // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
384 z.raw.end -= 3 + len(z.rawTag)
385 return true
386 }
387 z.raw.end--
388 return false
389}
390
391// readScript reads until the next </script> tag, following the byzantine
392// rules for escaping/hiding the closing tag.
393func (z *Tokenizer) readScript() {
394 defer func() {
395 z.data.end = z.raw.end
396 }()
397 var c byte
398
399scriptData:
400 c = z.readByte()
401 if z.err != nil {
402 return
403 }
404 if c == '<' {
405 goto scriptDataLessThanSign
406 }
407 goto scriptData
408
409scriptDataLessThanSign:
410 c = z.readByte()
411 if z.err != nil {
412 return
413 }
414 switch c {
415 case '/':
416 goto scriptDataEndTagOpen
417 case '!':
418 goto scriptDataEscapeStart
419 }
420 z.raw.end--
421 goto scriptData
422
423scriptDataEndTagOpen:
424 if z.readRawEndTag() || z.err != nil {
425 return
426 }
427 goto scriptData
428
429scriptDataEscapeStart:
430 c = z.readByte()
431 if z.err != nil {
432 return
433 }
434 if c == '-' {
435 goto scriptDataEscapeStartDash
436 }
437 z.raw.end--
438 goto scriptData
439
440scriptDataEscapeStartDash:
441 c = z.readByte()
442 if z.err != nil {
443 return
444 }
445 if c == '-' {
446 goto scriptDataEscapedDashDash
447 }
448 z.raw.end--
449 goto scriptData
450
451scriptDataEscaped:
452 c = z.readByte()
453 if z.err != nil {
454 return
455 }
456 switch c {
457 case '-':
458 goto scriptDataEscapedDash
459 case '<':
460 goto scriptDataEscapedLessThanSign
461 }
462 goto scriptDataEscaped
463
464scriptDataEscapedDash:
465 c = z.readByte()
466 if z.err != nil {
467 return
468 }
469 switch c {
470 case '-':
471 goto scriptDataEscapedDashDash
472 case '<':
473 goto scriptDataEscapedLessThanSign
474 }
475 goto scriptDataEscaped
476
477scriptDataEscapedDashDash:
478 c = z.readByte()
479 if z.err != nil {
480 return
481 }
482 switch c {
483 case '-':
484 goto scriptDataEscapedDashDash
485 case '<':
486 goto scriptDataEscapedLessThanSign
487 case '>':
488 goto scriptData
489 }
490 goto scriptDataEscaped
491
492scriptDataEscapedLessThanSign:
493 c = z.readByte()
494 if z.err != nil {
495 return
496 }
497 if c == '/' {
498 goto scriptDataEscapedEndTagOpen
499 }
500 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
501 goto scriptDataDoubleEscapeStart
502 }
503 z.raw.end--
504 goto scriptData
505
506scriptDataEscapedEndTagOpen:
507 if z.readRawEndTag() || z.err != nil {
508 return
509 }
510 goto scriptDataEscaped
511
512scriptDataDoubleEscapeStart:
513 z.raw.end--
514 for i := 0; i < len("script"); i++ {
515 c = z.readByte()
516 if z.err != nil {
517 return
518 }
519 if c != "script"[i] && c != "SCRIPT"[i] {
520 z.raw.end--
521 goto scriptDataEscaped
522 }
523 }
524 c = z.readByte()
525 if z.err != nil {
526 return
527 }
528 switch c {
529 case ' ', '\n', '\r', '\t', '\f', '/', '>':
530 goto scriptDataDoubleEscaped
531 }
532 z.raw.end--
533 goto scriptDataEscaped
534
535scriptDataDoubleEscaped:
536 c = z.readByte()
537 if z.err != nil {
538 return
539 }
540 switch c {
541 case '-':
542 goto scriptDataDoubleEscapedDash
543 case '<':
544 goto scriptDataDoubleEscapedLessThanSign
545 }
546 goto scriptDataDoubleEscaped
547
548scriptDataDoubleEscapedDash:
549 c = z.readByte()
550 if z.err != nil {
551 return
552 }
553 switch c {
554 case '-':
555 goto scriptDataDoubleEscapedDashDash
556 case '<':
557 goto scriptDataDoubleEscapedLessThanSign
558 }
559 goto scriptDataDoubleEscaped
560
561scriptDataDoubleEscapedDashDash:
562 c = z.readByte()
563 if z.err != nil {
564 return
565 }
566 switch c {
567 case '-':
568 goto scriptDataDoubleEscapedDashDash
569 case '<':
570 goto scriptDataDoubleEscapedLessThanSign
571 case '>':
572 goto scriptData
573 }
574 goto scriptDataDoubleEscaped
575
576scriptDataDoubleEscapedLessThanSign:
577 c = z.readByte()
578 if z.err != nil {
579 return
580 }
581 if c == '/' {
582 goto scriptDataDoubleEscapeEnd
583 }
584 z.raw.end--
585 goto scriptDataDoubleEscaped
586
587scriptDataDoubleEscapeEnd:
588 if z.readRawEndTag() {
589 z.raw.end += len("</script>")
590 goto scriptDataEscaped
591 }
592 if z.err != nil {
593 return
594 }
595 goto scriptDataDoubleEscaped
596}
597
598// readComment reads the next comment token starting with "<!--". The opening
599// "<!--" has already been consumed.
600func (z *Tokenizer) readComment() {
601 // When modifying this function, consider manually increasing the
602 // maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
603 // That increase should only be temporary, not committed, as it
604 // exponentially affects the test running time.
605
606 z.data.start = z.raw.end
607 defer func() {
608 if z.data.end < z.data.start {
609 // It's a comment with no data, like <!-->.
610 z.data.end = z.data.start
611 }
612 }()
613
614 var dashCount int
615 beginning := true
616 for {
617 c := z.readByte()
618 if z.err != nil {
619 z.data.end = z.calculateAbruptCommentDataEnd()
620 return
621 }
622 switch c {
623 case '-':
624 dashCount++
625 continue
626 case '>':
627 if dashCount >= 2 || beginning {
628 z.data.end = z.raw.end - len("-->")
629 return
630 }
631 case '!':
632 if dashCount >= 2 {
633 c = z.readByte()
634 if z.err != nil {
635 z.data.end = z.calculateAbruptCommentDataEnd()
636 return
637 } else if c == '>' {
638 z.data.end = z.raw.end - len("--!>")
639 return
640 } else if c == '-' {
641 dashCount = 1
642 beginning = false
643 continue
644 }
645 }
646 }
647 dashCount = 0
648 beginning = false
649 }
650}
651
652func (z *Tokenizer) calculateAbruptCommentDataEnd() int {
653 raw := z.Raw()
654 const prefixLen = len("<!--")
655 if len(raw) >= prefixLen {
656 raw = raw[prefixLen:]
657 if hasSuffix(raw, "--!") {
658 return z.raw.end - 3
659 } else if hasSuffix(raw, "--") {
660 return z.raw.end - 2
661 } else if hasSuffix(raw, "-") {
662 return z.raw.end - 1
663 }
664 }
665 return z.raw.end
666}
667
// hasSuffix reports whether the byte slice b ends with suffix.
func hasSuffix(b []byte, suffix string) bool {
	if len(b) < len(suffix) {
		return false
	}
	return string(b[len(b)-len(suffix):]) == suffix
}
680
681// readUntilCloseAngle reads until the next ">".
682func (z *Tokenizer) readUntilCloseAngle() {
683 z.data.start = z.raw.end
684 for {
685 c := z.readByte()
686 if z.err != nil {
687 z.data.end = z.raw.end
688 return
689 }
690 if c == '>' {
691 z.data.end = z.raw.end - len(">")
692 return
693 }
694 }
695}
696
697// readMarkupDeclaration reads the next token starting with "<!". It might be
698// a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or
699// "<!a bogus comment". The opening "<!" has already been consumed.
700func (z *Tokenizer) readMarkupDeclaration() TokenType {
701 z.data.start = z.raw.end
702 var c [2]byte
703 for i := 0; i < 2; i++ {
704 c[i] = z.readByte()
705 if z.err != nil {
706 z.data.end = z.raw.end
707 return CommentToken
708 }
709 }
710 if c[0] == '-' && c[1] == '-' {
711 z.readComment()
712 return CommentToken
713 }
714 z.raw.end -= 2
715 if z.readDoctype() {
716 return DoctypeToken
717 }
718 if z.allowCDATA && z.readCDATA() {
719 z.convertNUL = true
720 return TextToken
721 }
722 // It's a bogus comment.
723 z.readUntilCloseAngle()
724 return CommentToken
725}
726
727// readDoctype attempts to read a doctype declaration and returns true if
728// successful. The opening "<!" has already been consumed.
729func (z *Tokenizer) readDoctype() bool {
730 const s = "DOCTYPE"
731 for i := 0; i < len(s); i++ {
732 c := z.readByte()
733 if z.err != nil {
734 z.data.end = z.raw.end
735 return false
736 }
737 if c != s[i] && c != s[i]+('a'-'A') {
738 // Back up to read the fragment of "DOCTYPE" again.
739 z.raw.end = z.data.start
740 return false
741 }
742 }
743 if z.skipWhiteSpace(); z.err != nil {
744 z.data.start = z.raw.end
745 z.data.end = z.raw.end
746 return true
747 }
748 z.readUntilCloseAngle()
749 return true
750}
751
752// readCDATA attempts to read a CDATA section and returns true if
753// successful. The opening "<!" has already been consumed.
754func (z *Tokenizer) readCDATA() bool {
755 const s = "[CDATA["
756 for i := 0; i < len(s); i++ {
757 c := z.readByte()
758 if z.err != nil {
759 z.data.end = z.raw.end
760 return false
761 }
762 if c != s[i] {
763 // Back up to read the fragment of "[CDATA[" again.
764 z.raw.end = z.data.start
765 return false
766 }
767 }
768 z.data.start = z.raw.end
769 brackets := 0
770 for {
771 c := z.readByte()
772 if z.err != nil {
773 z.data.end = z.raw.end
774 return true
775 }
776 switch c {
777 case ']':
778 brackets++
779 case '>':
780 if brackets >= 2 {
781 z.data.end = z.raw.end - len("]]>")
782 return true
783 }
784 brackets = 0
785 default:
786 brackets = 0
787 }
788 }
789}
790
791// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
792// case-insensitively matches any element of ss.
793func (z *Tokenizer) startTagIn(ss ...string) bool {
794loop:
795 for _, s := range ss {
796 if z.data.end-z.data.start != len(s) {
797 continue loop
798 }
799 for i := 0; i < len(s); i++ {
800 c := z.buf[z.data.start+i]
801 if 'A' <= c && c <= 'Z' {
802 c += 'a' - 'A'
803 }
804 if c != s[i] {
805 continue loop
806 }
807 }
808 return true
809 }
810 return false
811}
812
813// readStartTag reads the next start tag token. The opening "<a" has already
814// been consumed, where 'a' means anything in [A-Za-z].
815func (z *Tokenizer) readStartTag() TokenType {
816 z.readTag(true)
817 if z.err != nil {
818 return ErrorToken
819 }
820 // Several tags flag the tokenizer's next token as raw.
821 c, raw := z.buf[z.data.start], false
822 if 'A' <= c && c <= 'Z' {
823 c += 'a' - 'A'
824 }
825 switch c {
826 case 'i':
827 raw = z.startTagIn("iframe")
828 case 'n':
829 raw = z.startTagIn("noembed", "noframes", "noscript")
830 case 'p':
831 raw = z.startTagIn("plaintext")
832 case 's':
833 raw = z.startTagIn("script", "style")
834 case 't':
835 raw = z.startTagIn("textarea", "title")
836 case 'x':
837 raw = z.startTagIn("xmp")
838 }
839 if raw {
840 z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
841 }
842 // Look for a self-closing token like "<br/>".
843 if z.err == nil && z.buf[z.raw.end-2] == '/' {
844 return SelfClosingTagToken
845 }
846 return StartTagToken
847}
848
849// readTag reads the next tag token and its attributes. If saveAttr, those
850// attributes are saved in z.attr, otherwise z.attr is set to an empty slice.
851// The opening "<a" or "</a" has already been consumed, where 'a' means anything
852// in [A-Za-z].
853func (z *Tokenizer) readTag(saveAttr bool) {
854 z.attr = z.attr[:0]
855 z.nAttrReturned = 0
856 // Read the tag name and attribute key/value pairs.
857 z.readTagName()
858 if z.skipWhiteSpace(); z.err != nil {
859 return
860 }
861 for {
862 c := z.readByte()
863 if z.err != nil || c == '>' {
864 break
865 }
866 z.raw.end--
867 z.readTagAttrKey()
868 z.readTagAttrVal()
869 // Save pendingAttr if saveAttr and that attribute has a non-empty key.
870 if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
871 z.attr = append(z.attr, z.pendingAttr)
872 }
873 if z.skipWhiteSpace(); z.err != nil {
874 break
875 }
876 }
877}
878
879// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end)
880// is positioned such that the first byte of the tag name (the "d" in "<div")
881// has already been consumed.
882func (z *Tokenizer) readTagName() {
883 z.data.start = z.raw.end - 1
884 for {
885 c := z.readByte()
886 if z.err != nil {
887 z.data.end = z.raw.end
888 return
889 }
890 switch c {
891 case ' ', '\n', '\r', '\t', '\f':
892 z.data.end = z.raw.end - 1
893 return
894 case '/', '>':
895 z.raw.end--
896 z.data.end = z.raw.end
897 return
898 }
899 }
900}
901
902// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>".
903// Precondition: z.err == nil.
904func (z *Tokenizer) readTagAttrKey() {
905 z.pendingAttr[0].start = z.raw.end
906 for {
907 c := z.readByte()
908 if z.err != nil {
909 z.pendingAttr[0].end = z.raw.end
910 return
911 }
912 switch c {
913 case ' ', '\n', '\r', '\t', '\f', '/':
914 z.pendingAttr[0].end = z.raw.end - 1
915 return
916 case '=', '>':
917 z.raw.end--
918 z.pendingAttr[0].end = z.raw.end
919 return
920 }
921 }
922}
923
924// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>".
925func (z *Tokenizer) readTagAttrVal() {
926 z.pendingAttr[1].start = z.raw.end
927 z.pendingAttr[1].end = z.raw.end
928 if z.skipWhiteSpace(); z.err != nil {
929 return
930 }
931 c := z.readByte()
932 if z.err != nil {
933 return
934 }
935 if c != '=' {
936 z.raw.end--
937 return
938 }
939 if z.skipWhiteSpace(); z.err != nil {
940 return
941 }
942 quote := z.readByte()
943 if z.err != nil {
944 return
945 }
946 switch quote {
947 case '>':
948 z.raw.end--
949 return
950
951 case '\'', '"':
952 z.pendingAttr[1].start = z.raw.end
953 for {
954 c := z.readByte()
955 if z.err != nil {
956 z.pendingAttr[1].end = z.raw.end
957 return
958 }
959 if c == quote {
960 z.pendingAttr[1].end = z.raw.end - 1
961 return
962 }
963 }
964
965 default:
966 z.pendingAttr[1].start = z.raw.end - 1
967 for {
968 c := z.readByte()
969 if z.err != nil {
970 z.pendingAttr[1].end = z.raw.end
971 return
972 }
973 switch c {
974 case ' ', '\n', '\r', '\t', '\f':
975 z.pendingAttr[1].end = z.raw.end - 1
976 return
977 case '>':
978 z.raw.end--
979 z.pendingAttr[1].end = z.raw.end
980 return
981 }
982 }
983 }
984}
985
986// Next scans the next token and returns its type.
987func (z *Tokenizer) Next() TokenType {
988 z.raw.start = z.raw.end
989 z.data.start = z.raw.end
990 z.data.end = z.raw.end
991 if z.err != nil {
992 z.tt = ErrorToken
993 return z.tt
994 }
995 if z.rawTag != "" {
996 if z.rawTag == "plaintext" {
997 // Read everything up to EOF.
998 for z.err == nil {
999 z.readByte()
1000 }
1001 z.data.end = z.raw.end
1002 z.textIsRaw = true
1003 } else {
1004 z.readRawOrRCDATA()
1005 }
1006 if z.data.end > z.data.start {
1007 z.tt = TextToken
1008 z.convertNUL = true
1009 return z.tt
1010 }
1011 }
1012 z.textIsRaw = false
1013 z.convertNUL = false
1014
1015loop:
1016 for {
1017 c := z.readByte()
1018 if z.err != nil {
1019 break loop
1020 }
1021 if c != '<' {
1022 continue loop
1023 }
1024
1025 // Check if the '<' we have just read is part of a tag, comment
1026 // or doctype. If not, it's part of the accumulated text token.
1027 c = z.readByte()
1028 if z.err != nil {
1029 break loop
1030 }
1031 var tokenType TokenType
1032 switch {
1033 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
1034 tokenType = StartTagToken
1035 case c == '/':
1036 tokenType = EndTagToken
1037 case c == '!' || c == '?':
1038 // We use CommentToken to mean any of "<!--actual comments-->",
1039 // "<!DOCTYPE declarations>" and "<?xml processing instructions?>".
1040 tokenType = CommentToken
1041 default:
1042 // Reconsume the current character.
1043 z.raw.end--
1044 continue
1045 }
1046
1047 // We have a non-text token, but we might have accumulated some text
1048 // before that. If so, we return the text first, and return the non-
1049 // text token on the subsequent call to Next.
1050 if x := z.raw.end - len("<a"); z.raw.start < x {
1051 z.raw.end = x
1052 z.data.end = x
1053 z.tt = TextToken
1054 return z.tt
1055 }
1056 switch tokenType {
1057 case StartTagToken:
1058 z.tt = z.readStartTag()
1059 return z.tt
1060 case EndTagToken:
1061 c = z.readByte()
1062 if z.err != nil {
1063 break loop
1064 }
1065 if c == '>' {
1066 // "</>" does not generate a token at all. Generate an empty comment
1067 // to allow passthrough clients to pick up the data using Raw.
1068 // Reset the tokenizer state and start again.
1069 z.tt = CommentToken
1070 return z.tt
1071 }
1072 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
1073 z.readTag(false)
1074 if z.err != nil {
1075 z.tt = ErrorToken
1076 } else {
1077 z.tt = EndTagToken
1078 }
1079 return z.tt
1080 }
1081 z.raw.end--
1082 z.readUntilCloseAngle()
1083 z.tt = CommentToken
1084 return z.tt
1085 case CommentToken:
1086 if c == '!' {
1087 z.tt = z.readMarkupDeclaration()
1088 return z.tt
1089 }
1090 z.raw.end--
1091 z.readUntilCloseAngle()
1092 z.tt = CommentToken
1093 return z.tt
1094 }
1095 }
1096 if z.raw.start < z.raw.end {
1097 z.data.end = z.raw.end
1098 z.tt = TextToken
1099 return z.tt
1100 }
1101 z.tt = ErrorToken
1102 return z.tt
1103}
1104
1105// Raw returns the unmodified text of the current token. Calling Next, Token,
1106// Text, TagName or TagAttr may change the contents of the returned slice.
1107//
1108// The token stream's raw bytes partition the byte stream (up until an
1109// ErrorToken). There are no overlaps or gaps between two consecutive token's
1110// raw bytes. One implication is that the byte offset of the current token is
1111// the sum of the lengths of all previous tokens' raw bytes.
1112func (z *Tokenizer) Raw() []byte {
1113 return z.buf[z.raw.start:z.raw.end]
1114}
1115
// convertNewlines converts "\r" and "\r\n" in s to "\n".
// The conversion happens in place, but the resulting slice may be shorter.
func convertNewlines(s []byte) []byte {
	first := bytes.IndexByte(s, '\r')
	if first < 0 {
		// Nothing to convert.
		return s
	}
	// Compact in place from the first carriage return onwards: r is the read
	// cursor, w the write cursor, and w never overtakes r.
	w := first
	for r := first; r < len(s); r++ {
		b := s[r]
		if b == '\r' {
			b = '\n'
			// Collapse "\r\n" into a single newline.
			if r+1 < len(s) && s[r+1] == '\n' {
				r++
			}
		}
		s[w] = b
		w++
	}
	return s[:w]
}
1147
// Byte sequences used by Text when sanitizing token text.
var (
	// nul is the single NUL byte that Text searches for.
	nul = []byte("\x00")
	// replacement is the UTF-8 encoding of U+FFFD, the Unicode replacement
	// character, which Text substitutes for each NUL byte.
	replacement = []byte("\ufffd")
)
1152
1153// Text returns the unescaped text of a text, comment or doctype token. The
1154// contents of the returned slice may change on the next call to Next.
1155func (z *Tokenizer) Text() []byte {
1156 switch z.tt {
1157 case TextToken, CommentToken, DoctypeToken:
1158 s := z.buf[z.data.start:z.data.end]
1159 z.data.start = z.raw.end
1160 z.data.end = z.raw.end
1161 s = convertNewlines(s)
1162 if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) {
1163 s = bytes.Replace(s, nul, replacement, -1)
1164 }
1165 if !z.textIsRaw {
1166 s = unescape(s, false)
1167 }
1168 return s
1169 }
1170 return nil
1171}
1172
1173// TagName returns the lower-cased name of a tag token (the `img` out of
1174// `<IMG SRC="foo">`) and whether the tag has attributes.
1175// The contents of the returned slice may change on the next call to Next.
1176func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
1177 if z.data.start < z.data.end {
1178 switch z.tt {
1179 case StartTagToken, EndTagToken, SelfClosingTagToken:
1180 s := z.buf[z.data.start:z.data.end]
1181 z.data.start = z.raw.end
1182 z.data.end = z.raw.end
1183 return lower(s), z.nAttrReturned < len(z.attr)
1184 }
1185 }
1186 return nil, false
1187}
1188
1189// TagAttr returns the lower-cased key and unescaped value of the next unparsed
1190// attribute for the current tag token and whether there are more attributes.
1191// The contents of the returned slices may change on the next call to Next.
1192func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
1193 if z.nAttrReturned < len(z.attr) {
1194 switch z.tt {
1195 case StartTagToken, SelfClosingTagToken:
1196 x := z.attr[z.nAttrReturned]
1197 z.nAttrReturned++
1198 key = z.buf[x[0].start:x[0].end]
1199 val = z.buf[x[1].start:x[1].end]
1200 return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
1201 }
1202 }
1203 return nil, nil, false
1204}
1205
1206// Token returns the current Token. The result's Data and Attr values remain
1207// valid after subsequent Next calls.
1208func (z *Tokenizer) Token() Token {
1209 t := Token{Type: z.tt}
1210 switch z.tt {
1211 case TextToken, CommentToken, DoctypeToken:
1212 t.Data = string(z.Text())
1213 case StartTagToken, SelfClosingTagToken, EndTagToken:
1214 name, moreAttr := z.TagName()
1215 for moreAttr {
1216 var key, val []byte
1217 key, val, moreAttr = z.TagAttr()
1218 t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
1219 }
1220 if a := atom.Lookup(name); a != 0 {
1221 t.DataAtom, t.Data = a, a.String()
1222 } else {
1223 t.DataAtom, t.Data = 0, string(name)
1224 }
1225 }
1226 return t
1227}
1228
1229// SetMaxBuf sets a limit on the amount of data buffered during tokenization.
1230// A value of 0 means unlimited.
1231func (z *Tokenizer) SetMaxBuf(n int) {
1232 z.maxBuf = n
1233}
1234
1235// NewTokenizer returns a new HTML Tokenizer for the given Reader.
1236// The input is assumed to be UTF-8 encoded.
1237func NewTokenizer(r io.Reader) *Tokenizer {
1238 return NewTokenizerFragment(r, "")
1239}
1240
1241// NewTokenizerFragment returns a new HTML Tokenizer for the given Reader, for
1242// tokenizing an existing element's InnerHTML fragment. contextTag is that
1243// element's tag, such as "div" or "iframe".
1244//
1245// For example, how the InnerHTML "a<b" is tokenized depends on whether it is
1246// for a <p> tag or a <script> tag.
1247//
1248// The input is assumed to be UTF-8 encoded.
1249func NewTokenizerFragment(r io.Reader, contextTag string) *Tokenizer {
1250 z := &Tokenizer{
1251 r: r,
1252 buf: make([]byte, 0, 4096),
1253 }
1254 if contextTag != "" {
1255 switch s := strings.ToLower(contextTag); s {
1256 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
1257 z.rawTag = s
1258 }
1259 }
1260 return z
1261}