1// Copyright 2010 The Go Authors. All rights reserved.
   2// Use of this source code is governed by a BSD-style
   3// license that can be found in the LICENSE file.
   4
   5package html
   6
   7import (
   8	"errors"
   9	"fmt"
  10	"io"
  11	"strings"
  12
  13	a "golang.org/x/net/html/atom"
  14)
  15
  16// A parser implements the HTML5 parsing algorithm:
  17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
  18type parser struct {
  19	// tokenizer provides the tokens for the parser.
  20	tokenizer *Tokenizer
  21	// tok is the most recently read token.
  22	tok Token
  23	// Self-closing tags like <hr/> are treated as start tags, except that
  24	// hasSelfClosingToken is set while they are being processed.
  25	hasSelfClosingToken bool
  26	// doc is the document root element.
  27	doc *Node
  28	// The stack of open elements (section 12.2.4.2) and active formatting
  29	// elements (section 12.2.4.3).
  30	oe, afe nodeStack
  31	// Element pointers (section 12.2.4.4).
  32	head, form *Node
  33	// Other parsing state flags (section 12.2.4.5).
  34	scripting, framesetOK bool
  35	// The stack of template insertion modes
  36	templateStack insertionModeStack
  37	// im is the current insertion mode.
  38	im insertionMode
  39	// originalIM is the insertion mode to go back to after completing a text
  40	// or inTableText insertion mode.
  41	originalIM insertionMode
  42	// fosterParenting is whether new elements should be inserted according to
  43	// the foster parenting rules (section 12.2.6.1).
  44	fosterParenting bool
  45	// quirks is whether the parser is operating in "quirks mode."
  46	quirks bool
  47	// fragment is whether the parser is parsing an HTML fragment.
  48	fragment bool
  49	// context is the context element when parsing an HTML fragment
  50	// (section 12.4).
  51	context *Node
  52}
  53
  54func (p *parser) top() *Node {
  55	if n := p.oe.top(); n != nil {
  56		return n
  57	}
  58	return p.doc
  59}
  60
  61// Stop tags for use in popUntil. These come from section 12.2.4.2.
  62var (
  63	defaultScopeStopTags = map[string][]a.Atom{
  64		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
  65		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
  66		"svg":  {a.Desc, a.ForeignObject, a.Title},
  67	}
  68)
  69
  70type scope int
  71
  72const (
  73	defaultScope scope = iota
  74	listItemScope
  75	buttonScope
  76	tableScope
  77	tableRowScope
  78	tableBodyScope
  79	selectScope
  80)
  81
  82// popUntil pops the stack of open elements at the highest element whose tag
  83// is in matchTags, provided there is no higher element in the scope's stop
  84// tags (as defined in section 12.2.4.2). It returns whether or not there was
  85// such an element. If there was not, popUntil leaves the stack unchanged.
  86//
  87// For example, the set of stop tags for table scope is: "html", "table". If
  88// the stack was:
  89// ["html", "body", "font", "table", "b", "i", "u"]
  90// then popUntil(tableScope, "font") would return false, but
  91// popUntil(tableScope, "i") would return true and the stack would become:
  92// ["html", "body", "font", "table", "b"]
  93//
  94// If an element's tag is in both the stop tags and matchTags, then the stack
  95// will be popped and the function returns true (provided, of course, there was
  96// no higher element in the stack that was also in the stop tags). For example,
  97// popUntil(tableScope, "table") returns true and leaves:
  98// ["html", "body", "font"]
  99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
 100	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
 101		p.oe = p.oe[:i]
 102		return true
 103	}
 104	return false
 105}
 106
 107// indexOfElementInScope returns the index in p.oe of the highest element whose
 108// tag is in matchTags that is in scope. If no matching element is in scope, it
 109// returns -1.
 110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
 111	for i := len(p.oe) - 1; i >= 0; i-- {
 112		tagAtom := p.oe[i].DataAtom
 113		if p.oe[i].Namespace == "" {
 114			for _, t := range matchTags {
 115				if t == tagAtom {
 116					return i
 117				}
 118			}
 119			switch s {
 120			case defaultScope:
 121				// No-op.
 122			case listItemScope:
 123				if tagAtom == a.Ol || tagAtom == a.Ul {
 124					return -1
 125				}
 126			case buttonScope:
 127				if tagAtom == a.Button {
 128					return -1
 129				}
 130			case tableScope:
 131				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 132					return -1
 133				}
 134			case selectScope:
 135				if tagAtom != a.Optgroup && tagAtom != a.Option {
 136					return -1
 137				}
 138			default:
 139				panic("unreachable")
 140			}
 141		}
 142		switch s {
 143		case defaultScope, listItemScope, buttonScope:
 144			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
 145				if t == tagAtom {
 146					return -1
 147				}
 148			}
 149		}
 150	}
 151	return -1
 152}
 153
 154// elementInScope is like popUntil, except that it doesn't modify the stack of
 155// open elements.
 156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
 157	return p.indexOfElementInScope(s, matchTags...) != -1
 158}
 159
 160// clearStackToContext pops elements off the stack of open elements until a
 161// scope-defined element is found.
 162func (p *parser) clearStackToContext(s scope) {
 163	for i := len(p.oe) - 1; i >= 0; i-- {
 164		tagAtom := p.oe[i].DataAtom
 165		switch s {
 166		case tableScope:
 167			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 168				p.oe = p.oe[:i+1]
 169				return
 170			}
 171		case tableRowScope:
 172			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
 173				p.oe = p.oe[:i+1]
 174				return
 175			}
 176		case tableBodyScope:
 177			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
 178				p.oe = p.oe[:i+1]
 179				return
 180			}
 181		default:
 182			panic("unreachable")
 183		}
 184	}
 185}
 186
 187// parseGenericRawTextElement implements the generic raw text element parsing
 188// algorithm defined in 12.2.6.2.
 189// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
 190// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
 191// officially, need to make tokenizer consider both states.
 192func (p *parser) parseGenericRawTextElement() {
 193	p.addElement()
 194	p.originalIM = p.im
 195	p.im = textIM
 196}
 197
 198// generateImpliedEndTags pops nodes off the stack of open elements as long as
 199// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
 200// If exceptions are specified, nodes with that name will not be popped off.
 201func (p *parser) generateImpliedEndTags(exceptions ...string) {
 202	var i int
 203loop:
 204	for i = len(p.oe) - 1; i >= 0; i-- {
 205		n := p.oe[i]
 206		if n.Type != ElementNode {
 207			break
 208		}
 209		switch n.DataAtom {
 210		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
 211			for _, except := range exceptions {
 212				if n.Data == except {
 213					break loop
 214				}
 215			}
 216			continue
 217		}
 218		break
 219	}
 220
 221	p.oe = p.oe[:i+1]
 222}
 223
 224// addChild adds a child node n to the top element, and pushes n onto the stack
 225// of open elements if it is an element node.
 226func (p *parser) addChild(n *Node) {
 227	if p.shouldFosterParent() {
 228		p.fosterParent(n)
 229	} else {
 230		p.top().AppendChild(n)
 231	}
 232
 233	if n.Type == ElementNode {
 234		p.oe = append(p.oe, n)
 235	}
 236}
 237
 238// shouldFosterParent returns whether the next node to be added should be
 239// foster parented.
 240func (p *parser) shouldFosterParent() bool {
 241	if p.fosterParenting {
 242		switch p.top().DataAtom {
 243		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
 244			return true
 245		}
 246	}
 247	return false
 248}
 249
 250// fosterParent adds a child node according to the foster parenting rules.
 251// Section 12.2.6.1, "foster parenting".
 252func (p *parser) fosterParent(n *Node) {
 253	var table, parent, prev, template *Node
 254	var i int
 255	for i = len(p.oe) - 1; i >= 0; i-- {
 256		if p.oe[i].DataAtom == a.Table {
 257			table = p.oe[i]
 258			break
 259		}
 260	}
 261
 262	var j int
 263	for j = len(p.oe) - 1; j >= 0; j-- {
 264		if p.oe[j].DataAtom == a.Template {
 265			template = p.oe[j]
 266			break
 267		}
 268	}
 269
 270	if template != nil && (table == nil || j > i) {
 271		template.AppendChild(n)
 272		return
 273	}
 274
 275	if table == nil {
 276		// The foster parent is the html element.
 277		parent = p.oe[0]
 278	} else {
 279		parent = table.Parent
 280	}
 281	if parent == nil {
 282		parent = p.oe[i-1]
 283	}
 284
 285	if table != nil {
 286		prev = table.PrevSibling
 287	} else {
 288		prev = parent.LastChild
 289	}
 290	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
 291		prev.Data += n.Data
 292		return
 293	}
 294
 295	parent.InsertBefore(n, table)
 296}
 297
 298// addText adds text to the preceding node if it is a text node, or else it
 299// calls addChild with a new text node.
 300func (p *parser) addText(text string) {
 301	if text == "" {
 302		return
 303	}
 304
 305	if p.shouldFosterParent() {
 306		p.fosterParent(&Node{
 307			Type: TextNode,
 308			Data: text,
 309		})
 310		return
 311	}
 312
 313	t := p.top()
 314	if n := t.LastChild; n != nil && n.Type == TextNode {
 315		n.Data += text
 316		return
 317	}
 318	p.addChild(&Node{
 319		Type: TextNode,
 320		Data: text,
 321	})
 322}
 323
 324// addElement adds a child element based on the current token.
 325func (p *parser) addElement() {
 326	p.addChild(&Node{
 327		Type:     ElementNode,
 328		DataAtom: p.tok.DataAtom,
 329		Data:     p.tok.Data,
 330		Attr:     p.tok.Attr,
 331	})
 332}
 333
 334// Section 12.2.4.3.
 335func (p *parser) addFormattingElement() {
 336	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
 337	p.addElement()
 338
 339	// Implement the Noah's Ark clause, but with three per family instead of two.
 340	identicalElements := 0
 341findIdenticalElements:
 342	for i := len(p.afe) - 1; i >= 0; i-- {
 343		n := p.afe[i]
 344		if n.Type == scopeMarkerNode {
 345			break
 346		}
 347		if n.Type != ElementNode {
 348			continue
 349		}
 350		if n.Namespace != "" {
 351			continue
 352		}
 353		if n.DataAtom != tagAtom {
 354			continue
 355		}
 356		if len(n.Attr) != len(attr) {
 357			continue
 358		}
 359	compareAttributes:
 360		for _, t0 := range n.Attr {
 361			for _, t1 := range attr {
 362				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
 363					// Found a match for this attribute, continue with the next attribute.
 364					continue compareAttributes
 365				}
 366			}
 367			// If we get here, there is no attribute that matches a.
 368			// Therefore the element is not identical to the new one.
 369			continue findIdenticalElements
 370		}
 371
 372		identicalElements++
 373		if identicalElements >= 3 {
 374			p.afe.remove(n)
 375		}
 376	}
 377
 378	p.afe = append(p.afe, p.top())
 379}
 380
 381// Section 12.2.4.3.
 382func (p *parser) clearActiveFormattingElements() {
 383	for {
 384		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
 385			return
 386		}
 387	}
 388}
 389
 390// Section 12.2.4.3.
 391func (p *parser) reconstructActiveFormattingElements() {
 392	n := p.afe.top()
 393	if n == nil {
 394		return
 395	}
 396	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
 397		return
 398	}
 399	i := len(p.afe) - 1
 400	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
 401		if i == 0 {
 402			i = -1
 403			break
 404		}
 405		i--
 406		n = p.afe[i]
 407	}
 408	for {
 409		i++
 410		clone := p.afe[i].clone()
 411		p.addChild(clone)
 412		p.afe[i] = clone
 413		if i == len(p.afe)-1 {
 414			break
 415		}
 416	}
 417}
 418
// acknowledgeSelfClosingTag clears hasSelfClosingToken, marking the
// self-closing flag of the token being processed as dealt with.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
 423
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed; a false result means the token
// has not been consumed by this mode.
type insertionMode func(*parser) bool
 429
 430// setOriginalIM sets the insertion mode to return to after completing a text or
 431// inTableText insertion mode.
 432// Section 12.2.4.1, "using the rules for".
 433func (p *parser) setOriginalIM() {
 434	if p.originalIM != nil {
 435		panic("html: bad parser state: originalIM was set twice")
 436	}
 437	p.originalIM = p.im
 438}
 439
// resetInsertionMode chooses a new insertion mode by walking the stack of
// open elements from the top down and inspecting each node's tag.
// Section 12.2.4.1, "reset the insertion mode".
//
// For the bottom-most stack entry, the fragment context element p.context is
// inspected instead when it is set. If no entry selects a mode (for example
// because the stack is empty), p.im is left unchanged.
func (p *parser) resetInsertionMode() {
	for i := len(p.oe) - 1; i >= 0; i-- {
		n := p.oe[i]
		// last marks the bottom-most entry of the stack.
		last := i == 0
		if last && p.context != nil {
			n = p.context
		}

		switch n.DataAtom {
		case a.Select:
			if !last {
				// Walk down the stack from the select element, one entry at a
				// time, looking for an enclosing template or table.
				for ancestor, first := n, p.oe[0]; ancestor != first; {
					ancestor = p.oe[p.oe.index(ancestor)-1]
					switch ancestor.DataAtom {
					case a.Template:
						p.im = inSelectIM
						return
					case a.Table:
						p.im = inSelectInTableIM
						return
					}
				}
			}
			p.im = inSelectIM
		case a.Td, a.Th:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inCellIM
		case a.Tr:
			p.im = inRowIM
		case a.Tbody, a.Thead, a.Tfoot:
			p.im = inTableBodyIM
		case a.Caption:
			p.im = inCaptionIM
		case a.Colgroup:
			p.im = inColumnGroupIM
		case a.Table:
			p.im = inTableIM
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			// A template element with a non-empty namespace is skipped and the
			// walk continues with the next entry down.
			if n.Namespace != "" {
				continue
			}
			p.im = p.templateStack.top()
		case a.Head:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inHeadIM
		case a.Body:
			p.im = inBodyIM
		case a.Frameset:
			p.im = inFramesetIM
		case a.Html:
			// Whether a head element has been seen decides between the two
			// head-related modes.
			if p.head == nil {
				p.im = beforeHeadIM
			} else {
				p.im = afterHeadIM
			}
		default:
			// Any other tag: fall back to inBodyIM at the bottom of the stack,
			// otherwise keep walking down.
			if last {
				p.im = inBodyIM
				return
			}
			continue
		}
		// A case above selected the mode.
		return
	}
}
 511
// whitespace is the cutset passed to strings.TrimLeft by the insertion modes:
// space, tab, carriage return, line feed and form feed.
const whitespace = " \t\r\n\f"
 513
 514// Section 12.2.6.4.1.
 515func initialIM(p *parser) bool {
 516	switch p.tok.Type {
 517	case TextToken:
 518		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 519		if len(p.tok.Data) == 0 {
 520			// It was all whitespace, so ignore it.
 521			return true
 522		}
 523	case CommentToken:
 524		p.doc.AppendChild(&Node{
 525			Type: CommentNode,
 526			Data: p.tok.Data,
 527		})
 528		return true
 529	case DoctypeToken:
 530		n, quirks := parseDoctype(p.tok.Data)
 531		p.doc.AppendChild(n)
 532		p.quirks = quirks
 533		p.im = beforeHTMLIM
 534		return true
 535	}
 536	p.quirks = true
 537	p.im = beforeHTMLIM
 538	return false
 539}
 540
 541// Section 12.2.6.4.2.
 542func beforeHTMLIM(p *parser) bool {
 543	switch p.tok.Type {
 544	case DoctypeToken:
 545		// Ignore the token.
 546		return true
 547	case TextToken:
 548		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 549		if len(p.tok.Data) == 0 {
 550			// It was all whitespace, so ignore it.
 551			return true
 552		}
 553	case StartTagToken:
 554		if p.tok.DataAtom == a.Html {
 555			p.addElement()
 556			p.im = beforeHeadIM
 557			return true
 558		}
 559	case EndTagToken:
 560		switch p.tok.DataAtom {
 561		case a.Head, a.Body, a.Html, a.Br:
 562			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 563			return false
 564		default:
 565			// Ignore the token.
 566			return true
 567		}
 568	case CommentToken:
 569		p.doc.AppendChild(&Node{
 570			Type: CommentNode,
 571			Data: p.tok.Data,
 572		})
 573		return true
 574	}
 575	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 576	return false
 577}
 578
 579// Section 12.2.6.4.3.
 580func beforeHeadIM(p *parser) bool {
 581	switch p.tok.Type {
 582	case TextToken:
 583		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 584		if len(p.tok.Data) == 0 {
 585			// It was all whitespace, so ignore it.
 586			return true
 587		}
 588	case StartTagToken:
 589		switch p.tok.DataAtom {
 590		case a.Head:
 591			p.addElement()
 592			p.head = p.top()
 593			p.im = inHeadIM
 594			return true
 595		case a.Html:
 596			return inBodyIM(p)
 597		}
 598	case EndTagToken:
 599		switch p.tok.DataAtom {
 600		case a.Head, a.Body, a.Html, a.Br:
 601			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 602			return false
 603		default:
 604			// Ignore the token.
 605			return true
 606		}
 607	case CommentToken:
 608		p.addChild(&Node{
 609			Type: CommentNode,
 610			Data: p.tok.Data,
 611		})
 612		return true
 613	case DoctypeToken:
 614		// Ignore the token.
 615		return true
 616	}
 617
 618	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 619	return false
 620}
 621
// inHeadIM is the "in head" insertion mode. It returns whether the current
// token was consumed. Tokens that are not handled by one of the cases below
// trigger an implied </head> end tag and are left unconsumed.
// Section 12.2.6.4.4.
func inHeadIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		s := strings.TrimLeft(p.tok.Data, whitespace)
		if len(s) < len(p.tok.Data) {
			// Add the initial whitespace to the current node.
			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
			if s == "" {
				return true
			}
			// Keep only the non-whitespace remainder in the token; it is
			// handled by the implied </head> path at the bottom.
			p.tok.Data = s
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			return inBodyIM(p)
		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
			// These elements are added and immediately popped off the stack
			// of open elements again.
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			return true
		case a.Noscript:
			if p.scripting {
				p.parseGenericRawTextElement()
				return true
			}
			p.addElement()
			p.im = inHeadNoscriptIM
			// Don't let the tokenizer go into raw text mode when scripting is disabled.
			p.tokenizer.NextIsNotRawText()
			return true
		case a.Script, a.Title:
			// Remember the current mode, then switch to textIM for the
			// element's content.
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
			return true
		case a.Noframes, a.Style:
			p.parseGenericRawTextElement()
			return true
		case a.Head:
			// Ignore the token.
			return true
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// We don't handle all of the corner cases when mixing foreign
			// content (i.e. <math> or <svg>) with <template>. Without this
			// early return, we can get into an infinite loop, possibly because
			// of the "TODO... further divergence" a little below.
			//
			// As a workaround, if we are mixing foreign content and templates,
			// just ignore the rest of the HTML. Foreign content is rare and a
			// relatively old HTML feature. Templates are also rare and a
			// relatively new HTML feature. Their combination is very rare.
			for _, e := range p.oe {
				if e.Namespace != "" {
					p.im = ignoreTheRemainingTokens
					return true
				}
			}

			// Insert the template, push a scope marker onto the active
			// formatting elements, and enter inTemplateIM, recording it on
			// the template insertion mode stack as well.
			p.addElement()
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
			p.im = inTemplateIM
			p.templateStack = append(p.templateStack, inTemplateIM)
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Head:
			p.oe.pop()
			p.im = afterHeadIM
			return true
		case a.Body, a.Html, a.Br:
			// Imply a </head> end tag and leave the token unconsumed.
			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
			return false
		case a.Template:
			// Ignore the end tag when no template is open.
			if !p.oe.contains(a.Template) {
				return true
			}
			// TODO: remove this further divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.generateImpliedEndTags()
			// Pop the stack down to, and including, the highest template
			// element that has an empty namespace.
			for i := len(p.oe) - 1; i >= 0; i-- {
				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
					p.oe = p.oe[:i]
					break
				}
			}
			p.clearActiveFormattingElements()
			p.templateStack.pop()
			p.resetInsertionMode()
			return true
		default:
			// Ignore the token.
			return true
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	}

	// Anything else: imply a </head> end tag and leave the token unconsumed.
	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
	return false
}
 736
 737// Section 12.2.6.4.5.
 738func inHeadNoscriptIM(p *parser) bool {
 739	switch p.tok.Type {
 740	case DoctypeToken:
 741		// Ignore the token.
 742		return true
 743	case StartTagToken:
 744		switch p.tok.DataAtom {
 745		case a.Html:
 746			return inBodyIM(p)
 747		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
 748			return inHeadIM(p)
 749		case a.Head:
 750			// Ignore the token.
 751			return true
 752		case a.Noscript:
 753			// Don't let the tokenizer go into raw text mode even when a <noscript>
 754			// tag is in "in head noscript" insertion mode.
 755			p.tokenizer.NextIsNotRawText()
 756			// Ignore the token.
 757			return true
 758		}
 759	case EndTagToken:
 760		switch p.tok.DataAtom {
 761		case a.Noscript, a.Br:
 762		default:
 763			// Ignore the token.
 764			return true
 765		}
 766	case TextToken:
 767		s := strings.TrimLeft(p.tok.Data, whitespace)
 768		if len(s) == 0 {
 769			// It was all whitespace.
 770			return inHeadIM(p)
 771		}
 772	case CommentToken:
 773		return inHeadIM(p)
 774	}
 775	p.oe.pop()
 776	if p.top().DataAtom != a.Head {
 777		panic("html: the new current node will be a head element.")
 778	}
 779	p.im = inHeadIM
 780	if p.tok.DataAtom == a.Noscript {
 781		return true
 782	}
 783	return false
 784}
 785
 786// Section 12.2.6.4.6.
 787func afterHeadIM(p *parser) bool {
 788	switch p.tok.Type {
 789	case TextToken:
 790		s := strings.TrimLeft(p.tok.Data, whitespace)
 791		if len(s) < len(p.tok.Data) {
 792			// Add the initial whitespace to the current node.
 793			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 794			if s == "" {
 795				return true
 796			}
 797			p.tok.Data = s
 798		}
 799	case StartTagToken:
 800		switch p.tok.DataAtom {
 801		case a.Html:
 802			return inBodyIM(p)
 803		case a.Body:
 804			p.addElement()
 805			p.framesetOK = false
 806			p.im = inBodyIM
 807			return true
 808		case a.Frameset:
 809			p.addElement()
 810			p.im = inFramesetIM
 811			return true
 812		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 813			p.oe = append(p.oe, p.head)
 814			defer p.oe.remove(p.head)
 815			return inHeadIM(p)
 816		case a.Head:
 817			// Ignore the token.
 818			return true
 819		}
 820	case EndTagToken:
 821		switch p.tok.DataAtom {
 822		case a.Body, a.Html, a.Br:
 823			// Drop down to creating an implied <body> tag.
 824		case a.Template:
 825			return inHeadIM(p)
 826		default:
 827			// Ignore the token.
 828			return true
 829		}
 830	case CommentToken:
 831		p.addChild(&Node{
 832			Type: CommentNode,
 833			Data: p.tok.Data,
 834		})
 835		return true
 836	case DoctypeToken:
 837		// Ignore the token.
 838		return true
 839	}
 840
 841	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
 842	p.framesetOK = true
 843	return false
 844}
 845
 846// copyAttributes copies attributes of src not found on dst to dst.
 847func copyAttributes(dst *Node, src Token) {
 848	if len(src.Attr) == 0 {
 849		return
 850	}
 851	attr := map[string]string{}
 852	for _, t := range dst.Attr {
 853		attr[t.Key] = t.Val
 854	}
 855	for _, t := range src.Attr {
 856		if _, ok := attr[t.Key]; !ok {
 857			dst.Attr = append(dst.Attr, t)
 858			attr[t.Key] = t.Val
 859		}
 860	}
 861}
 862
 863// Section 12.2.6.4.7.
 864func inBodyIM(p *parser) bool {
 865	switch p.tok.Type {
 866	case TextToken:
 867		d := p.tok.Data
 868		switch n := p.oe.top(); n.DataAtom {
 869		case a.Pre, a.Listing:
 870			if n.FirstChild == nil {
 871				// Ignore a newline at the start of a <pre> block.
 872				if d != "" && d[0] == '\r' {
 873					d = d[1:]
 874				}
 875				if d != "" && d[0] == '\n' {
 876					d = d[1:]
 877				}
 878			}
 879		}
 880		d = strings.Replace(d, "\x00", "", -1)
 881		if d == "" {
 882			return true
 883		}
 884		p.reconstructActiveFormattingElements()
 885		p.addText(d)
 886		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
 887			// There were non-whitespace characters inserted.
 888			p.framesetOK = false
 889		}
 890	case StartTagToken:
 891		switch p.tok.DataAtom {
 892		case a.Html:
 893			if p.oe.contains(a.Template) {
 894				return true
 895			}
 896			copyAttributes(p.oe[0], p.tok)
 897		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 898			return inHeadIM(p)
 899		case a.Body:
 900			if p.oe.contains(a.Template) {
 901				return true
 902			}
 903			if len(p.oe) >= 2 {
 904				body := p.oe[1]
 905				if body.Type == ElementNode && body.DataAtom == a.Body {
 906					p.framesetOK = false
 907					copyAttributes(body, p.tok)
 908				}
 909			}
 910		case a.Frameset:
 911			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
 912				// Ignore the token.
 913				return true
 914			}
 915			body := p.oe[1]
 916			if body.Parent != nil {
 917				body.Parent.RemoveChild(body)
 918			}
 919			p.oe = p.oe[:1]
 920			p.addElement()
 921			p.im = inFramesetIM
 922			return true
 923		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
 924			p.popUntil(buttonScope, a.P)
 925			p.addElement()
 926		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 927			p.popUntil(buttonScope, a.P)
 928			switch n := p.top(); n.DataAtom {
 929			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 930				p.oe.pop()
 931			}
 932			p.addElement()
 933		case a.Pre, a.Listing:
 934			p.popUntil(buttonScope, a.P)
 935			p.addElement()
 936			// The newline, if any, will be dealt with by the TextToken case.
 937			p.framesetOK = false
 938		case a.Form:
 939			if p.form != nil && !p.oe.contains(a.Template) {
 940				// Ignore the token
 941				return true
 942			}
 943			p.popUntil(buttonScope, a.P)
 944			p.addElement()
 945			if !p.oe.contains(a.Template) {
 946				p.form = p.top()
 947			}
 948		case a.Li:
 949			p.framesetOK = false
 950			for i := len(p.oe) - 1; i >= 0; i-- {
 951				node := p.oe[i]
 952				switch node.DataAtom {
 953				case a.Li:
 954					p.oe = p.oe[:i]
 955				case a.Address, a.Div, a.P:
 956					continue
 957				default:
 958					if !isSpecialElement(node) {
 959						continue
 960					}
 961				}
 962				break
 963			}
 964			p.popUntil(buttonScope, a.P)
 965			p.addElement()
 966		case a.Dd, a.Dt:
 967			p.framesetOK = false
 968			for i := len(p.oe) - 1; i >= 0; i-- {
 969				node := p.oe[i]
 970				switch node.DataAtom {
 971				case a.Dd, a.Dt:
 972					p.oe = p.oe[:i]
 973				case a.Address, a.Div, a.P:
 974					continue
 975				default:
 976					if !isSpecialElement(node) {
 977						continue
 978					}
 979				}
 980				break
 981			}
 982			p.popUntil(buttonScope, a.P)
 983			p.addElement()
 984		case a.Plaintext:
 985			p.popUntil(buttonScope, a.P)
 986			p.addElement()
 987		case a.Button:
 988			p.popUntil(defaultScope, a.Button)
 989			p.reconstructActiveFormattingElements()
 990			p.addElement()
 991			p.framesetOK = false
 992		case a.A:
 993			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
 994				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
 995					p.inBodyEndTagFormatting(a.A, "a")
 996					p.oe.remove(n)
 997					p.afe.remove(n)
 998					break
 999				}
1000			}
1001			p.reconstructActiveFormattingElements()
1002			p.addFormattingElement()
1003		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1004			p.reconstructActiveFormattingElements()
1005			p.addFormattingElement()
1006		case a.Nobr:
1007			p.reconstructActiveFormattingElements()
1008			if p.elementInScope(defaultScope, a.Nobr) {
1009				p.inBodyEndTagFormatting(a.Nobr, "nobr")
1010				p.reconstructActiveFormattingElements()
1011			}
1012			p.addFormattingElement()
1013		case a.Applet, a.Marquee, a.Object:
1014			p.reconstructActiveFormattingElements()
1015			p.addElement()
1016			p.afe = append(p.afe, &scopeMarker)
1017			p.framesetOK = false
1018		case a.Table:
1019			if !p.quirks {
1020				p.popUntil(buttonScope, a.P)
1021			}
1022			p.addElement()
1023			p.framesetOK = false
1024			p.im = inTableIM
1025			return true
1026		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
1027			p.reconstructActiveFormattingElements()
1028			p.addElement()
1029			p.oe.pop()
1030			p.acknowledgeSelfClosingTag()
1031			if p.tok.DataAtom == a.Input {
1032				for _, t := range p.tok.Attr {
1033					if t.Key == "type" {
1034						if strings.ToLower(t.Val) == "hidden" {
1035							// Skip setting framesetOK = false
1036							return true
1037						}
1038					}
1039				}
1040			}
1041			p.framesetOK = false
1042		case a.Param, a.Source, a.Track:
1043			p.addElement()
1044			p.oe.pop()
1045			p.acknowledgeSelfClosingTag()
1046		case a.Hr:
1047			p.popUntil(buttonScope, a.P)
1048			p.addElement()
1049			p.oe.pop()
1050			p.acknowledgeSelfClosingTag()
1051			p.framesetOK = false
1052		case a.Image:
1053			p.tok.DataAtom = a.Img
1054			p.tok.Data = a.Img.String()
1055			return false
1056		case a.Textarea:
1057			p.addElement()
1058			p.setOriginalIM()
1059			p.framesetOK = false
1060			p.im = textIM
1061		case a.Xmp:
1062			p.popUntil(buttonScope, a.P)
1063			p.reconstructActiveFormattingElements()
1064			p.framesetOK = false
1065			p.parseGenericRawTextElement()
1066		case a.Iframe:
1067			p.framesetOK = false
1068			p.parseGenericRawTextElement()
1069		case a.Noembed:
1070			p.parseGenericRawTextElement()
1071		case a.Noscript:
1072			if p.scripting {
1073				p.parseGenericRawTextElement()
1074				return true
1075			}
1076			p.reconstructActiveFormattingElements()
1077			p.addElement()
1078			// Don't let the tokenizer go into raw text mode when scripting is disabled.
1079			p.tokenizer.NextIsNotRawText()
1080		case a.Select:
1081			p.reconstructActiveFormattingElements()
1082			p.addElement()
1083			p.framesetOK = false
1084			p.im = inSelectIM
1085			return true
1086		case a.Optgroup, a.Option:
1087			if p.top().DataAtom == a.Option {
1088				p.oe.pop()
1089			}
1090			p.reconstructActiveFormattingElements()
1091			p.addElement()
1092		case a.Rb, a.Rtc:
1093			if p.elementInScope(defaultScope, a.Ruby) {
1094				p.generateImpliedEndTags()
1095			}
1096			p.addElement()
1097		case a.Rp, a.Rt:
1098			if p.elementInScope(defaultScope, a.Ruby) {
1099				p.generateImpliedEndTags("rtc")
1100			}
1101			p.addElement()
1102		case a.Math, a.Svg:
1103			p.reconstructActiveFormattingElements()
1104			if p.tok.DataAtom == a.Math {
1105				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1106			} else {
1107				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1108			}
1109			adjustForeignAttributes(p.tok.Attr)
1110			p.addElement()
1111			p.top().Namespace = p.tok.Data
1112			if p.hasSelfClosingToken {
1113				p.oe.pop()
1114				p.acknowledgeSelfClosingTag()
1115			}
1116			return true
1117		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1118			// Ignore the token.
1119		default:
1120			p.reconstructActiveFormattingElements()
1121			p.addElement()
1122		}
1123	case EndTagToken:
1124		switch p.tok.DataAtom {
1125		case a.Body:
1126			if p.elementInScope(defaultScope, a.Body) {
1127				p.im = afterBodyIM
1128			}
1129		case a.Html:
1130			if p.elementInScope(defaultScope, a.Body) {
1131				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1132				return false
1133			}
1134			return true
1135		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
1136			p.popUntil(defaultScope, p.tok.DataAtom)
1137		case a.Form:
1138			if p.oe.contains(a.Template) {
1139				i := p.indexOfElementInScope(defaultScope, a.Form)
1140				if i == -1 {
1141					// Ignore the token.
1142					return true
1143				}
1144				p.generateImpliedEndTags()
1145				if p.oe[i].DataAtom != a.Form {
1146					// Ignore the token.
1147					return true
1148				}
1149				p.popUntil(defaultScope, a.Form)
1150			} else {
1151				node := p.form
1152				p.form = nil
1153				i := p.indexOfElementInScope(defaultScope, a.Form)
1154				if node == nil || i == -1 || p.oe[i] != node {
1155					// Ignore the token.
1156					return true
1157				}
1158				p.generateImpliedEndTags()
1159				p.oe.remove(node)
1160			}
1161		case a.P:
1162			if !p.elementInScope(buttonScope, a.P) {
1163				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1164			}
1165			p.popUntil(buttonScope, a.P)
1166		case a.Li:
1167			p.popUntil(listItemScope, a.Li)
1168		case a.Dd, a.Dt:
1169			p.popUntil(defaultScope, p.tok.DataAtom)
1170		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1171			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1172		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1173			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
1174		case a.Applet, a.Marquee, a.Object:
1175			if p.popUntil(defaultScope, p.tok.DataAtom) {
1176				p.clearActiveFormattingElements()
1177			}
1178		case a.Br:
1179			p.tok.Type = StartTagToken
1180			return false
1181		case a.Template:
1182			return inHeadIM(p)
1183		default:
1184			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
1185		}
1186	case CommentToken:
1187		p.addChild(&Node{
1188			Type: CommentNode,
1189			Data: p.tok.Data,
1190		})
1191	case ErrorToken:
1192		// TODO: remove this divergence from the HTML5 spec.
1193		if len(p.templateStack) > 0 {
1194			p.im = inTemplateIM
1195			return false
1196		}
1197		for _, e := range p.oe {
1198			switch e.DataAtom {
1199			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1200				a.Thead, a.Tr, a.Body, a.Html:
1201			default:
1202				return true
1203			}
1204		}
1205	}
1206
1207	return true
1208}
1209
// inBodyEndTagFormatting handles an end tag for a formatting element, given
// both as an atom (tagAtom) and as a string (tagName). It may pop elements
// from the stack of open elements (p.oe), edit the list of active formatting
// elements (p.afe), and re-parent nodes in the tree. It returns nothing; when
// no matching formatting element is found it delegates to inBodyEndTagOther.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. Fast path: the current node has the requested tag name and is
	// not in the list of active formatting elements, so simply pop it.
	// NOTE(review): current is dereferenced without a nil check — presumably
	// the stack of open elements is never empty here; confirm against callers.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop, bounded to eight iterations.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the last entry in p.afe with a
		// matching atom, searching backwards and stopping at a scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No candidate: treat the token like any other end tag.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		// The stale entry is also dropped from the active formatting list.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first special element at
		// or after the formatting element's position in the stack.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop everything up to and including the
			// formatting element, then drop it from the active formatting list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// NOTE(review): indexing feIndex-1 assumes the formatting element is
		// never at the bottom of the stack (feIndex > 0) — confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		// x walks down the stack of open elements from the furthest block
		// towards the formatting element.
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step. 14.3.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone of itself in both lists.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first re-parenting, move the bookmark to just
			// after the cloned node in the active formatting list.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Move lastNode under node, detaching it first if needed.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone takes a slot
		// directly after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1362
1363// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1364// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1365// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1366func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1367	for i := len(p.oe) - 1; i >= 0; i-- {
1368		// Two element nodes have the same tag if they have the same Data (a
1369		// string-typed field). As an optimization, for common HTML tags, each
1370		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1371		// field), since integer comparison is faster than string comparison.
1372		// Uncommon (custom) tags get a zero DataAtom.
1373		//
1374		// The if condition here is equivalent to (p.oe[i].Data == tagName).
1375		if (p.oe[i].DataAtom == tagAtom) &&
1376			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1377			p.oe = p.oe[:i]
1378			break
1379		}
1380		if isSpecialElement(p.oe[i]) {
1381			break
1382		}
1383	}
1384}
1385
1386// Section 12.2.6.4.8.
1387func textIM(p *parser) bool {
1388	switch p.tok.Type {
1389	case ErrorToken:
1390		p.oe.pop()
1391	case TextToken:
1392		d := p.tok.Data
1393		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1394			// Ignore a newline at the start of a <textarea> block.
1395			if d != "" && d[0] == '\r' {
1396				d = d[1:]
1397			}
1398			if d != "" && d[0] == '\n' {
1399				d = d[1:]
1400			}
1401		}
1402		if d == "" {
1403			return true
1404		}
1405		p.addText(d)
1406		return true
1407	case EndTagToken:
1408		p.oe.pop()
1409	}
1410	p.im = p.originalIM
1411	p.originalIM = nil
1412	return p.tok.Type == EndTagToken
1413}
1414
1415// Section 12.2.6.4.9.
1416func inTableIM(p *parser) bool {
1417	switch p.tok.Type {
1418	case TextToken:
1419		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1420		switch p.oe.top().DataAtom {
1421		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1422			if strings.Trim(p.tok.Data, whitespace) == "" {
1423				p.addText(p.tok.Data)
1424				return true
1425			}
1426		}
1427	case StartTagToken:
1428		switch p.tok.DataAtom {
1429		case a.Caption:
1430			p.clearStackToContext(tableScope)
1431			p.afe = append(p.afe, &scopeMarker)
1432			p.addElement()
1433			p.im = inCaptionIM
1434			return true
1435		case a.Colgroup:
1436			p.clearStackToContext(tableScope)
1437			p.addElement()
1438			p.im = inColumnGroupIM
1439			return true
1440		case a.Col:
1441			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1442			return false
1443		case a.Tbody, a.Tfoot, a.Thead:
1444			p.clearStackToContext(tableScope)
1445			p.addElement()
1446			p.im = inTableBodyIM
1447			return true
1448		case a.Td, a.Th, a.Tr:
1449			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1450			return false
1451		case a.Table:
1452			if p.popUntil(tableScope, a.Table) {
1453				p.resetInsertionMode()
1454				return false
1455			}
1456			// Ignore the token.
1457			return true
1458		case a.Style, a.Script, a.Template:
1459			return inHeadIM(p)
1460		case a.Input:
1461			for _, t := range p.tok.Attr {
1462				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1463					p.addElement()
1464					p.oe.pop()
1465					return true
1466				}
1467			}
1468			// Otherwise drop down to the default action.
1469		case a.Form:
1470			if p.oe.contains(a.Template) || p.form != nil {
1471				// Ignore the token.
1472				return true
1473			}
1474			p.addElement()
1475			p.form = p.oe.pop()
1476		case a.Select:
1477			p.reconstructActiveFormattingElements()
1478			switch p.top().DataAtom {
1479			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1480				p.fosterParenting = true
1481			}
1482			p.addElement()
1483			p.fosterParenting = false
1484			p.framesetOK = false
1485			p.im = inSelectInTableIM
1486			return true
1487		}
1488	case EndTagToken:
1489		switch p.tok.DataAtom {
1490		case a.Table:
1491			if p.popUntil(tableScope, a.Table) {
1492				p.resetInsertionMode()
1493				return true
1494			}
1495			// Ignore the token.
1496			return true
1497		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1498			// Ignore the token.
1499			return true
1500		case a.Template:
1501			return inHeadIM(p)
1502		}
1503	case CommentToken:
1504		p.addChild(&Node{
1505			Type: CommentNode,
1506			Data: p.tok.Data,
1507		})
1508		return true
1509	case DoctypeToken:
1510		// Ignore the token.
1511		return true
1512	case ErrorToken:
1513		return inBodyIM(p)
1514	}
1515
1516	p.fosterParenting = true
1517	defer func() { p.fosterParenting = false }()
1518
1519	return inBodyIM(p)
1520}
1521
1522// Section 12.2.6.4.11.
1523func inCaptionIM(p *parser) bool {
1524	switch p.tok.Type {
1525	case StartTagToken:
1526		switch p.tok.DataAtom {
1527		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1528			if !p.popUntil(tableScope, a.Caption) {
1529				// Ignore the token.
1530				return true
1531			}
1532			p.clearActiveFormattingElements()
1533			p.im = inTableIM
1534			return false
1535		case a.Select:
1536			p.reconstructActiveFormattingElements()
1537			p.addElement()
1538			p.framesetOK = false
1539			p.im = inSelectInTableIM
1540			return true
1541		}
1542	case EndTagToken:
1543		switch p.tok.DataAtom {
1544		case a.Caption:
1545			if p.popUntil(tableScope, a.Caption) {
1546				p.clearActiveFormattingElements()
1547				p.im = inTableIM
1548			}
1549			return true
1550		case a.Table:
1551			if !p.popUntil(tableScope, a.Caption) {
1552				// Ignore the token.
1553				return true
1554			}
1555			p.clearActiveFormattingElements()
1556			p.im = inTableIM
1557			return false
1558		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1559			// Ignore the token.
1560			return true
1561		}
1562	}
1563	return inBodyIM(p)
1564}
1565
1566// Section 12.2.6.4.12.
1567func inColumnGroupIM(p *parser) bool {
1568	switch p.tok.Type {
1569	case TextToken:
1570		s := strings.TrimLeft(p.tok.Data, whitespace)
1571		if len(s) < len(p.tok.Data) {
1572			// Add the initial whitespace to the current node.
1573			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1574			if s == "" {
1575				return true
1576			}
1577			p.tok.Data = s
1578		}
1579	case CommentToken:
1580		p.addChild(&Node{
1581			Type: CommentNode,
1582			Data: p.tok.Data,
1583		})
1584		return true
1585	case DoctypeToken:
1586		// Ignore the token.
1587		return true
1588	case StartTagToken:
1589		switch p.tok.DataAtom {
1590		case a.Html:
1591			return inBodyIM(p)
1592		case a.Col:
1593			p.addElement()
1594			p.oe.pop()
1595			p.acknowledgeSelfClosingTag()
1596			return true
1597		case a.Template:
1598			return inHeadIM(p)
1599		}
1600	case EndTagToken:
1601		switch p.tok.DataAtom {
1602		case a.Colgroup:
1603			if p.oe.top().DataAtom == a.Colgroup {
1604				p.oe.pop()
1605				p.im = inTableIM
1606			}
1607			return true
1608		case a.Col:
1609			// Ignore the token.
1610			return true
1611		case a.Template:
1612			return inHeadIM(p)
1613		}
1614	case ErrorToken:
1615		return inBodyIM(p)
1616	}
1617	if p.oe.top().DataAtom != a.Colgroup {
1618		return true
1619	}
1620	p.oe.pop()
1621	p.im = inTableIM
1622	return false
1623}
1624
1625// Section 12.2.6.4.13.
1626func inTableBodyIM(p *parser) bool {
1627	switch p.tok.Type {
1628	case StartTagToken:
1629		switch p.tok.DataAtom {
1630		case a.Tr:
1631			p.clearStackToContext(tableBodyScope)
1632			p.addElement()
1633			p.im = inRowIM
1634			return true
1635		case a.Td, a.Th:
1636			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1637			return false
1638		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1639			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1640				p.im = inTableIM
1641				return false
1642			}
1643			// Ignore the token.
1644			return true
1645		}
1646	case EndTagToken:
1647		switch p.tok.DataAtom {
1648		case a.Tbody, a.Tfoot, a.Thead:
1649			if p.elementInScope(tableScope, p.tok.DataAtom) {
1650				p.clearStackToContext(tableBodyScope)
1651				p.oe.pop()
1652				p.im = inTableIM
1653			}
1654			return true
1655		case a.Table:
1656			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1657				p.im = inTableIM
1658				return false
1659			}
1660			// Ignore the token.
1661			return true
1662		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1663			// Ignore the token.
1664			return true
1665		}
1666	case CommentToken:
1667		p.addChild(&Node{
1668			Type: CommentNode,
1669			Data: p.tok.Data,
1670		})
1671		return true
1672	}
1673
1674	return inTableIM(p)
1675}
1676
1677// Section 12.2.6.4.14.
1678func inRowIM(p *parser) bool {
1679	switch p.tok.Type {
1680	case StartTagToken:
1681		switch p.tok.DataAtom {
1682		case a.Td, a.Th:
1683			p.clearStackToContext(tableRowScope)
1684			p.addElement()
1685			p.afe = append(p.afe, &scopeMarker)
1686			p.im = inCellIM
1687			return true
1688		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1689			if p.popUntil(tableScope, a.Tr) {
1690				p.im = inTableBodyIM
1691				return false
1692			}
1693			// Ignore the token.
1694			return true
1695		}
1696	case EndTagToken:
1697		switch p.tok.DataAtom {
1698		case a.Tr:
1699			if p.popUntil(tableScope, a.Tr) {
1700				p.im = inTableBodyIM
1701				return true
1702			}
1703			// Ignore the token.
1704			return true
1705		case a.Table:
1706			if p.popUntil(tableScope, a.Tr) {
1707				p.im = inTableBodyIM
1708				return false
1709			}
1710			// Ignore the token.
1711			return true
1712		case a.Tbody, a.Tfoot, a.Thead:
1713			if p.elementInScope(tableScope, p.tok.DataAtom) {
1714				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1715				return false
1716			}
1717			// Ignore the token.
1718			return true
1719		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1720			// Ignore the token.
1721			return true
1722		}
1723	}
1724
1725	return inTableIM(p)
1726}
1727
1728// Section 12.2.6.4.15.
1729func inCellIM(p *parser) bool {
1730	switch p.tok.Type {
1731	case StartTagToken:
1732		switch p.tok.DataAtom {
1733		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1734			if p.popUntil(tableScope, a.Td, a.Th) {
1735				// Close the cell and reprocess.
1736				p.clearActiveFormattingElements()
1737				p.im = inRowIM
1738				return false
1739			}
1740			// Ignore the token.
1741			return true
1742		case a.Select:
1743			p.reconstructActiveFormattingElements()
1744			p.addElement()
1745			p.framesetOK = false
1746			p.im = inSelectInTableIM
1747			return true
1748		}
1749	case EndTagToken:
1750		switch p.tok.DataAtom {
1751		case a.Td, a.Th:
1752			if !p.popUntil(tableScope, p.tok.DataAtom) {
1753				// Ignore the token.
1754				return true
1755			}
1756			p.clearActiveFormattingElements()
1757			p.im = inRowIM
1758			return true
1759		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1760			// Ignore the token.
1761			return true
1762		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1763			if !p.elementInScope(tableScope, p.tok.DataAtom) {
1764				// Ignore the token.
1765				return true
1766			}
1767			// Close the cell and reprocess.
1768			if p.popUntil(tableScope, a.Td, a.Th) {
1769				p.clearActiveFormattingElements()
1770			}
1771			p.im = inRowIM
1772			return false
1773		}
1774	}
1775	return inBodyIM(p)
1776}
1777
1778// Section 12.2.6.4.16.
1779func inSelectIM(p *parser) bool {
1780	switch p.tok.Type {
1781	case TextToken:
1782		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1783	case StartTagToken:
1784		switch p.tok.DataAtom {
1785		case a.Html:
1786			return inBodyIM(p)
1787		case a.Option:
1788			if p.top().DataAtom == a.Option {
1789				p.oe.pop()
1790			}
1791			p.addElement()
1792		case a.Optgroup:
1793			if p.top().DataAtom == a.Option {
1794				p.oe.pop()
1795			}
1796			if p.top().DataAtom == a.Optgroup {
1797				p.oe.pop()
1798			}
1799			p.addElement()
1800		case a.Select:
1801			if !p.popUntil(selectScope, a.Select) {
1802				// Ignore the token.
1803				return true
1804			}
1805			p.resetInsertionMode()
1806		case a.Input, a.Keygen, a.Textarea:
1807			if p.elementInScope(selectScope, a.Select) {
1808				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1809				return false
1810			}
1811			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1812			p.tokenizer.NextIsNotRawText()
1813			// Ignore the token.
1814			return true
1815		case a.Script, a.Template:
1816			return inHeadIM(p)
1817		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1818			// Don't let the tokenizer go into raw text mode when there are raw tags
1819			// to be ignored. These tags should be ignored from the tokenizer
1820			// properly.
1821			p.tokenizer.NextIsNotRawText()
1822			// Ignore the token.
1823			return true
1824		}
1825	case EndTagToken:
1826		switch p.tok.DataAtom {
1827		case a.Option:
1828			if p.top().DataAtom == a.Option {
1829				p.oe.pop()
1830			}
1831		case a.Optgroup:
1832			i := len(p.oe) - 1
1833			if p.oe[i].DataAtom == a.Option {
1834				i--
1835			}
1836			if p.oe[i].DataAtom == a.Optgroup {
1837				p.oe = p.oe[:i]
1838			}
1839		case a.Select:
1840			if !p.popUntil(selectScope, a.Select) {
1841				// Ignore the token.
1842				return true
1843			}
1844			p.resetInsertionMode()
1845		case a.Template:
1846			return inHeadIM(p)
1847		}
1848	case CommentToken:
1849		p.addChild(&Node{
1850			Type: CommentNode,
1851			Data: p.tok.Data,
1852		})
1853	case DoctypeToken:
1854		// Ignore the token.
1855		return true
1856	case ErrorToken:
1857		return inBodyIM(p)
1858	}
1859
1860	return true
1861}
1862
1863// Section 12.2.6.4.17.
1864func inSelectInTableIM(p *parser) bool {
1865	switch p.tok.Type {
1866	case StartTagToken, EndTagToken:
1867		switch p.tok.DataAtom {
1868		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1869			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1870				// Ignore the token.
1871				return true
1872			}
1873			// This is like p.popUntil(selectScope, a.Select), but it also
1874			// matches <math select>, not just <select>. Matching the MathML
1875			// tag is arguably incorrect (conceptually), but it mimics what
1876			// Chromium does.
1877			for i := len(p.oe) - 1; i >= 0; i-- {
1878				if n := p.oe[i]; n.DataAtom == a.Select {
1879					p.oe = p.oe[:i]
1880					break
1881				}
1882			}
1883			p.resetInsertionMode()
1884			return false
1885		}
1886	}
1887	return inSelectIM(p)
1888}
1889
1890// Section 12.2.6.4.18.
1891func inTemplateIM(p *parser) bool {
1892	switch p.tok.Type {
1893	case TextToken, CommentToken, DoctypeToken:
1894		return inBodyIM(p)
1895	case StartTagToken:
1896		switch p.tok.DataAtom {
1897		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1898			return inHeadIM(p)
1899		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1900			p.templateStack.pop()
1901			p.templateStack = append(p.templateStack, inTableIM)
1902			p.im = inTableIM
1903			return false
1904		case a.Col:
1905			p.templateStack.pop()
1906			p.templateStack = append(p.templateStack, inColumnGroupIM)
1907			p.im = inColumnGroupIM
1908			return false
1909		case a.Tr:
1910			p.templateStack.pop()
1911			p.templateStack = append(p.templateStack, inTableBodyIM)
1912			p.im = inTableBodyIM
1913			return false
1914		case a.Td, a.Th:
1915			p.templateStack.pop()
1916			p.templateStack = append(p.templateStack, inRowIM)
1917			p.im = inRowIM
1918			return false
1919		default:
1920			p.templateStack.pop()
1921			p.templateStack = append(p.templateStack, inBodyIM)
1922			p.im = inBodyIM
1923			return false
1924		}
1925	case EndTagToken:
1926		switch p.tok.DataAtom {
1927		case a.Template:
1928			return inHeadIM(p)
1929		default:
1930			// Ignore the token.
1931			return true
1932		}
1933	case ErrorToken:
1934		if !p.oe.contains(a.Template) {
1935			// Ignore the token.
1936			return true
1937		}
1938		// TODO: remove this divergence from the HTML5 spec.
1939		//
1940		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1941		p.generateImpliedEndTags()
1942		for i := len(p.oe) - 1; i >= 0; i-- {
1943			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1944				p.oe = p.oe[:i]
1945				break
1946			}
1947		}
1948		p.clearActiveFormattingElements()
1949		p.templateStack.pop()
1950		p.resetInsertionMode()
1951		return false
1952	}
1953	return false
1954}
1955
1956// Section 12.2.6.4.19.
1957func afterBodyIM(p *parser) bool {
1958	switch p.tok.Type {
1959	case ErrorToken:
1960		// Stop parsing.
1961		return true
1962	case TextToken:
1963		s := strings.TrimLeft(p.tok.Data, whitespace)
1964		if len(s) == 0 {
1965			// It was all whitespace.
1966			return inBodyIM(p)
1967		}
1968	case StartTagToken:
1969		if p.tok.DataAtom == a.Html {
1970			return inBodyIM(p)
1971		}
1972	case EndTagToken:
1973		if p.tok.DataAtom == a.Html {
1974			if !p.fragment {
1975				p.im = afterAfterBodyIM
1976			}
1977			return true
1978		}
1979	case CommentToken:
1980		// The comment is attached to the <html> element.
1981		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1982			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1983		}
1984		p.oe[0].AppendChild(&Node{
1985			Type: CommentNode,
1986			Data: p.tok.Data,
1987		})
1988		return true
1989	}
1990	p.im = inBodyIM
1991	return false
1992}
1993
1994// Section 12.2.6.4.20.
1995func inFramesetIM(p *parser) bool {
1996	switch p.tok.Type {
1997	case CommentToken:
1998		p.addChild(&Node{
1999			Type: CommentNode,
2000			Data: p.tok.Data,
2001		})
2002	case TextToken:
2003		// Ignore all text but whitespace.
2004		s := strings.Map(func(c rune) rune {
2005			switch c {
2006			case ' ', '\t', '\n', '\f', '\r':
2007				return c
2008			}
2009			return -1
2010		}, p.tok.Data)
2011		if s != "" {
2012			p.addText(s)
2013		}
2014	case StartTagToken:
2015		switch p.tok.DataAtom {
2016		case a.Html:
2017			return inBodyIM(p)
2018		case a.Frameset:
2019			p.addElement()
2020		case a.Frame:
2021			p.addElement()
2022			p.oe.pop()
2023			p.acknowledgeSelfClosingTag()
2024		case a.Noframes:
2025			return inHeadIM(p)
2026		}
2027	case EndTagToken:
2028		switch p.tok.DataAtom {
2029		case a.Frameset:
2030			if p.oe.top().DataAtom != a.Html {
2031				p.oe.pop()
2032				if p.oe.top().DataAtom != a.Frameset {
2033					p.im = afterFramesetIM
2034					return true
2035				}
2036			}
2037		}
2038	default:
2039		// Ignore the token.
2040	}
2041	return true
2042}
2043
2044// Section 12.2.6.4.21.
2045func afterFramesetIM(p *parser) bool {
2046	switch p.tok.Type {
2047	case CommentToken:
2048		p.addChild(&Node{
2049			Type: CommentNode,
2050			Data: p.tok.Data,
2051		})
2052	case TextToken:
2053		// Ignore all text but whitespace.
2054		s := strings.Map(func(c rune) rune {
2055			switch c {
2056			case ' ', '\t', '\n', '\f', '\r':
2057				return c
2058			}
2059			return -1
2060		}, p.tok.Data)
2061		if s != "" {
2062			p.addText(s)
2063		}
2064	case StartTagToken:
2065		switch p.tok.DataAtom {
2066		case a.Html:
2067			return inBodyIM(p)
2068		case a.Noframes:
2069			return inHeadIM(p)
2070		}
2071	case EndTagToken:
2072		switch p.tok.DataAtom {
2073		case a.Html:
2074			p.im = afterAfterFramesetIM
2075			return true
2076		}
2077	default:
2078		// Ignore the token.
2079	}
2080	return true
2081}
2082
2083// Section 12.2.6.4.22.
2084func afterAfterBodyIM(p *parser) bool {
2085	switch p.tok.Type {
2086	case ErrorToken:
2087		// Stop parsing.
2088		return true
2089	case TextToken:
2090		s := strings.TrimLeft(p.tok.Data, whitespace)
2091		if len(s) == 0 {
2092			// It was all whitespace.
2093			return inBodyIM(p)
2094		}
2095	case StartTagToken:
2096		if p.tok.DataAtom == a.Html {
2097			return inBodyIM(p)
2098		}
2099	case CommentToken:
2100		p.doc.AppendChild(&Node{
2101			Type: CommentNode,
2102			Data: p.tok.Data,
2103		})
2104		return true
2105	case DoctypeToken:
2106		return inBodyIM(p)
2107	}
2108	p.im = inBodyIM
2109	return false
2110}
2111
2112// Section 12.2.6.4.23.
2113func afterAfterFramesetIM(p *parser) bool {
2114	switch p.tok.Type {
2115	case CommentToken:
2116		p.doc.AppendChild(&Node{
2117			Type: CommentNode,
2118			Data: p.tok.Data,
2119		})
2120	case TextToken:
2121		// Ignore all text but whitespace.
2122		s := strings.Map(func(c rune) rune {
2123			switch c {
2124			case ' ', '\t', '\n', '\f', '\r':
2125				return c
2126			}
2127			return -1
2128		}, p.tok.Data)
2129		if s != "" {
2130			p.tok.Data = s
2131			return inBodyIM(p)
2132		}
2133	case StartTagToken:
2134		switch p.tok.DataAtom {
2135		case a.Html:
2136			return inBodyIM(p)
2137		case a.Noframes:
2138			return inHeadIM(p)
2139		}
2140	case DoctypeToken:
2141		return inBodyIM(p)
2142	default:
2143		// Ignore the token.
2144	}
2145	return true
2146}
2147
// ignoreTheRemainingTokens has the same signature as the insertion mode
// functions in this file but does no work: it leaves the parser untouched and
// always returns true, which the token loop treats as "token consumed".
func ignoreTheRemainingTokens(p *parser) bool {
	return true
}
2151
// whitespaceOrNUL is the whitespace character set with the NUL character
// added. parseForeignContent trims these characters from text when deciding
// whether framesetOK stays set.
const whitespaceOrNUL = whitespace + "\x00"
2153
2154// Section 12.2.6.5
2155func parseForeignContent(p *parser) bool {
2156	switch p.tok.Type {
2157	case TextToken:
2158		if p.framesetOK {
2159			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2160		}
2161		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2162		p.addText(p.tok.Data)
2163	case CommentToken:
2164		p.addChild(&Node{
2165			Type: CommentNode,
2166			Data: p.tok.Data,
2167		})
2168	case StartTagToken:
2169		if !p.fragment {
2170			b := breakout[p.tok.Data]
2171			if p.tok.DataAtom == a.Font {
2172			loop:
2173				for _, attr := range p.tok.Attr {
2174					switch attr.Key {
2175					case "color", "face", "size":
2176						b = true
2177						break loop
2178					}
2179				}
2180			}
2181			if b {
2182				for i := len(p.oe) - 1; i >= 0; i-- {
2183					n := p.oe[i]
2184					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2185						p.oe = p.oe[:i+1]
2186						break
2187					}
2188				}
2189				return false
2190			}
2191		}
2192		current := p.adjustedCurrentNode()
2193		switch current.Namespace {
2194		case "math":
2195			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2196		case "svg":
2197			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
2198			// SVG wants e.g. "foreignObject" with a capital second "O".
2199			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2200				p.tok.DataAtom = a.Lookup([]byte(x))
2201				p.tok.Data = x
2202			}
2203			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2204		default:
2205			panic("html: bad parser state: unexpected namespace")
2206		}
2207		adjustForeignAttributes(p.tok.Attr)
2208		namespace := current.Namespace
2209		p.addElement()
2210		p.top().Namespace = namespace
2211		if namespace != "" {
2212			// Don't let the tokenizer go into raw text mode in foreign content
2213			// (e.g. in an SVG <title> tag).
2214			p.tokenizer.NextIsNotRawText()
2215		}
2216		if p.hasSelfClosingToken {
2217			p.oe.pop()
2218			p.acknowledgeSelfClosingTag()
2219		}
2220	case EndTagToken:
2221		for i := len(p.oe) - 1; i >= 0; i-- {
2222			if p.oe[i].Namespace == "" {
2223				return p.im(p)
2224			}
2225			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2226				p.oe = p.oe[:i]
2227				break
2228			}
2229		}
2230		return true
2231	default:
2232		// Ignore the token.
2233	}
2234	return true
2235}
2236
2237// Section 12.2.4.2.
2238func (p *parser) adjustedCurrentNode() *Node {
2239	if len(p.oe) == 1 && p.fragment && p.context != nil {
2240		return p.context
2241	}
2242	return p.oe.top()
2243}
2244
2245// Section 12.2.6.
2246func (p *parser) inForeignContent() bool {
2247	if len(p.oe) == 0 {
2248		return false
2249	}
2250	n := p.adjustedCurrentNode()
2251	if n.Namespace == "" {
2252		return false
2253	}
2254	if mathMLTextIntegrationPoint(n) {
2255		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2256			return false
2257		}
2258		if p.tok.Type == TextToken {
2259			return false
2260		}
2261	}
2262	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2263		return false
2264	}
2265	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2266		return false
2267	}
2268	if p.tok.Type == ErrorToken {
2269		return false
2270	}
2271	return true
2272}
2273
2274// parseImpliedToken parses a token as though it had appeared in the parser's
2275// input.
2276func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2277	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2278	p.tok = Token{
2279		Type:     t,
2280		DataAtom: dataAtom,
2281		Data:     data,
2282	}
2283	p.hasSelfClosingToken = false
2284	p.parseCurrentToken()
2285	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2286}
2287
2288// parseCurrentToken runs the current token through the parsing routines
2289// until it is consumed.
2290func (p *parser) parseCurrentToken() {
2291	if p.tok.Type == SelfClosingTagToken {
2292		p.hasSelfClosingToken = true
2293		p.tok.Type = StartTagToken
2294	}
2295
2296	consumed := false
2297	for !consumed {
2298		if p.inForeignContent() {
2299			consumed = parseForeignContent(p)
2300		} else {
2301			consumed = p.im(p)
2302		}
2303	}
2304
2305	if p.hasSelfClosingToken {
2306		// This is a parse error, but ignore it.
2307		p.hasSelfClosingToken = false
2308	}
2309}
2310
2311func (p *parser) parse() error {
2312	// Iterate until EOF. Any other error will cause an early return.
2313	var err error
2314	for err != io.EOF {
2315		// CDATA sections are allowed only in foreign content.
2316		n := p.oe.top()
2317		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2318		// Read and parse the next token.
2319		p.tokenizer.Next()
2320		p.tok = p.tokenizer.Token()
2321		if p.tok.Type == ErrorToken {
2322			err = p.tokenizer.Err()
2323			if err != nil && err != io.EOF {
2324				return err
2325			}
2326		}
2327		p.parseCurrentToken()
2328	}
2329	return nil
2330}
2331
2332// Parse returns the parse tree for the HTML from the given Reader.
2333//
2334// It implements the HTML5 parsing algorithm
2335// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2336// which is very complicated. The resultant tree can contain implicitly created
2337// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2338// differ from the nesting implied by a naive processing of start and end
2339// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2340// with no corresponding node in the resulting tree.
2341//
2342// The input is assumed to be UTF-8 encoded.
2343func Parse(r io.Reader) (*Node, error) {
2344	return ParseWithOptions(r)
2345}
2346
2347// ParseFragment parses a fragment of HTML and returns the nodes that were
2348// found. If the fragment is the InnerHTML for an existing element, pass that
2349// element in context.
2350//
2351// It has the same intricacies as Parse.
2352func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2353	return ParseFragmentWithOptions(r, context)
2354}
2355
2356// ParseOption configures a parser.
2357type ParseOption func(p *parser)
2358
2359// ParseOptionEnableScripting configures the scripting flag.
2360// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2361//
2362// By default, scripting is enabled.
2363func ParseOptionEnableScripting(enable bool) ParseOption {
2364	return func(p *parser) {
2365		p.scripting = enable
2366	}
2367}
2368
2369// ParseWithOptions is like Parse, with options.
2370func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2371	p := &parser{
2372		tokenizer: NewTokenizer(r),
2373		doc: &Node{
2374			Type: DocumentNode,
2375		},
2376		scripting:  true,
2377		framesetOK: true,
2378		im:         initialIM,
2379	}
2380
2381	for _, f := range opts {
2382		f(p)
2383	}
2384
2385	if err := p.parse(); err != nil {
2386		return nil, err
2387	}
2388	return p.doc, nil
2389}
2390
2391// ParseFragmentWithOptions is like ParseFragment, with options.
2392func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2393	contextTag := ""
2394	if context != nil {
2395		if context.Type != ElementNode {
2396			return nil, errors.New("html: ParseFragment of non-element Node")
2397		}
2398		// The next check isn't just context.DataAtom.String() == context.Data because
2399		// it is valid to pass an element whose tag isn't a known atom. For example,
2400		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2401		if context.DataAtom != a.Lookup([]byte(context.Data)) {
2402			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2403		}
2404		contextTag = context.DataAtom.String()
2405	}
2406	p := &parser{
2407		doc: &Node{
2408			Type: DocumentNode,
2409		},
2410		scripting: true,
2411		fragment:  true,
2412		context:   context,
2413	}
2414	if context != nil && context.Namespace != "" {
2415		p.tokenizer = NewTokenizer(r)
2416	} else {
2417		p.tokenizer = NewTokenizerFragment(r, contextTag)
2418	}
2419
2420	for _, f := range opts {
2421		f(p)
2422	}
2423
2424	root := &Node{
2425		Type:     ElementNode,
2426		DataAtom: a.Html,
2427		Data:     a.Html.String(),
2428	}
2429	p.doc.AppendChild(root)
2430	p.oe = nodeStack{root}
2431	if context != nil && context.DataAtom == a.Template {
2432		p.templateStack = append(p.templateStack, inTemplateIM)
2433	}
2434	p.resetInsertionMode()
2435
2436	for n := context; n != nil; n = n.Parent {
2437		if n.Type == ElementNode && n.DataAtom == a.Form {
2438			p.form = n
2439			break
2440		}
2441	}
2442
2443	if err := p.parse(); err != nil {
2444		return nil, err
2445	}
2446
2447	parent := p.doc
2448	if context != nil {
2449		parent = root
2450	}
2451
2452	var result []*Node
2453	for c := parent.FirstChild; c != nil; {
2454		next := c.NextSibling
2455		parent.RemoveChild(c)
2456		result = append(result, c)
2457		c = next
2458	}
2459	return result, nil
2460}