1package css
  2
  3import (
  4	"bytes"
  5	"fmt"
  6	"strconv"
  7
  8	"github.com/tdewolff/parse/v2"
  9	"github.com/tdewolff/parse/v2/buffer"
 10)
 11
 12var wsBytes = []byte(" ")
 13var endBytes = []byte("}")
 14var emptyBytes = []byte("")
 15
 16// GrammarType determines the type of grammar.
 17type GrammarType uint32
 18
 19// GrammarType values.
 20const (
 21	ErrorGrammar GrammarType = iota // extra token when errors occur
 22	CommentGrammar
 23	AtRuleGrammar
 24	BeginAtRuleGrammar
 25	EndAtRuleGrammar
 26	QualifiedRuleGrammar
 27	BeginRulesetGrammar
 28	EndRulesetGrammar
 29	DeclarationGrammar
 30	TokenGrammar
 31	CustomPropertyGrammar
 32)
 33
 34// String returns the string representation of a GrammarType.
 35func (tt GrammarType) String() string {
 36	switch tt {
 37	case ErrorGrammar:
 38		return "Error"
 39	case CommentGrammar:
 40		return "Comment"
 41	case AtRuleGrammar:
 42		return "AtRule"
 43	case BeginAtRuleGrammar:
 44		return "BeginAtRule"
 45	case EndAtRuleGrammar:
 46		return "EndAtRule"
 47	case QualifiedRuleGrammar:
 48		return "QualifiedRule"
 49	case BeginRulesetGrammar:
 50		return "BeginRuleset"
 51	case EndRulesetGrammar:
 52		return "EndRuleset"
 53	case DeclarationGrammar:
 54		return "Declaration"
 55	case TokenGrammar:
 56		return "Token"
 57	case CustomPropertyGrammar:
 58		return "CustomProperty"
 59	}
 60	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
 61}
 62
 63////////////////////////////////////////////////////////////////
 64
 65// State is the state function the parser currently is in.
 66type State func(*Parser) GrammarType
 67
 68// Token is a single TokenType and its associated data.
 69type Token struct {
 70	TokenType
 71	Data []byte
 72}
 73
 74func (t Token) String() string {
 75	return t.TokenType.String() + "('" + string(t.Data) + "')"
 76}
 77
 78// Parser is the state for the parser.
 79type Parser struct {
 80	l      *Lexer
 81	state  []State
 82	err    string
 83	errPos int
 84
 85	buf   []Token
 86	level int
 87
 88	data        []byte
 89	tt          TokenType
 90	keepWS      bool
 91	prevWS      bool
 92	prevEnd     bool
 93	prevComment bool
 94}
 95
 96// NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute.
 97func NewParser(r *parse.Input, isInline bool) *Parser {
 98	l := NewLexer(r)
 99	p := &Parser{
100		l:     l,
101		state: make([]State, 0, 4),
102	}
103
104	if isInline {
105		p.state = append(p.state, (*Parser).parseDeclarationList)
106	} else {
107		p.state = append(p.state, (*Parser).parseStylesheet)
108	}
109	return p
110}
111
112// HasParseError returns true if there is a parse error (and not a read error).
113func (p *Parser) HasParseError() bool {
114	return p.err != ""
115}
116
117// Err returns the error encountered during parsing, this is often io.EOF but also other errors can be returned.
118func (p *Parser) Err() error {
119	if p.err != "" {
120		r := buffer.NewReader(p.l.r.Bytes())
121		return parse.NewError(r, p.errPos, p.err)
122	}
123	return p.l.Err()
124}
125
126// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message.
127func (p *Parser) Next() (GrammarType, TokenType, []byte) {
128	p.err = ""
129
130	if p.prevEnd {
131		p.tt, p.data = RightBraceToken, endBytes
132		p.prevEnd = false
133	} else {
134		p.tt, p.data = p.popToken(true)
135	}
136	gt := p.state[len(p.state)-1](p)
137	return gt, p.tt, p.data
138}
139
140// Offset return offset for current Grammar
141func (p *Parser) Offset() int {
142	return p.l.r.Offset()
143}
144
145// Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and Declaration will return the at-rule components, ruleset selector and declaration values respectively.
146func (p *Parser) Values() []Token {
147	return p.buf
148}
149
150func (p *Parser) popToken(allowComment bool) (TokenType, []byte) {
151	p.prevWS = false
152	p.prevComment = false
153	tt, data := p.l.Next()
154	for !p.keepWS && tt == WhitespaceToken || tt == CommentToken {
155		if tt == WhitespaceToken {
156			p.prevWS = true
157		} else {
158			p.prevComment = true
159			if allowComment && len(p.state) == 1 {
160				break
161			}
162		}
163		tt, data = p.l.Next()
164	}
165	return tt, data
166}
167
168func (p *Parser) initBuf() {
169	p.buf = p.buf[:0]
170}
171
172func (p *Parser) pushBuf(tt TokenType, data []byte) {
173	p.buf = append(p.buf, Token{tt, data})
174}
175
176////////////////////////////////////////////////////////////////
177
178func (p *Parser) parseStylesheet() GrammarType {
179	if p.tt == CDOToken || p.tt == CDCToken {
180		return TokenGrammar
181	} else if p.tt == AtKeywordToken {
182		return p.parseAtRule()
183	} else if p.tt == CommentToken {
184		return CommentGrammar
185	} else if p.tt == ErrorToken {
186		return ErrorGrammar
187	}
188	return p.parseQualifiedRule()
189}
190
191func (p *Parser) parseDeclarationList() GrammarType {
192	if p.tt == CommentToken {
193		p.tt, p.data = p.popToken(false)
194	}
195	for p.tt == SemicolonToken {
196		p.tt, p.data = p.popToken(false)
197	}
198
199	// IE hack: *color:red;
200	if p.tt == DelimToken && p.data[0] == '*' {
201		tt, data := p.popToken(false)
202		p.tt = tt
203		p.data = append(p.data, data...)
204	}
205
206	if p.tt == ErrorToken {
207		return ErrorGrammar
208	} else if p.tt == AtKeywordToken {
209		return p.parseAtRule()
210	} else if p.tt == IdentToken || p.tt == DelimToken {
211		return p.parseDeclaration()
212	} else if p.tt == CustomPropertyNameToken {
213		return p.parseCustomProperty()
214	}
215
216	// parse error
217	p.initBuf()
218	p.l.r.Move(-len(p.data))
219	p.err, p.errPos = fmt.Sprintf("CSS parse error: unexpected token '%s' in declaration", string(p.data)), p.l.r.Offset()
220	p.l.r.Move(len(p.data))
221
222	if p.tt == RightBraceToken {
223		// right brace token will occur when we've had a decl error that ended in a right brace token
224		// as these are not handled by decl error, we handle it here explicitly. Normally its used to end eg. the qual rule.
225		p.pushBuf(p.tt, p.data)
226		return ErrorGrammar
227	}
228	return p.parseDeclarationError(p.tt, p.data)
229}
230
231////////////////////////////////////////////////////////////////
232
233func (p *Parser) parseAtRule() GrammarType {
234	p.initBuf()
235	p.data = parse.ToLower(parse.Copy(p.data))
236	atRuleName := p.data
237	if len(atRuleName) > 0 && atRuleName[1] == '-' {
238		if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 {
239			atRuleName = atRuleName[i+2:] // skip vendor specific prefix
240		}
241	}
242	atRule := ToHash(atRuleName[1:])
243
244	first := true
245	skipWS := false
246	for {
247		tt, data := p.popToken(false)
248		if tt == LeftBraceToken && p.level == 0 {
249			if atRule == Font_Face || atRule == Page {
250				p.state = append(p.state, (*Parser).parseAtRuleDeclarationList)
251			} else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports {
252				p.state = append(p.state, (*Parser).parseAtRuleRuleList)
253			} else {
254				p.state = append(p.state, (*Parser).parseAtRuleUnknown)
255			}
256			return BeginAtRuleGrammar
257		} else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
258			p.prevEnd = (tt == RightBraceToken)
259			return AtRuleGrammar
260		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
261			p.level++
262		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
263			p.level--
264		}
265		if first {
266			if tt == LeftParenthesisToken || tt == LeftBracketToken {
267				p.prevWS = false
268			}
269			first = false
270		}
271		if len(data) == 1 && (data[0] == ',' || data[0] == ':') {
272			skipWS = true
273		} else if p.prevWS && !skipWS && tt != RightParenthesisToken {
274			p.pushBuf(WhitespaceToken, wsBytes)
275		} else {
276			skipWS = false
277		}
278		if tt == LeftParenthesisToken {
279			skipWS = true
280		}
281		p.pushBuf(tt, data)
282	}
283}
284
285func (p *Parser) parseAtRuleRuleList() GrammarType {
286	if p.tt == RightBraceToken || p.tt == ErrorToken {
287		p.state = p.state[:len(p.state)-1]
288		return EndAtRuleGrammar
289	} else if p.tt == AtKeywordToken {
290		return p.parseAtRule()
291	} else {
292		return p.parseQualifiedRule()
293	}
294}
295
296func (p *Parser) parseAtRuleDeclarationList() GrammarType {
297	for p.tt == SemicolonToken {
298		p.tt, p.data = p.popToken(false)
299	}
300	if p.tt == RightBraceToken || p.tt == ErrorToken {
301		p.state = p.state[:len(p.state)-1]
302		return EndAtRuleGrammar
303	}
304	return p.parseDeclarationList()
305}
306
307func (p *Parser) parseAtRuleUnknown() GrammarType {
308	p.keepWS = true
309	if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken {
310		p.state = p.state[:len(p.state)-1]
311		p.keepWS = false
312		return EndAtRuleGrammar
313	}
314	if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken {
315		p.level++
316	} else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken {
317		p.level--
318	}
319	return TokenGrammar
320}
321
322func (p *Parser) parseQualifiedRule() GrammarType {
323	p.initBuf()
324	first := true
325	inAttrSel := false
326	skipWS := true
327	var tt TokenType
328	var data []byte
329	for {
330		if first {
331			tt, data = p.tt, p.data
332			p.tt = WhitespaceToken
333			p.data = emptyBytes
334			first = false
335		} else {
336			tt, data = p.popToken(false)
337		}
338		if tt == LeftBraceToken && p.level == 0 {
339			p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList)
340			return BeginRulesetGrammar
341		} else if tt == ErrorToken {
342			p.err, p.errPos = "CSS parse error: unexpected ending in qualified rule", p.l.r.Offset()
343			return ErrorGrammar
344		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
345			p.level++
346		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
347			p.level--
348		}
349		if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') {
350			if data[0] == ',' {
351				return QualifiedRuleGrammar
352			}
353			skipWS = true
354		} else if p.prevWS && !skipWS && !inAttrSel {
355			p.pushBuf(WhitespaceToken, wsBytes)
356		} else {
357			skipWS = false
358		}
359		if tt == LeftBracketToken {
360			inAttrSel = true
361		} else if tt == RightBracketToken {
362			inAttrSel = false
363		}
364		p.pushBuf(tt, data)
365	}
366}
367
368func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType {
369	for p.tt == SemicolonToken {
370		p.tt, p.data = p.popToken(false)
371	}
372	if p.tt == RightBraceToken || p.tt == ErrorToken {
373		p.state = p.state[:len(p.state)-1]
374		return EndRulesetGrammar
375	}
376	return p.parseDeclarationList()
377}
378
379func (p *Parser) parseDeclaration() GrammarType {
380	p.initBuf()
381	p.data = parse.ToLower(parse.Copy(p.data))
382
383	ttName, dataName := p.tt, p.data
384	tt, data := p.popToken(false)
385	if tt != ColonToken {
386		p.l.r.Move(-len(data))
387		p.err, p.errPos = "CSS parse error: expected colon in declaration", p.l.r.Offset()
388		p.l.r.Move(len(data))
389		p.pushBuf(ttName, dataName)
390		return p.parseDeclarationError(tt, data)
391	}
392
393	skipWS := true
394	for {
395		tt, data := p.popToken(false)
396		if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
397			p.prevEnd = (tt == RightBraceToken)
398			return DeclarationGrammar
399		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
400			p.level++
401		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
402			p.level--
403		}
404		if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' || data[0] == '=') {
405			skipWS = true
406		} else if (p.prevWS || p.prevComment) && !skipWS {
407			p.pushBuf(WhitespaceToken, wsBytes)
408		} else {
409			skipWS = false
410		}
411		p.pushBuf(tt, data)
412	}
413}
414
415func (p *Parser) parseDeclarationError(tt TokenType, data []byte) GrammarType {
416	// we're on the offending (tt,data), keep popping tokens till we reach ;, }, or EOF
417	p.tt, p.data = tt, data
418	for {
419		if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
420			p.prevEnd = (tt == RightBraceToken)
421			if tt == SemicolonToken {
422				p.pushBuf(tt, data)
423			}
424			return ErrorGrammar
425		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
426			p.level++
427		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
428			p.level--
429		}
430
431		if p.prevWS {
432			p.pushBuf(WhitespaceToken, wsBytes)
433		}
434		p.pushBuf(tt, data)
435
436		tt, data = p.popToken(false)
437	}
438}
439
440func (p *Parser) parseCustomProperty() GrammarType {
441	p.initBuf()
442	if tt, data := p.popToken(false); tt != ColonToken {
443		p.l.r.Move(-len(data))
444		p.err, p.errPos = "CSS parse error: expected colon in custom property", p.l.r.Offset()
445		p.l.r.Move(len(data))
446		return ErrorGrammar
447	}
448	val := []byte{}
449	for {
450		tt, data := p.l.Next()
451		if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
452			p.prevEnd = (tt == RightBraceToken)
453			p.pushBuf(CustomPropertyValueToken, val)
454			return CustomPropertyGrammar
455		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
456			p.level++
457		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
458			p.level--
459		}
460		val = append(val, data...)
461	}
462}