1package css
2
3import (
4 "bytes"
5 "fmt"
6 "strconv"
7
8 "github.com/tdewolff/parse/v2"
9 "github.com/tdewolff/parse/v2/buffer"
10)
11
// Shared byte slices used as token data by the parser.
var (
	// wsBytes is the data of the single-space whitespace token inserted into value buffers.
	wsBytes = []byte(" ")
	// endBytes is the data reported for a replayed right brace token.
	endBytes = []byte("}")
	// emptyBytes is the data reported for grammars that carry no token data of their own.
	emptyBytes = []byte("")
)
15
// GrammarType determines the type of grammar.
type GrammarType uint32

// GrammarType values.
const (
	ErrorGrammar GrammarType = iota // extra grammar returned when errors occur
	CommentGrammar
	AtRuleGrammar
	BeginAtRuleGrammar
	EndAtRuleGrammar
	QualifiedRuleGrammar
	BeginRulesetGrammar
	EndRulesetGrammar
	DeclarationGrammar
	TokenGrammar
	CustomPropertyGrammar
)

// grammarTypeNames holds the display name of every defined GrammarType, indexed by its value.
var grammarTypeNames = [...]string{
	ErrorGrammar:          "Error",
	CommentGrammar:        "Comment",
	AtRuleGrammar:         "AtRule",
	BeginAtRuleGrammar:    "BeginAtRule",
	EndAtRuleGrammar:      "EndAtRule",
	QualifiedRuleGrammar:  "QualifiedRule",
	BeginRulesetGrammar:   "BeginRuleset",
	EndRulesetGrammar:     "EndRuleset",
	DeclarationGrammar:    "Declaration",
	TokenGrammar:          "Token",
	CustomPropertyGrammar: "CustomProperty",
}

// String returns the string representation of a GrammarType. Values outside the
// defined constants are rendered as "Invalid(N)" with N the numeric value.
func (tt GrammarType) String() string {
	if tt < GrammarType(len(grammarTypeNames)) {
		return grammarTypeNames[tt]
	}
	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
}
62
63////////////////////////////////////////////////////////////////
64
65// State is the state function the parser currently is in.
66type State func(*Parser) GrammarType
67
68// Token is a single TokenType and its associated data.
69type Token struct {
70 TokenType
71 Data []byte
72}
73
74func (t Token) String() string {
75 return t.TokenType.String() + "('" + string(t.Data) + "')"
76}
77
78// Parser is the state for the parser.
79type Parser struct {
80 l *Lexer
81 state []State
82 err string
83 errPos int
84
85 buf []Token
86 level int
87
88 data []byte
89 tt TokenType
90 keepWS bool
91 prevWS bool
92 prevEnd bool
93 prevComment bool
94}
95
96// NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute.
97func NewParser(r *parse.Input, isInline bool) *Parser {
98 l := NewLexer(r)
99 p := &Parser{
100 l: l,
101 state: make([]State, 0, 4),
102 }
103
104 if isInline {
105 p.state = append(p.state, (*Parser).parseDeclarationList)
106 } else {
107 p.state = append(p.state, (*Parser).parseStylesheet)
108 }
109 return p
110}
111
112// HasParseError returns true if there is a parse error (and not a read error).
113func (p *Parser) HasParseError() bool {
114 return p.err != ""
115}
116
117// Err returns the error encountered during parsing, this is often io.EOF but also other errors can be returned.
118func (p *Parser) Err() error {
119 if p.err != "" {
120 r := buffer.NewReader(p.l.r.Bytes())
121 return parse.NewError(r, p.errPos, p.err)
122 }
123 return p.l.Err()
124}
125
126// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message.
127func (p *Parser) Next() (GrammarType, TokenType, []byte) {
128 p.err = ""
129
130 if p.prevEnd {
131 p.tt, p.data = RightBraceToken, endBytes
132 p.prevEnd = false
133 } else {
134 p.tt, p.data = p.popToken(true)
135 }
136 gt := p.state[len(p.state)-1](p)
137 return gt, p.tt, p.data
138}
139
140// Offset return offset for current Grammar
141func (p *Parser) Offset() int {
142 return p.l.r.Offset()
143}
144
145// Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and Declaration will return the at-rule components, ruleset selector and declaration values respectively.
146func (p *Parser) Values() []Token {
147 return p.buf
148}
149
150func (p *Parser) popToken(allowComment bool) (TokenType, []byte) {
151 p.prevWS = false
152 p.prevComment = false
153 tt, data := p.l.Next()
154 for !p.keepWS && tt == WhitespaceToken || tt == CommentToken {
155 if tt == WhitespaceToken {
156 p.prevWS = true
157 } else {
158 p.prevComment = true
159 if allowComment && len(p.state) == 1 {
160 break
161 }
162 }
163 tt, data = p.l.Next()
164 }
165 return tt, data
166}
167
168func (p *Parser) initBuf() {
169 p.buf = p.buf[:0]
170}
171
172func (p *Parser) pushBuf(tt TokenType, data []byte) {
173 p.buf = append(p.buf, Token{tt, data})
174}
175
176////////////////////////////////////////////////////////////////
177
178func (p *Parser) parseStylesheet() GrammarType {
179 if p.tt == CDOToken || p.tt == CDCToken {
180 return TokenGrammar
181 } else if p.tt == AtKeywordToken {
182 return p.parseAtRule()
183 } else if p.tt == CommentToken {
184 return CommentGrammar
185 } else if p.tt == ErrorToken {
186 return ErrorGrammar
187 }
188 return p.parseQualifiedRule()
189}
190
191func (p *Parser) parseDeclarationList() GrammarType {
192 if p.tt == CommentToken {
193 p.tt, p.data = p.popToken(false)
194 }
195 for p.tt == SemicolonToken {
196 p.tt, p.data = p.popToken(false)
197 }
198
199 // IE hack: *color:red;
200 if p.tt == DelimToken && p.data[0] == '*' {
201 tt, data := p.popToken(false)
202 p.tt = tt
203 p.data = append(p.data, data...)
204 }
205
206 if p.tt == ErrorToken {
207 return ErrorGrammar
208 } else if p.tt == AtKeywordToken {
209 return p.parseAtRule()
210 } else if p.tt == IdentToken || p.tt == DelimToken {
211 return p.parseDeclaration()
212 } else if p.tt == CustomPropertyNameToken {
213 return p.parseCustomProperty()
214 }
215
216 // parse error
217 p.initBuf()
218 p.l.r.Move(-len(p.data))
219 p.err, p.errPos = fmt.Sprintf("CSS parse error: unexpected token '%s' in declaration", string(p.data)), p.l.r.Offset()
220 p.l.r.Move(len(p.data))
221
222 if p.tt == RightBraceToken {
223 // right brace token will occur when we've had a decl error that ended in a right brace token
224 // as these are not handled by decl error, we handle it here explicitly. Normally its used to end eg. the qual rule.
225 p.pushBuf(p.tt, p.data)
226 return ErrorGrammar
227 }
228 return p.parseDeclarationError(p.tt, p.data)
229}
230
231////////////////////////////////////////////////////////////////
232
233func (p *Parser) parseAtRule() GrammarType {
234 p.initBuf()
235 p.data = parse.ToLower(parse.Copy(p.data))
236 atRuleName := p.data
237 if len(atRuleName) > 0 && atRuleName[1] == '-' {
238 if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 {
239 atRuleName = atRuleName[i+2:] // skip vendor specific prefix
240 }
241 }
242 atRule := ToHash(atRuleName[1:])
243
244 first := true
245 skipWS := false
246 for {
247 tt, data := p.popToken(false)
248 if tt == LeftBraceToken && p.level == 0 {
249 if atRule == Font_Face || atRule == Page {
250 p.state = append(p.state, (*Parser).parseAtRuleDeclarationList)
251 } else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports {
252 p.state = append(p.state, (*Parser).parseAtRuleRuleList)
253 } else {
254 p.state = append(p.state, (*Parser).parseAtRuleUnknown)
255 }
256 return BeginAtRuleGrammar
257 } else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
258 p.prevEnd = (tt == RightBraceToken)
259 return AtRuleGrammar
260 } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
261 p.level++
262 } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
263 p.level--
264 }
265 if first {
266 if tt == LeftParenthesisToken || tt == LeftBracketToken {
267 p.prevWS = false
268 }
269 first = false
270 }
271 if len(data) == 1 && (data[0] == ',' || data[0] == ':') {
272 skipWS = true
273 } else if p.prevWS && !skipWS && tt != RightParenthesisToken {
274 p.pushBuf(WhitespaceToken, wsBytes)
275 } else {
276 skipWS = false
277 }
278 if tt == LeftParenthesisToken {
279 skipWS = true
280 }
281 p.pushBuf(tt, data)
282 }
283}
284
285func (p *Parser) parseAtRuleRuleList() GrammarType {
286 if p.tt == RightBraceToken || p.tt == ErrorToken {
287 p.state = p.state[:len(p.state)-1]
288 return EndAtRuleGrammar
289 } else if p.tt == AtKeywordToken {
290 return p.parseAtRule()
291 } else {
292 return p.parseQualifiedRule()
293 }
294}
295
296func (p *Parser) parseAtRuleDeclarationList() GrammarType {
297 for p.tt == SemicolonToken {
298 p.tt, p.data = p.popToken(false)
299 }
300 if p.tt == RightBraceToken || p.tt == ErrorToken {
301 p.state = p.state[:len(p.state)-1]
302 return EndAtRuleGrammar
303 }
304 return p.parseDeclarationList()
305}
306
307func (p *Parser) parseAtRuleUnknown() GrammarType {
308 p.keepWS = true
309 if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken {
310 p.state = p.state[:len(p.state)-1]
311 p.keepWS = false
312 return EndAtRuleGrammar
313 }
314 if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken {
315 p.level++
316 } else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken {
317 p.level--
318 }
319 return TokenGrammar
320}
321
322func (p *Parser) parseQualifiedRule() GrammarType {
323 p.initBuf()
324 first := true
325 inAttrSel := false
326 skipWS := true
327 var tt TokenType
328 var data []byte
329 for {
330 if first {
331 tt, data = p.tt, p.data
332 p.tt = WhitespaceToken
333 p.data = emptyBytes
334 first = false
335 } else {
336 tt, data = p.popToken(false)
337 }
338 if tt == LeftBraceToken && p.level == 0 {
339 p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList)
340 return BeginRulesetGrammar
341 } else if tt == ErrorToken {
342 p.err, p.errPos = "CSS parse error: unexpected ending in qualified rule", p.l.r.Offset()
343 return ErrorGrammar
344 } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
345 p.level++
346 } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
347 p.level--
348 }
349 if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') {
350 if data[0] == ',' {
351 return QualifiedRuleGrammar
352 }
353 skipWS = true
354 } else if p.prevWS && !skipWS && !inAttrSel {
355 p.pushBuf(WhitespaceToken, wsBytes)
356 } else {
357 skipWS = false
358 }
359 if tt == LeftBracketToken {
360 inAttrSel = true
361 } else if tt == RightBracketToken {
362 inAttrSel = false
363 }
364 p.pushBuf(tt, data)
365 }
366}
367
368func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType {
369 for p.tt == SemicolonToken {
370 p.tt, p.data = p.popToken(false)
371 }
372 if p.tt == RightBraceToken || p.tt == ErrorToken {
373 p.state = p.state[:len(p.state)-1]
374 return EndRulesetGrammar
375 }
376 return p.parseDeclarationList()
377}
378
379func (p *Parser) parseDeclaration() GrammarType {
380 p.initBuf()
381 p.data = parse.ToLower(parse.Copy(p.data))
382
383 ttName, dataName := p.tt, p.data
384 tt, data := p.popToken(false)
385 if tt != ColonToken {
386 p.l.r.Move(-len(data))
387 p.err, p.errPos = "CSS parse error: expected colon in declaration", p.l.r.Offset()
388 p.l.r.Move(len(data))
389 p.pushBuf(ttName, dataName)
390 return p.parseDeclarationError(tt, data)
391 }
392
393 skipWS := true
394 for {
395 tt, data := p.popToken(false)
396 if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
397 p.prevEnd = (tt == RightBraceToken)
398 return DeclarationGrammar
399 } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
400 p.level++
401 } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
402 p.level--
403 }
404 if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' || data[0] == '=') {
405 skipWS = true
406 } else if (p.prevWS || p.prevComment) && !skipWS {
407 p.pushBuf(WhitespaceToken, wsBytes)
408 } else {
409 skipWS = false
410 }
411 p.pushBuf(tt, data)
412 }
413}
414
415func (p *Parser) parseDeclarationError(tt TokenType, data []byte) GrammarType {
416 // we're on the offending (tt,data), keep popping tokens till we reach ;, }, or EOF
417 p.tt, p.data = tt, data
418 for {
419 if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
420 p.prevEnd = (tt == RightBraceToken)
421 if tt == SemicolonToken {
422 p.pushBuf(tt, data)
423 }
424 return ErrorGrammar
425 } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
426 p.level++
427 } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
428 p.level--
429 }
430
431 if p.prevWS {
432 p.pushBuf(WhitespaceToken, wsBytes)
433 }
434 p.pushBuf(tt, data)
435
436 tt, data = p.popToken(false)
437 }
438}
439
440func (p *Parser) parseCustomProperty() GrammarType {
441 p.initBuf()
442 if tt, data := p.popToken(false); tt != ColonToken {
443 p.l.r.Move(-len(data))
444 p.err, p.errPos = "CSS parse error: expected colon in custom property", p.l.r.Offset()
445 p.l.r.Move(len(data))
446 return ErrorGrammar
447 }
448 val := []byte{}
449 for {
450 tt, data := p.l.Next()
451 if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
452 p.prevEnd = (tt == RightBraceToken)
453 p.pushBuf(CustomPropertyValueToken, val)
454 return CustomPropertyGrammar
455 } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
456 p.level++
457 } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
458 p.level--
459 }
460 val = append(val, data...)
461 }
462}