aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/tdewolff/minify/v2/html/html.go
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2024-10-25 00:47:47 +0200
committerMitja Felicijan <mitja.felicijan@gmail.com>2024-10-25 00:47:47 +0200
commitc6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch)
tree36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/tdewolff/minify/v2/html/html.go
parent0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff)
downloadjbmafp-c6cc0108ca7738023b45e0eeac0fa2390532dd93.tar.gz
Added vendor lock on depsHEADmaster
Diffstat (limited to 'vendor/github.com/tdewolff/minify/v2/html/html.go')
-rw-r--r--vendor/github.com/tdewolff/minify/v2/html/html.go514
1 files changed, 514 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/minify/v2/html/html.go b/vendor/github.com/tdewolff/minify/v2/html/html.go
new file mode 100644
index 0000000..616a9ba
--- /dev/null
+++ b/vendor/github.com/tdewolff/minify/v2/html/html.go
@@ -0,0 +1,514 @@
1// Package html minifies HTML5 following the specifications at http://www.w3.org/TR/html5/syntax.html.
2package html
3
4import (
5 "bytes"
6 "io"
7
8 "github.com/tdewolff/minify/v2"
9 "github.com/tdewolff/parse/v2"
10 "github.com/tdewolff/parse/v2/buffer"
11 "github.com/tdewolff/parse/v2/html"
12)
13
var (
	// Delimiters written between the parts of a start tag.
	gtBytes    = []byte(">")
	isBytes    = []byte("=")
	spaceBytes = []byte(" ")

	// Canonical doctype written for every doctype token.
	doctypeBytes = []byte("<!doctype html>")

	// Mimetypes used to select nested minifiers and to recognise default attribute values.
	jsMimeBytes   = []byte("application/javascript")
	cssMimeBytes  = []byte("text/css")
	htmlMimeBytes = []byte("text/html")
	svgMimeBytes  = []byte("image/svg+xml")
	formMimeBytes = []byte("application/x-www-form-urlencoded")
	mathMimeBytes = []byte("application/mathml+xml")

	// Prefixes matched (case-insensitively) at the start of URL and event-handler attribute values.
	dataSchemeBytes = []byte("data:")
	jsSchemeBytes   = []byte("javascript:")
	httpBytes       = []byte("http")

	// Attribute values that are compared against to detect defaults that may be omitted.
	radioBytes  = []byte("radio")
	onBytes     = []byte("on")
	textBytes   = []byte("text")
	noneBytes   = []byte("none")
	submitBytes = []byte("submit")
	allBytes    = []byte("all")
	rectBytes   = []byte("rect")
	dataBytes   = []byte("data")
	getBytes    = []byte("get")
	autoBytes   = []byte("auto")

	// oneBytes is the default of the colspan, rowspan, span and frameborder attributes.
	// It must be the digit "1": the previous literal "one" is never a valid value for
	// these attributes, so the default-value removal for them could never trigger.
	oneBytes = []byte("1")

	// Parameters handed to the CSS minifier when minifying a style attribute.
	inlineParams = map[string]string{"inline": "1"}
)
41
42////////////////////////////////////////////////////////////////
43
// Minifier is an HTML minifier. The zero value applies every reduction;
// each field switches one reduction off.
type Minifier struct {
	// KeepComments writes every comment token through unchanged.
	KeepComments bool
	// KeepConditionalComments keeps comments that start with "[if " or end with
	// "[endif]"; the content of downlevel-hidden ones is minified recursively.
	KeepConditionalComments bool
	// KeepDefaultAttrVals disables the removal of attributes whose value equals
	// a known default (e.g. type=text on input, method=get).
	KeepDefaultAttrVals bool
	// KeepDocumentTags keeps attribute-less html, head and body tags, which are
	// dropped otherwise.
	KeepDocumentTags bool
	// KeepEndTags disables the omission of optional end tags (li, td, p, ...).
	KeepEndTags bool
	// KeepQuotes is forwarded to html.EscapeAttrVal; presumably it keeps quotes
	// around attribute values that could be written unquoted — confirm there.
	KeepQuotes bool
	// KeepWhitespace stops trailing whitespace of a text from being trimmed
	// before a following tag and stops leading whitespace from being omitted
	// after phrasing tags.
	KeepWhitespace bool
}
54
55// Minify minifies HTML data, it reads from r and writes to w.
56func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
57 return (&Minifier{}).Minify(m, w, r, params)
58}
59
60// Minify minifies HTML data, it reads from r and writes to w.
61func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
62 var rawTagHash Hash
63 var rawTagMediatype []byte
64
65 omitSpace := true // if true the next leading space is omitted
66 inPre := false
67
68 attrMinifyBuffer := buffer.NewWriter(make([]byte, 0, 64))
69 attrByteBuffer := make([]byte, 0, 64)
70
71 z := parse.NewInput(r)
72 defer z.Restore()
73
74 l := html.NewLexer(z)
75 tb := NewTokenBuffer(z, l)
76 for {
77 t := *tb.Shift()
78 switch t.TokenType {
79 case html.ErrorToken:
80 if _, err := w.Write(nil); err != nil {
81 return err
82 }
83 if l.Err() == io.EOF {
84 return nil
85 }
86 return l.Err()
87 case html.DoctypeToken:
88 w.Write(doctypeBytes)
89 case html.CommentToken:
90 if o.KeepComments {
91 w.Write(t.Data)
92 } else if o.KeepConditionalComments && 6 < len(t.Text) && (bytes.HasPrefix(t.Text, []byte("[if ")) || bytes.HasSuffix(t.Text, []byte("[endif]")) || bytes.HasSuffix(t.Text, []byte("[endif]--"))) {
93 // [if ...] is always 7 or more characters, [endif] is only encountered for downlevel-revealed
94 // see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax
95 if bytes.HasPrefix(t.Data, []byte("<!--[if ")) && bytes.HasSuffix(t.Data, []byte("<![endif]-->")) { // downlevel-hidden
96 begin := bytes.IndexByte(t.Data, '>') + 1
97 end := len(t.Data) - len("<![endif]-->")
98 w.Write(t.Data[:begin])
99 if err := o.Minify(m, w, buffer.NewReader(t.Data[begin:end]), nil); err != nil {
100 return minify.UpdateErrorPosition(err, z, t.Offset)
101 }
102 w.Write(t.Data[end:])
103 } else {
104 w.Write(t.Data) // downlevel-revealed or short downlevel-hidden
105 }
106 } else if 1 < len(t.Text) && t.Text[0] == '#' {
107 // SSI tags
108 w.Write(t.Data)
109 }
110 case html.SvgToken:
111 if err := m.MinifyMimetype(svgMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
112 if err != minify.ErrNotExist {
113 return minify.UpdateErrorPosition(err, z, t.Offset)
114 }
115 w.Write(t.Data)
116 }
117 case html.MathToken:
118 if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
119 if err != minify.ErrNotExist {
120 return minify.UpdateErrorPosition(err, z, t.Offset)
121 }
122 w.Write(t.Data)
123 }
124 case html.TextToken:
125 // CSS and JS minifiers for inline code
126 if rawTagHash != 0 {
127 if rawTagHash == Style || rawTagHash == Script || rawTagHash == Iframe {
128 var mimetype []byte
129 var params map[string]string
130 if rawTagHash == Iframe {
131 mimetype = htmlMimeBytes
132 } else if len(rawTagMediatype) > 0 {
133 mimetype, params = parse.Mediatype(rawTagMediatype)
134 } else if rawTagHash == Script {
135 mimetype = jsMimeBytes
136 } else if rawTagHash == Style {
137 mimetype = cssMimeBytes
138 }
139 if err := m.MinifyMimetype(mimetype, w, buffer.NewReader(t.Data), params); err != nil {
140 if err != minify.ErrNotExist {
141 return minify.UpdateErrorPosition(err, z, t.Offset)
142 }
143 w.Write(t.Data)
144 }
145 } else {
146 w.Write(t.Data)
147 }
148 } else if inPre {
149 w.Write(t.Data)
150 } else {
151 t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap)
152
153 // whitespace removal; trim left
154 if omitSpace && parse.IsWhitespace(t.Data[0]) {
155 t.Data = t.Data[1:]
156 }
157
158 // whitespace removal; trim right
159 omitSpace = false
160 if len(t.Data) == 0 {
161 omitSpace = true
162 } else if parse.IsWhitespace(t.Data[len(t.Data)-1]) {
163 omitSpace = true
164 i := 0
165 for {
166 next := tb.Peek(i)
167 // trim if EOF, text token with leading whitespace or block token
168 if next.TokenType == html.ErrorToken {
169 t.Data = t.Data[:len(t.Data)-1]
170 omitSpace = false
171 break
172 } else if next.TokenType == html.TextToken {
173 // this only happens when a comment, doctype or phrasing end tag (only for !o.KeepWhitespace) was in between
174 // remove if the text token starts with a whitespace
175 if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
176 t.Data = t.Data[:len(t.Data)-1]
177 omitSpace = false
178 }
179 break
180 } else if next.TokenType == html.StartTagToken || next.TokenType == html.EndTagToken {
181 if o.KeepWhitespace {
182 break
183 }
184 // remove when followed up by a block tag
185 if next.Traits&nonPhrasingTag != 0 {
186 t.Data = t.Data[:len(t.Data)-1]
187 omitSpace = false
188 break
189 } else if next.TokenType == html.StartTagToken {
190 break
191 }
192 }
193 i++
194 }
195 }
196
197 w.Write(t.Data)
198 }
199 case html.StartTagToken, html.EndTagToken:
200 rawTagHash = 0
201 hasAttributes := false
202 if t.TokenType == html.StartTagToken {
203 if next := tb.Peek(0); next.TokenType == html.AttributeToken {
204 hasAttributes = true
205 }
206 if t.Traits&rawTag != 0 {
207 // ignore empty script and style tags
208 if !hasAttributes && (t.Hash == Script || t.Hash == Style) {
209 if next := tb.Peek(1); next.TokenType == html.EndTagToken {
210 tb.Shift()
211 tb.Shift()
212 break
213 }
214 }
215 rawTagHash = t.Hash
216 rawTagMediatype = nil
217
218 // do not minify content of <style amp-boilerplate>
219 if hasAttributes && t.Hash == Style {
220 if attrs := tb.Attributes(Amp_Boilerplate); attrs[0] != nil {
221 rawTagHash = 0
222 }
223 }
224 }
225 } else if t.Hash == Template {
226 omitSpace = true // EndTagToken
227 }
228
229 if t.Hash == Pre {
230 inPre = t.TokenType == html.StartTagToken
231 }
232
233 // remove superfluous tags, except for html, head and body tags when KeepDocumentTags is set
234 if !hasAttributes && (!o.KeepDocumentTags && (t.Hash == Html || t.Hash == Head || t.Hash == Body) || t.Hash == Colgroup) {
235 break
236 } else if t.TokenType == html.EndTagToken {
237 omitEndTag := false
238 if !o.KeepEndTags {
239 if t.Hash == Thead || t.Hash == Tbody || t.Hash == Tfoot || t.Hash == Tr || t.Hash == Th ||
240 t.Hash == Td || t.Hash == Option || t.Hash == Dd || t.Hash == Dt || t.Hash == Li ||
241 t.Hash == Rb || t.Hash == Rt || t.Hash == Rtc || t.Hash == Rp {
242 omitEndTag = true // omit end tags
243 } else if t.Hash == P {
244 i := 0
245 for {
246 next := tb.Peek(i)
247 i++
248 // continue if text token is empty or whitespace
249 if next.TokenType == html.TextToken && parse.IsAllWhitespace(next.Data) {
250 continue
251 }
252 if next.TokenType == html.ErrorToken || next.TokenType == html.EndTagToken && next.Traits&keepPTag == 0 || next.TokenType == html.StartTagToken && next.Traits&omitPTag != 0 {
253 omitEndTag = true // omit p end tag
254 }
255 break
256 }
257 } else if t.Hash == Optgroup {
258 i := 0
259 for {
260 next := tb.Peek(i)
261 i++
262 // continue if text token
263 if next.TokenType == html.TextToken {
264 continue
265 }
266 if next.TokenType == html.ErrorToken || next.Hash != Option {
267 omitEndTag = true // omit optgroup end tag
268 }
269 break
270 }
271 }
272 }
273
274 if t.Traits&nonPhrasingTag != 0 {
275 omitSpace = true // omit spaces after block elements
276 } else if o.KeepWhitespace || t.Traits&objectTag != 0 {
277 omitSpace = false
278 }
279
280 if !omitEndTag {
281 if len(t.Data) > 3+len(t.Text) {
282 t.Data[2+len(t.Text)] = '>'
283 t.Data = t.Data[:3+len(t.Text)]
284 }
285 w.Write(t.Data)
286 }
287
288 // skip text in select and optgroup tags
289 if t.Hash == Option || t.Hash == Optgroup {
290 if next := tb.Peek(0); next.TokenType == html.TextToken {
291 tb.Shift()
292 }
293 }
294 break
295 }
296
297 if o.KeepWhitespace || t.Traits&objectTag != 0 {
298 omitSpace = false
299 } else if t.Traits&nonPhrasingTag != 0 {
300 omitSpace = true // omit spaces after block elements
301 }
302
303 w.Write(t.Data)
304
305 if hasAttributes {
306 if t.Hash == Meta {
307 attrs := tb.Attributes(Content, Http_Equiv, Charset, Name)
308 if content := attrs[0]; content != nil {
309 if httpEquiv := attrs[1]; httpEquiv != nil {
310 httpEquiv.AttrVal = parse.TrimWhitespace(httpEquiv.AttrVal)
311 if charset := attrs[2]; charset == nil && parse.EqualFold(httpEquiv.AttrVal, []byte("content-type")) {
312 content.AttrVal = minify.Mediatype(content.AttrVal)
313 if bytes.Equal(content.AttrVal, []byte("text/html;charset=utf-8")) {
314 httpEquiv.Text = nil
315 content.Text = []byte("charset")
316 content.Hash = Charset
317 content.AttrVal = []byte("utf-8")
318 }
319 }
320 }
321 if name := attrs[3]; name != nil {
322 name.AttrVal = parse.TrimWhitespace(name.AttrVal)
323 if parse.EqualFold(name.AttrVal, []byte("keywords")) {
324 content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(", "), []byte(","))
325 } else if parse.EqualFold(name.AttrVal, []byte("viewport")) {
326 content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(" "), []byte(""))
327 for i := 0; i < len(content.AttrVal); i++ {
328 if content.AttrVal[i] == '=' && i+2 < len(content.AttrVal) {
329 i++
330 if n := parse.Number(content.AttrVal[i:]); n > 0 {
331 minNum := minify.Number(content.AttrVal[i:i+n], -1)
332 if len(minNum) < n {
333 copy(content.AttrVal[i:i+len(minNum)], minNum)
334 copy(content.AttrVal[i+len(minNum):], content.AttrVal[i+n:])
335 content.AttrVal = content.AttrVal[:len(content.AttrVal)+len(minNum)-n]
336 }
337 i += len(minNum)
338 }
339 i-- // mitigate for-loop increase
340 }
341 }
342 }
343 }
344 }
345 } else if t.Hash == Script {
346 attrs := tb.Attributes(Src, Charset)
347 if attrs[0] != nil && attrs[1] != nil {
348 attrs[1].Text = nil
349 }
350 } else if t.Hash == Input {
351 attrs := tb.Attributes(Type, Value)
352 if t, value := attrs[0], attrs[1]; t != nil && value != nil {
353 isRadio := parse.EqualFold(t.AttrVal, radioBytes)
354 if !isRadio && len(value.AttrVal) == 0 {
355 value.Text = nil
356 } else if isRadio && parse.EqualFold(value.AttrVal, onBytes) {
357 value.Text = nil
358 }
359 }
360 } else if t.Hash == A {
361 attrs := tb.Attributes(Id, Name)
362 if id, name := attrs[0], attrs[1]; id != nil && name != nil {
363 if bytes.Equal(id.AttrVal, name.AttrVal) {
364 name.Text = nil
365 }
366 }
367 }
368
369 // write attributes
370 for {
371 attr := *tb.Shift()
372 if attr.TokenType != html.AttributeToken {
373 break
374 } else if attr.Text == nil {
375 continue // removed attribute
376 }
377
378 val := attr.AttrVal
379 if attr.Traits&trimAttr != 0 {
380 val = parse.ReplaceMultipleWhitespaceAndEntities(val, EntitiesMap, nil)
381 val = parse.TrimWhitespace(val)
382 } else {
383 val = parse.ReplaceEntities(val, EntitiesMap, nil)
384 }
385 if t.Traits != 0 {
386 if len(val) == 0 && (attr.Hash == Class ||
387 attr.Hash == Dir ||
388 attr.Hash == Id ||
389 attr.Hash == Name ||
390 attr.Hash == Action && t.Hash == Form) {
391 continue // omit empty attribute values
392 }
393 if attr.Traits&caselessAttr != 0 {
394 val = parse.ToLower(val)
395 }
396 if rawTagHash != 0 && attr.Hash == Type {
397 rawTagMediatype = parse.Copy(val)
398 }
399
400 if attr.Hash == Enctype || attr.Hash == Codetype || attr.Hash == Accept || attr.Hash == Type && (t.Hash == A || t.Hash == Link || t.Hash == Embed || t.Hash == Object || t.Hash == Source || t.Hash == Script || t.Hash == Style) {
401 val = minify.Mediatype(val)
402 }
403
404 // default attribute values can be omitted
405 if !o.KeepDefaultAttrVals && (attr.Hash == Type && (t.Hash == Script && jsMimetypes[string(val)] ||
406 t.Hash == Style && bytes.Equal(val, cssMimeBytes) ||
407 t.Hash == Link && bytes.Equal(val, cssMimeBytes) ||
408 t.Hash == Input && bytes.Equal(val, textBytes) ||
409 t.Hash == Button && bytes.Equal(val, submitBytes)) ||
410 attr.Hash == Language && t.Hash == Script ||
411 attr.Hash == Method && bytes.Equal(val, getBytes) ||
412 attr.Hash == Enctype && bytes.Equal(val, formMimeBytes) ||
413 attr.Hash == Colspan && bytes.Equal(val, oneBytes) ||
414 attr.Hash == Rowspan && bytes.Equal(val, oneBytes) ||
415 attr.Hash == Shape && bytes.Equal(val, rectBytes) ||
416 attr.Hash == Span && bytes.Equal(val, oneBytes) ||
417 attr.Hash == Clear && bytes.Equal(val, noneBytes) ||
418 attr.Hash == Frameborder && bytes.Equal(val, oneBytes) ||
419 attr.Hash == Scrolling && bytes.Equal(val, autoBytes) ||
420 attr.Hash == Valuetype && bytes.Equal(val, dataBytes) ||
421 attr.Hash == Media && t.Hash == Style && bytes.Equal(val, allBytes)) {
422 continue
423 }
424
425 if attr.Hash == Style {
426 // CSS minifier for attribute inline code
427 val = parse.TrimWhitespace(val)
428 attrMinifyBuffer.Reset()
429 if err := m.MinifyMimetype(cssMimeBytes, attrMinifyBuffer, buffer.NewReader(val), inlineParams); err == nil {
430 val = attrMinifyBuffer.Bytes()
431 } else if err != minify.ErrNotExist {
432 return minify.UpdateErrorPosition(err, z, attr.Offset)
433 }
434 if len(val) == 0 {
435 continue
436 }
437 } else if len(attr.Text) > 2 && attr.Text[0] == 'o' && attr.Text[1] == 'n' {
438 // JS minifier for attribute inline code
439 val = parse.TrimWhitespace(val)
440 if len(val) >= 11 && parse.EqualFold(val[:11], jsSchemeBytes) {
441 val = val[11:]
442 }
443 attrMinifyBuffer.Reset()
444 if err := m.MinifyMimetype(jsMimeBytes, attrMinifyBuffer, buffer.NewReader(val), nil); err == nil {
445 val = attrMinifyBuffer.Bytes()
446 } else if err != minify.ErrNotExist {
447 return minify.UpdateErrorPosition(err, z, attr.Offset)
448 }
449 if len(val) == 0 {
450 continue
451 }
452 } else if attr.Traits&urlAttr != 0 { // anchors are already handled
453 val = parse.TrimWhitespace(val)
454 if 5 < len(val) {
455 if parse.EqualFold(val[:4], httpBytes) {
456 if val[4] == ':' {
457 if m.URL != nil && m.URL.Scheme == "http" {
458 val = val[5:]
459 } else {
460 parse.ToLower(val[:4])
461 }
462 } else if (val[4] == 's' || val[4] == 'S') && val[5] == ':' {
463 if m.URL != nil && m.URL.Scheme == "https" {
464 val = val[6:]
465 } else {
466 parse.ToLower(val[:5])
467 }
468 }
469 } else if parse.EqualFold(val[:5], dataSchemeBytes) {
470 val = minify.DataURI(m, val)
471 }
472 }
473 }
474 }
475
476 w.Write(spaceBytes)
477 w.Write(attr.Text)
478 if len(val) > 0 && attr.Traits&booleanAttr == 0 {
479 w.Write(isBytes)
480
481 // use double quotes for RDFa attributes
482 isXML := attr.Hash == Vocab || attr.Hash == Typeof || attr.Hash == Property || attr.Hash == Resource || attr.Hash == Prefix || attr.Hash == Content || attr.Hash == About || attr.Hash == Rev || attr.Hash == Datatype || attr.Hash == Inlist
483
484 // no quotes if possible, else prefer single or double depending on which occurs more often in value
485 var quote byte
486
487 if 0 < len(attr.Data) && (attr.Data[len(attr.Data)-1] == '\'' || attr.Data[len(attr.Data)-1] == '"') {
488 quote = attr.Data[len(attr.Data)-1]
489 }
490 val = html.EscapeAttrVal(&attrByteBuffer, val, quote, o.KeepQuotes, isXML)
491 w.Write(val)
492 }
493 }
494 } else {
495 _ = tb.Shift() // StartTagClose
496 }
497 w.Write(gtBytes)
498
499 // skip text in select and optgroup tags
500 if t.Hash == Select || t.Hash == Optgroup {
501 if next := tb.Peek(0); next.TokenType == html.TextToken {
502 tb.Shift()
503 }
504 }
505
506 // keep space after phrasing tags (<i>, <span>, ...) FontAwesome etc.
507 if t.TokenType == html.StartTagToken && t.Traits&nonPhrasingTag == 0 {
508 if next := tb.Peek(0); next.Hash == t.Hash && next.TokenType == html.EndTagToken {
509 omitSpace = false
510 }
511 }
512 }
513 }
514}