1// Package parse contains a collection of parsers for various formats in its subpackages.
2package parse
3
4import (
5 "bytes"
6 "encoding/base64"
7 "errors"
8)
9
// Byte-slice literals that DataURI compares its input against or returns.
var (
	dataSchemeBytes = []byte("data:")      // prefix DataURI requires at the start of its input
	base64Bytes     = []byte("base64")     // header token that makes DataURI base64-decode the payload
	textMimeBytes   = []byte("text/plain") // mediatype DataURI returns when the header has none or starts with ';'
)
15
// ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:', is too short,
// or contains no ',' separating the header from the data.
var ErrBadDataURI = errors.New("not a data URI")
18
// Number reports how many leading bytes of b form a number of the regex format
// (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
// It returns 0 when b does not start with such a number.
func Number(b []byte) int {
	n := len(b)
	if n == 0 {
		return 0
	}
	isDigit := func(c byte) bool { return '0' <= c && c <= '9' }

	// Optional sign; a sign on its own is not a number.
	pos := 0
	if b[0] == '+' || b[0] == '-' {
		if n == 1 {
			return 0
		}
		pos = 1
	}

	// Integer part (may be empty when the number starts with a dot).
	intStart := pos
	for pos < n && isDigit(b[pos]) {
		pos++
	}
	hasInt := pos > intStart

	// Fractional part.
	switch {
	case pos < n && b[pos] == '.':
		fracEnd := pos + 1
		for fracEnd < n && isDigit(b[fracEnd]) {
			fracEnd++
		}
		if fracEnd == pos+1 {
			// No digit follows the dot.
			if hasInt {
				// The dot may belong to the next token, stop right before it.
				return pos
			}
			return 0
		}
		pos = fracEnd
	case !hasInt:
		return 0
	}

	// Exponent; only accepted when at least one digit follows the optional sign.
	if pos < n && (b[pos] == 'e' || b[pos] == 'E') {
		expEnd := pos + 1
		if expEnd < n && (b[expEnd] == '+' || b[expEnd] == '-') {
			expEnd++
		}
		if expEnd >= n || !isDigit(b[expEnd]) {
			// The e may belong to the next token, stop right before it.
			return pos
		}
		for expEnd < n && isDigit(b[expEnd]) {
			expEnd++
		}
		pos = expEnd
	}
	return pos
}
71
72// Dimension parses a byte-slice and returns the length of the number and its unit.
73func Dimension(b []byte) (int, int) {
74 num := Number(b)
75 if num == 0 || num == len(b) {
76 return num, 0
77 } else if b[num] == '%' {
78 return num, 1
79 } else if b[num] >= 'a' && b[num] <= 'z' || b[num] >= 'A' && b[num] <= 'Z' {
80 i := num + 1
81 for i < len(b) && (b[i] >= 'a' && b[i] <= 'z' || b[i] >= 'A' && b[i] <= 'Z') {
82 i++
83 }
84 return num, i - num
85 }
86 return num, 0
87}
88
// Mediatype parses a given mediatype and splits the mimetype from the parameters.
// It works similar to mime.ParseMediaType but is faster.
// Leading spaces are skipped. The mimetype ends at the first ';' or ' ' found at index three or later.
// The returned map is nil when no ';' follows the mimetype; a parameter without '=' gets an empty value.
func Mediatype(b []byte) ([]byte, map[string]string) {
	// Drop leading spaces.
	lead := 0
	for lead < len(b) && b[lead] == ' ' {
		lead++
	}
	b = b[lead:]
	n := len(b)

	// The mimetype is at least three characters long, so start looking for its end from there.
	end := 3
	for end < n && b[end] != ';' && b[end] != ' ' {
		end++
	}
	if end >= n {
		return b, nil
	}
	mimetype := b[:end]

	// Spaces after the mimetype are only acceptable when a semicolon follows them.
	pos := end
	if b[pos] == ' ' {
		for pos < n && b[pos] == ' ' {
			pos++
		}
		if pos >= n || b[pos] != ';' {
			return mimetype, nil
		}
	}

	s := string(b)
	skipSpaces := func(i int) int {
		for i < n && s[i] == ' ' {
			i++
		}
		return i
	}

	params := map[string]string{}
	for {
		pos = skipSpaces(pos + 1) // step over the semicolon and any spaces after it

		keyStart := pos
		for pos < n && s[pos] != '=' && s[pos] != ';' && s[pos] != ' ' {
			pos++
		}
		key := s[keyStart:pos]
		pos = skipSpaces(pos)

		// Without '=' the value is the empty string at the current position.
		valStart := pos
		if pos < n && s[pos] == '=' {
			pos = skipSpaces(pos + 1)
			valStart = pos
			for pos < n && s[pos] != ';' && s[pos] != ' ' {
				pos++
			}
		}
		params[key] = s[valStart:pos]

		pos = skipSpaces(pos)
		if pos >= n || s[pos] != ';' {
			return mimetype, params
		}
	}
}
151
152// DataURI parses the given data URI and returns the mediatype, data and ok.
153func DataURI(dataURI []byte) ([]byte, []byte, error) {
154 if len(dataURI) > 5 && bytes.Equal(dataURI[:5], dataSchemeBytes) {
155 dataURI = dataURI[5:]
156 inBase64 := false
157 var mediatype []byte
158 i := 0
159 for j := 0; j < len(dataURI); j++ {
160 c := dataURI[j]
161 if c == '=' || c == ';' || c == ',' {
162 if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), base64Bytes) {
163 if len(mediatype) > 0 {
164 mediatype = mediatype[:len(mediatype)-1]
165 }
166 inBase64 = true
167 i = j
168 } else if c != ',' {
169 mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
170 i = j + 1
171 } else {
172 mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
173 }
174 if c == ',' {
175 if len(mediatype) == 0 || mediatype[0] == ';' {
176 mediatype = textMimeBytes
177 }
178 data := dataURI[j+1:]
179 if inBase64 {
180 decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
181 n, err := base64.StdEncoding.Decode(decoded, data)
182 if err != nil {
183 return nil, nil, err
184 }
185 data = decoded[:n]
186 } else {
187 data = DecodeURL(data)
188 }
189 return mediatype, data, nil
190 }
191 }
192 }
193 }
194 return nil, nil, ErrBadDataURI
195}
196
// QuoteEntity parses the given byte slice and returns the quote that got matched (' or ") and its entity length.
// It recognizes the named forms &quot; and &apos;, the decimal forms &#34; and &#39;, and the
// hexadecimal forms &#x22; and &#x27;, where numeric forms may carry leading zeros.
// It returns 0, 0 when b does not start with one of these entities.
// TODO: deprecated
func QuoteEntity(b []byte) (quote byte, n int) {
	if len(b) < 5 || b[0] != '&' {
		return 0, 0
	}

	// Named entities.
	if b[1] != '#' {
		switch {
		case bytes.HasPrefix(b[1:], []byte("quot;")):
			return '"', 6
		case bytes.HasPrefix(b[1:], []byte("apos;")):
			return '\'', 6
		}
		return 0, 0
	}

	// Numeric entities: decimal 34/39 or, after an 'x', hexadecimal 22/27.
	pos := 2
	first, double, single := byte('3'), byte('4'), byte('9')
	if b[2] == 'x' {
		pos = 3
		first, double, single = '2', '2', '7'
	}
	for pos < len(b) && b[pos] == '0' {
		pos++
	}
	if pos+2 < len(b) && b[pos] == first && b[pos+2] == ';' {
		switch b[pos+1] {
		case double:
			return '"', pos + 3
		case single:
			return '\'', pos + 3
		}
	}
	return 0, 0
}