1// Package parse contains a collection of parsers for various formats in its subpackages.
  2package parse
  3
  4import (
  5	"bytes"
  6	"encoding/base64"
  7	"errors"
  8)
  9
 10var (
 11	dataSchemeBytes = []byte("data:")
 12	base64Bytes     = []byte("base64")
 13	textMimeBytes   = []byte("text/plain")
 14)
 15
 16// ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:' or is too short.
 17var ErrBadDataURI = errors.New("not a data URI")
 18
 19// Number returns the number of bytes that parse as a number of the regex format (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
 20func Number(b []byte) int {
 21	if len(b) == 0 {
 22		return 0
 23	}
 24	i := 0
 25	if b[i] == '+' || b[i] == '-' {
 26		i++
 27		if i >= len(b) {
 28			return 0
 29		}
 30	}
 31	firstDigit := (b[i] >= '0' && b[i] <= '9')
 32	if firstDigit {
 33		i++
 34		for i < len(b) && b[i] >= '0' && b[i] <= '9' {
 35			i++
 36		}
 37	}
 38	if i < len(b) && b[i] == '.' {
 39		i++
 40		if i < len(b) && b[i] >= '0' && b[i] <= '9' {
 41			i++
 42			for i < len(b) && b[i] >= '0' && b[i] <= '9' {
 43				i++
 44			}
 45		} else if firstDigit {
 46			// . could belong to the next token
 47			i--
 48			return i
 49		} else {
 50			return 0
 51		}
 52	} else if !firstDigit {
 53		return 0
 54	}
 55	iOld := i
 56	if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
 57		i++
 58		if i < len(b) && (b[i] == '+' || b[i] == '-') {
 59			i++
 60		}
 61		if i >= len(b) || b[i] < '0' || b[i] > '9' {
 62			// e could belong to next token
 63			return iOld
 64		}
 65		for i < len(b) && b[i] >= '0' && b[i] <= '9' {
 66			i++
 67		}
 68	}
 69	return i
 70}
 71
 72// Dimension parses a byte-slice and returns the length of the number and its unit.
 73func Dimension(b []byte) (int, int) {
 74	num := Number(b)
 75	if num == 0 || num == len(b) {
 76		return num, 0
 77	} else if b[num] == '%' {
 78		return num, 1
 79	} else if b[num] >= 'a' && b[num] <= 'z' || b[num] >= 'A' && b[num] <= 'Z' {
 80		i := num + 1
 81		for i < len(b) && (b[i] >= 'a' && b[i] <= 'z' || b[i] >= 'A' && b[i] <= 'Z') {
 82			i++
 83		}
 84		return num, i - num
 85	}
 86	return num, 0
 87}
 88
 89// Mediatype parses a given mediatype and splits the mimetype from the parameters.
 90// It works similar to mime.ParseMediaType but is faster.
 91func Mediatype(b []byte) ([]byte, map[string]string) {
 92	i := 0
 93	for i < len(b) && b[i] == ' ' {
 94		i++
 95	}
 96	b = b[i:]
 97	n := len(b)
 98	mimetype := b
 99	var params map[string]string
100	for i := 3; i < n; i++ { // mimetype is at least three characters long
101		if b[i] == ';' || b[i] == ' ' {
102			mimetype = b[:i]
103			if b[i] == ' ' {
104				i++ // space
105				for i < n && b[i] == ' ' {
106					i++
107				}
108				if n <= i || b[i] != ';' {
109					break
110				}
111			}
112			params = map[string]string{}
113			s := string(b)
114		PARAM:
115			i++ // semicolon
116			for i < n && s[i] == ' ' {
117				i++
118			}
119			start := i
120			for i < n && s[i] != '=' && s[i] != ';' && s[i] != ' ' {
121				i++
122			}
123			key := s[start:i]
124			for i < n && s[i] == ' ' {
125				i++
126			}
127			if i < n && s[i] == '=' {
128				i++
129				for i < n && s[i] == ' ' {
130					i++
131				}
132				start = i
133				for i < n && s[i] != ';' && s[i] != ' ' {
134					i++
135				}
136			} else {
137				start = i
138			}
139			params[key] = s[start:i]
140			for i < n && s[i] == ' ' {
141				i++
142			}
143			if i < n && s[i] == ';' {
144				goto PARAM
145			}
146			break
147		}
148	}
149	return mimetype, params
150}
151
152// DataURI parses the given data URI and returns the mediatype, data and ok.
153func DataURI(dataURI []byte) ([]byte, []byte, error) {
154	if len(dataURI) > 5 && bytes.Equal(dataURI[:5], dataSchemeBytes) {
155		dataURI = dataURI[5:]
156		inBase64 := false
157		var mediatype []byte
158		i := 0
159		for j := 0; j < len(dataURI); j++ {
160			c := dataURI[j]
161			if c == '=' || c == ';' || c == ',' {
162				if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), base64Bytes) {
163					if len(mediatype) > 0 {
164						mediatype = mediatype[:len(mediatype)-1]
165					}
166					inBase64 = true
167					i = j
168				} else if c != ',' {
169					mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
170					i = j + 1
171				} else {
172					mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
173				}
174				if c == ',' {
175					if len(mediatype) == 0 || mediatype[0] == ';' {
176						mediatype = textMimeBytes
177					}
178					data := dataURI[j+1:]
179					if inBase64 {
180						decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
181						n, err := base64.StdEncoding.Decode(decoded, data)
182						if err != nil {
183							return nil, nil, err
184						}
185						data = decoded[:n]
186					} else {
187						data = DecodeURL(data)
188					}
189					return mediatype, data, nil
190				}
191			}
192		}
193	}
194	return nil, nil, ErrBadDataURI
195}
196
// QuoteEntity parses the given byte slice and returns the quote that got matched
// (' or ") and the length of its entity. It recognizes &quot; and &apos;, the
// decimal forms &#34; and &#39;, and the hexadecimal forms &#x22; and &#x27;,
// where the numeric forms may have leading zeros. It returns (0, 0) when b does
// not start with one of these entities.
// TODO: deprecated
func QuoteEntity(b []byte) (quote byte, n int) {
	if len(b) < 5 || b[0] != '&' {
		return 0, 0
	}

	if b[1] != '#' {
		// named entity: four letters followed by a semicolon
		if len(b) < 6 || b[5] != ';' {
			return 0, 0
		}
		switch string(b[1:5]) {
		case "quot":
			return '"', 6 // &quot;
		case "apos":
			return '\'', 6 // &apos;
		}
		return 0, 0
	}

	// numeric entity: decimal 34/39 or, after an 'x', hexadecimal 22/27
	pos, first, dquote, squote := 2, byte('3'), byte('4'), byte('9')
	if b[2] == 'x' {
		pos, first, dquote, squote = 3, '2', '2', '7'
	}
	for pos < len(b) && b[pos] == '0' {
		pos++
	}
	if pos+2 >= len(b) || b[pos] != first || b[pos+2] != ';' {
		return 0, 0
	}
	switch b[pos+1] {
	case dquote:
		return '"', pos + 3
	case squote:
		return '\'', pos + 3
	}
	return 0, 0
}