aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c')
-rw-r--r--vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c318
1 files changed, 318 insertions, 0 deletions
diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c
new file mode 100644
index 0000000..854a7d0
--- /dev/null
+++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c
@@ -0,0 +1,318 @@
1#include <stdbool.h>
2#include <stdlib.h>
3#include <string.h>
4#include <wctype.h>
5
6#include "parser.h"
7
// Maximum number of pending heredoc delimiters the scanner tracks at once.
// Markers found while the list is full are still lexed but not recorded.
#define MAX_HEREDOCS 10

// Size of the stack buffer used while reading one heredoc delimiter. Longer
// delimiters are lexed as markers but are not recorded.
#define DEL_SPACE 512

/*
 * Mutable state of the external scanner. It is created by
 * tree_sitter_dockerfile_external_scanner_create() and round-tripped through
 * the serialize/deserialize callbacks.
 */
typedef struct {
    // Set while heredoc content is being scanned; consulted by the scan entry
    // point when ERROR_SENTINEL is valid.
    bool in_heredoc;

    // True when the heredoc at heredocs[0] was introduced with `<<-`, in which
    // case leading whitespace is skipped before each content line.
    bool stripping_heredoc;

    // Number of valid entries at the front of `heredocs`.
    unsigned heredoc_count;

    // Pending delimiters in the order their markers appeared. Each entry is a
    // heap-allocated, NUL-terminated string whose first byte is '-' (stripping)
    // or ' ' (not stripping), followed by the delimiter text. Unused slots are
    // NULL.
    char *heredocs[MAX_HEREDOCS];
} scanner_state;
17
/*
 * External token kinds. The scanner uses these values both as indices into
 * `valid_symbols` and as `lexer->result_symbol`, so the numbering must stay
 * in sync with the grammar (tree-sitter takes the order from the grammar's
 * `externals` list). Values are spelled out so an accidental reorder is
 * obvious in review.
 */
enum TokenType {
    HEREDOC_MARKER = 0, // `<<DELIM` / `<<-DELIM` introducer (scan_marker)
    HEREDOC_LINE = 1,   // one line of heredoc content (scan_content)
    HEREDOC_END = 2,    // the closing delimiter (scan_content)
    HEREDOC_NL = 3,     // line break emitted only while heredocs are pending
    ERROR_SENTINEL = 4, // never emitted; only its validity flag is inspected
};
25
26void *tree_sitter_dockerfile_external_scanner_create() {
27 scanner_state *state = malloc(sizeof(scanner_state));
28 memset(state, 0, sizeof(scanner_state));
29 return state;
30}
31
32void tree_sitter_dockerfile_external_scanner_destroy(void *payload) {
33 if (!payload)
34 return;
35
36 scanner_state *state = payload;
37 for (unsigned i = 0; i < MAX_HEREDOCS; i++) {
38 if (state->heredocs[i]) {
39 free(state->heredocs[i]);
40 }
41 }
42
43 free(state);
44}
45
46unsigned tree_sitter_dockerfile_external_scanner_serialize(void *payload,
47 char *buffer) {
48 scanner_state *state = payload;
49
50 unsigned pos = 0;
51 buffer[pos++] = state->in_heredoc;
52 buffer[pos++] = state->stripping_heredoc;
53
54 for (unsigned i = 0; i < state->heredoc_count; i++) {
55 // Add the ending null byte to the length since we'll have to copy it as
56 // well.
57 unsigned len = strlen(state->heredocs[i]) + 1;
58
59 // If we run out of space, just drop the heredocs that don't fit.
60 // We need at least len + 1 bytes space since we'll copy len bytes below
61 // and later add a null byte at the end.
62 if (pos + len + 1 > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
63 break;
64 }
65
66 memcpy(&buffer[pos], state->heredocs[i], len);
67 pos += len;
68 }
69
70 // Add a null byte at the end to make it easy to detect.
71 buffer[pos++] = 0;
72 return pos;
73}
74
75void tree_sitter_dockerfile_external_scanner_deserialize(void *payload,
76 const char *buffer,
77 unsigned length) {
78 scanner_state *state = payload;
79 // Free all current heredocs to avoid leaking memory when we overwrite the
80 // array later.
81 for (unsigned i = 0; i < state->heredoc_count; i++) {
82 free(state->heredocs[i]);
83 state->heredocs[i] = NULL;
84 }
85
86 if (length == 0) {
87 state->in_heredoc = false;
88 state->stripping_heredoc = false;
89 state->heredoc_count = 0;
90 } else {
91 unsigned pos = 0;
92 state->in_heredoc = buffer[pos++];
93 state->stripping_heredoc = buffer[pos++];
94
95 unsigned heredoc_count = 0;
96 for (unsigned i = 0; i < MAX_HEREDOCS; i++) {
97 unsigned len = strlen(&buffer[pos]);
98
99 // We found the ending null byte which means that we're done.
100 if (len == 0)
101 break;
102
103 // Account for the ending null byte in strings (again).
104 len++;
105 char *heredoc = malloc(len);
106 memcpy(heredoc, &buffer[pos], len);
107 state->heredocs[i] = heredoc;
108 heredoc_count++;
109
110 pos += len;
111 }
112
113 state->heredoc_count = heredoc_count;
114 }
115}
116
117static void skip_whitespace(TSLexer *lexer) {
118 while (lexer->lookahead != '\0' && lexer->lookahead != '\n' &&
119 iswspace(lexer->lookahead))
120 lexer->advance(lexer, true);
121}
122
123static bool scan_marker(scanner_state *state, TSLexer *lexer) {
124 skip_whitespace(lexer);
125
126 if (lexer->lookahead != '<')
127 return false;
128 lexer->advance(lexer, false);
129
130 if (lexer->lookahead != '<')
131 return false;
132 lexer->advance(lexer, false);
133
134 bool stripping = false;
135 if (lexer->lookahead == '-') {
136 stripping = true;
137 lexer->advance(lexer, false);
138 }
139
140 int32_t quote = 0;
141 if (lexer->lookahead == '"' || lexer->lookahead == '\'') {
142 quote = lexer->lookahead;
143 lexer->advance(lexer, false);
144 }
145
146 // Reserve a reasonable amount of space for the heredoc delimiter string.
147 // Most heredocs (like EOF, EOT, EOS, FILE, etc.) are pretty short so we'll
148 // usually only need a few bytes. We're also limited to less than 1024 bytes
149 // by tree-sitter since our state has to fit in
150 // TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
151 char delimiter[DEL_SPACE];
152
153 // We start recording the actual string at position 1 since we store whether
154 // it's a stripping heredoc in the first position (with either a dash or a
155 // space).
156 unsigned del_idx = 1;
157
158 while (lexer->lookahead != '\0' &&
159 (quote ? lexer->lookahead != quote : !iswspace(lexer->lookahead))) {
160 if (lexer->lookahead == '\\') {
161 lexer->advance(lexer, false);
162
163 if (lexer->lookahead == '\0') {
164 return false;
165 }
166 }
167
168 if (del_idx > 0) {
169 delimiter[del_idx++] = lexer->lookahead;
170 }
171 lexer->advance(lexer, false);
172
173 // If we run out of space, stop recording the delimiter but keep
174 // advancing the lexer to ensure that we at least parse the marker
175 // correctly. Reserve two bytes: one for the strip indicator and
176 // one for the terminating null byte.
177 if (del_idx >= DEL_SPACE - 2) {
178 del_idx = 0;
179 }
180 }
181
182 if (quote) {
183 if (lexer->lookahead != quote) {
184 return false;
185 }
186 lexer->advance(lexer, false);
187 }
188
189 if (del_idx == 0) {
190 lexer->result_symbol = HEREDOC_MARKER;
191 return true;
192 }
193
194 delimiter[0] = stripping ? '-' : ' ';
195 delimiter[del_idx] = '\0';
196
197 // We copy the delimiter string to the heap here since we can't store our
198 // stack-allocated string in our state (which is stored on the heap).
199 char *del_copy = malloc(del_idx + 1);
200 memcpy(del_copy, delimiter, del_idx + 1);
201
202 if (state->heredoc_count == 0) {
203 state->heredoc_count = 1;
204 state->heredocs[0] = del_copy;
205 state->stripping_heredoc = stripping;
206 } else if (state->heredoc_count >= MAX_HEREDOCS) {
207 free(del_copy);
208 } else {
209 state->heredocs[state->heredoc_count++] = del_copy;
210 }
211
212 lexer->result_symbol = HEREDOC_MARKER;
213 return true;
214}
215
216static bool scan_content(scanner_state *state, TSLexer *lexer,
217 const bool *valid_symbols) {
218 if (state->heredoc_count == 0) {
219 state->in_heredoc = false;
220 return false;
221 }
222
223 state->in_heredoc = true;
224
225 if (state->stripping_heredoc) {
226 skip_whitespace(lexer);
227 }
228
229 if (valid_symbols[HEREDOC_END]) {
230 unsigned delim_idx = 1;
231 // Look for the current heredoc delimiter.
232 while (state->heredocs[0][delim_idx] != '\0' &&
233 lexer->lookahead != '\0' &&
234 lexer->lookahead == state->heredocs[0][delim_idx]) {
235 lexer->advance(lexer, false);
236 delim_idx++;
237 }
238
239 // Check if the entire string matched.
240 if (state->heredocs[0][delim_idx] == '\0') {
241 lexer->result_symbol = HEREDOC_END;
242
243 // Shift the first heredoc off the list.
244 free(state->heredocs[0]);
245
246 for (unsigned i = 1; i < state->heredoc_count; i++) {
247 state->heredocs[i - 1] = state->heredocs[i];
248 }
249 state->heredocs[state->heredoc_count - 1] = NULL;
250 state->heredoc_count--;
251
252 if (state->heredoc_count > 0) {
253 state->stripping_heredoc = state->heredocs[0][0] == '-';
254 } else {
255 state->in_heredoc = false;
256 }
257
258 return true;
259 }
260 }
261
262 if (!valid_symbols[HEREDOC_LINE])
263 return false;
264
265 lexer->result_symbol = HEREDOC_LINE;
266
267 for (;;) {
268 switch (lexer->lookahead) {
269 case '\0':
270 if (lexer->eof(lexer)) {
271 state->in_heredoc = false;
272 return true;
273 }
274 lexer->advance(lexer, false);
275 break;
276
277 case '\n':
278 return true;
279
280 default:
281 lexer->advance(lexer, false);
282 }
283 }
284}
285
286bool tree_sitter_dockerfile_external_scanner_scan(void *payload, TSLexer *lexer,
287 const bool *valid_symbols) {
288 scanner_state *state = payload;
289
290 if (valid_symbols[ERROR_SENTINEL]) {
291 if (state->in_heredoc) {
292 return scan_content(state, lexer, valid_symbols);
293 } else {
294 return scan_marker(state, lexer);
295 }
296 }
297
298 // HEREDOC_NL only matches a linebreak if there are open heredocs. This is
299 // necessary to avoid a conflict in the grammar since a normal line break
300 // could either be the start of a heredoc or the end of an instruction.
301 if (valid_symbols[HEREDOC_NL]) {
302 if (state->heredoc_count > 0 && lexer->lookahead == '\n') {
303 lexer->result_symbol = HEREDOC_NL;
304 lexer->advance(lexer, false);
305 return true;
306 }
307 }
308
309 if (valid_symbols[HEREDOC_MARKER]) {
310 return scan_marker(state, lexer);
311 }
312
313 if (valid_symbols[HEREDOC_LINE] || valid_symbols[HEREDOC_END]) {
314 return scan_content(state, lexer, valid_symbols);
315 }
316
317 return false;
318}