From 5a8dbc6347b3541e84fe669b22c17ad3b715e258 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Wed, 21 Jan 2026 20:22:09 +0100 Subject: Engage! --- .../go-tree-sitter/dockerfile/scanner.c | 318 +++++++++++++++++++++ 1 file changed, 318 insertions(+) create mode 100644 vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c (limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c') diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c new file mode 100644 index 0000000..854a7d0 --- /dev/null +++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/dockerfile/scanner.c @@ -0,0 +1,318 @@ +#include +#include +#include +#include + +#include "parser.h" + +#define MAX_HEREDOCS 10 +#define DEL_SPACE 512 + +typedef struct { + bool in_heredoc; + bool stripping_heredoc; + unsigned heredoc_count; + char *heredocs[MAX_HEREDOCS]; +} scanner_state; + +enum TokenType { + HEREDOC_MARKER, + HEREDOC_LINE, + HEREDOC_END, + HEREDOC_NL, + ERROR_SENTINEL, +}; + +void *tree_sitter_dockerfile_external_scanner_create() { + scanner_state *state = malloc(sizeof(scanner_state)); + memset(state, 0, sizeof(scanner_state)); + return state; +} + +void tree_sitter_dockerfile_external_scanner_destroy(void *payload) { + if (!payload) + return; + + scanner_state *state = payload; + for (unsigned i = 0; i < MAX_HEREDOCS; i++) { + if (state->heredocs[i]) { + free(state->heredocs[i]); + } + } + + free(state); +} + +unsigned tree_sitter_dockerfile_external_scanner_serialize(void *payload, + char *buffer) { + scanner_state *state = payload; + + unsigned pos = 0; + buffer[pos++] = state->in_heredoc; + buffer[pos++] = state->stripping_heredoc; + + for (unsigned i = 0; i < state->heredoc_count; i++) { + // Add the ending null byte to the length since we'll have to copy it as + // well. + unsigned len = strlen(state->heredocs[i]) + 1; + + // If we run out of space, just drop the heredocs that don't fit. + // We need at least len + 1 bytes space since we'll copy len bytes below + // and later add a null byte at the end. + if (pos + len + 1 > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + break; + } + + memcpy(&buffer[pos], state->heredocs[i], len); + pos += len; + } + + // Add a null byte at the end to make it easy to detect. + buffer[pos++] = 0; + return pos; +} + +void tree_sitter_dockerfile_external_scanner_deserialize(void *payload, + const char *buffer, + unsigned length) { + scanner_state *state = payload; + // Free all current heredocs to avoid leaking memory when we overwrite the + // array later. + for (unsigned i = 0; i < state->heredoc_count; i++) { + free(state->heredocs[i]); + state->heredocs[i] = NULL; + } + + if (length == 0) { + state->in_heredoc = false; + state->stripping_heredoc = false; + state->heredoc_count = 0; + } else { + unsigned pos = 0; + state->in_heredoc = buffer[pos++]; + state->stripping_heredoc = buffer[pos++]; + + unsigned heredoc_count = 0; + for (unsigned i = 0; i < MAX_HEREDOCS; i++) { + unsigned len = strlen(&buffer[pos]); + + // We found the ending null byte which means that we're done. + if (len == 0) + break; + + // Account for the ending null byte in strings (again). + len++; + char *heredoc = malloc(len); + memcpy(heredoc, &buffer[pos], len); + state->heredocs[i] = heredoc; + heredoc_count++; + + pos += len; + } + + state->heredoc_count = heredoc_count; + } +} + +static void skip_whitespace(TSLexer *lexer) { + while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && + iswspace(lexer->lookahead)) + lexer->advance(lexer, true); +} + +static bool scan_marker(scanner_state *state, TSLexer *lexer) { + skip_whitespace(lexer); + + if (lexer->lookahead != '<') + return false; + lexer->advance(lexer, false); + + if (lexer->lookahead != '<') + return false; + lexer->advance(lexer, false); + + bool stripping = false; + if (lexer->lookahead == '-') { + stripping = true; + lexer->advance(lexer, false); + } + + int32_t quote = 0; + if (lexer->lookahead == '"' || lexer->lookahead == '\'') { + quote = lexer->lookahead; + lexer->advance(lexer, false); + } + + // Reserve a reasonable amount of space for the heredoc delimiter string. + // Most heredocs (like EOF, EOT, EOS, FILE, etc.) are pretty short so we'll + // usually only need a few bytes. We're also limited to less than 1024 bytes + // by tree-sitter since our state has to fit in + // TREE_SITTER_SERIALIZATION_BUFFER_SIZE. + char delimiter[DEL_SPACE]; + + // We start recording the actual string at position 1 since we store whether + // it's a stripping heredoc in the first position (with either a dash or a + // space). + unsigned del_idx = 1; + + while (lexer->lookahead != '\0' && + (quote ? lexer->lookahead != quote : !iswspace(lexer->lookahead))) { + if (lexer->lookahead == '\\') { + lexer->advance(lexer, false); + + if (lexer->lookahead == '\0') { + return false; + } + } + + if (del_idx > 0) { + delimiter[del_idx++] = lexer->lookahead; + } + lexer->advance(lexer, false); + + // If we run out of space, stop recording the delimiter but keep + // advancing the lexer to ensure that we at least parse the marker + // correctly. Reserve two bytes: one for the strip indicator and + // one for the terminating null byte. + if (del_idx >= DEL_SPACE - 2) { + del_idx = 0; + } + } + + if (quote) { + if (lexer->lookahead != quote) { + return false; + } + lexer->advance(lexer, false); + } + + if (del_idx == 0) { + lexer->result_symbol = HEREDOC_MARKER; + return true; + } + + delimiter[0] = stripping ? '-' : ' '; + delimiter[del_idx] = '\0'; + + // We copy the delimiter string to the heap here since we can't store our + // stack-allocated string in our state (which is stored on the heap). + char *del_copy = malloc(del_idx + 1); + memcpy(del_copy, delimiter, del_idx + 1); + + if (state->heredoc_count == 0) { + state->heredoc_count = 1; + state->heredocs[0] = del_copy; + state->stripping_heredoc = stripping; + } else if (state->heredoc_count >= MAX_HEREDOCS) { + free(del_copy); + } else { + state->heredocs[state->heredoc_count++] = del_copy; + } + + lexer->result_symbol = HEREDOC_MARKER; + return true; +} + +static bool scan_content(scanner_state *state, TSLexer *lexer, + const bool *valid_symbols) { + if (state->heredoc_count == 0) { + state->in_heredoc = false; + return false; + } + + state->in_heredoc = true; + + if (state->stripping_heredoc) { + skip_whitespace(lexer); + } + + if (valid_symbols[HEREDOC_END]) { + unsigned delim_idx = 1; + // Look for the current heredoc delimiter. + while (state->heredocs[0][delim_idx] != '\0' && + lexer->lookahead != '\0' && + lexer->lookahead == state->heredocs[0][delim_idx]) { + lexer->advance(lexer, false); + delim_idx++; + } + + // Check if the entire string matched. + if (state->heredocs[0][delim_idx] == '\0') { + lexer->result_symbol = HEREDOC_END; + + // Shift the first heredoc off the list. + free(state->heredocs[0]); + + for (unsigned i = 1; i < state->heredoc_count; i++) { + state->heredocs[i - 1] = state->heredocs[i]; + } + state->heredocs[state->heredoc_count - 1] = NULL; + state->heredoc_count--; + + if (state->heredoc_count > 0) { + state->stripping_heredoc = state->heredocs[0][0] == '-'; + } else { + state->in_heredoc = false; + } + + return true; + } + } + + if (!valid_symbols[HEREDOC_LINE]) + return false; + + lexer->result_symbol = HEREDOC_LINE; + + for (;;) { + switch (lexer->lookahead) { + case '\0': + if (lexer->eof(lexer)) { + state->in_heredoc = false; + return true; + } + lexer->advance(lexer, false); + break; + + case '\n': + return true; + + default: + lexer->advance(lexer, false); + } + } +} + +bool tree_sitter_dockerfile_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + scanner_state *state = payload; + + if (valid_symbols[ERROR_SENTINEL]) { + if (state->in_heredoc) { + return scan_content(state, lexer, valid_symbols); + } else { + return scan_marker(state, lexer); + } + } + + // HEREDOC_NL only matches a linebreak if there are open heredocs. This is + // necessary to avoid a conflict in the grammar since a normal line break + // could either be the start of a heredoc or the end of an instruction. + if (valid_symbols[HEREDOC_NL]) { + if (state->heredoc_count > 0 && lexer->lookahead == '\n') { + lexer->result_symbol = HEREDOC_NL; + lexer->advance(lexer, false); + return true; + } + } + + if (valid_symbols[HEREDOC_MARKER]) { + return scan_marker(state, lexer); + } + + if (valid_symbols[HEREDOC_LINE] || valid_symbols[HEREDOC_END]) { + return scan_content(state, lexer, valid_symbols); + } + + return false; +} -- cgit v1.2.3