diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-22 13:09:29 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-22 13:09:29 +0100 |
| commit | da973be545d6da3b2d42023f96bcfd79f751eba9 (patch) | |
| tree | b1b86be33ff9e65e6979a0a3a04a391338311f5b /vendor/tree-sitter-cuda/src/scanner.c | |
| parent | c4923c47ffc2309fc3844be80ee0d8392e2fad2b (diff) | |
| download | crep-da973be545d6da3b2d42023f96bcfd79f751eba9.tar.gz | |
Add CUDA grammar
Diffstat (limited to 'vendor/tree-sitter-cuda/src/scanner.c')
| -rw-r--r-- | vendor/tree-sitter-cuda/src/scanner.c | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/vendor/tree-sitter-cuda/src/scanner.c b/vendor/tree-sitter-cuda/src/scanner.c new file mode 100644 index 0000000..687f78d --- /dev/null +++ b/vendor/tree-sitter-cuda/src/scanner.c @@ -0,0 +1,148 @@ +#include "tree_sitter/alloc.h" +#include "tree_sitter/parser.h" + +#include <assert.h> +#include <string.h> +#include <wctype.h> + +enum TokenType { RAW_STRING_DELIMITER, RAW_STRING_CONTENT }; + +/// The spec limits delimiters to 16 chars +#define MAX_DELIMITER_LENGTH 16 + +typedef struct { + uint8_t delimiter_length; + wchar_t delimiter[MAX_DELIMITER_LENGTH]; +} Scanner; + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void reset(Scanner *scanner) { + scanner->delimiter_length = 0; + memset(scanner->delimiter, 0, sizeof scanner->delimiter); +} + +/// Scan the raw string delimiter in R"delimiter(content)delimiter" +static bool scan_raw_string_delimiter(Scanner *scanner, TSLexer *lexer) { + if (scanner->delimiter_length > 0) { + // Closing delimiter: must exactly match the opening delimiter. + // We already checked this when scanning content, but this is how we + // know when to stop. We can't stop at ", because R"""hello""" is valid. + for (int i = 0; i < scanner->delimiter_length; ++i) { + if (lexer->lookahead != scanner->delimiter[i]) { + return false; + } + advance(lexer); + } + reset(scanner); + return true; + } + + // Opening delimiter: record the d-char-sequence up to (. + // d-char is any basic character except parens, backslashes, and spaces. + for (;;) { + if (scanner->delimiter_length >= MAX_DELIMITER_LENGTH || lexer->eof(lexer) || lexer->lookahead == '\\' || + iswspace(lexer->lookahead)) { + return false; + } + if (lexer->lookahead == '(') { + // Rather than create a token for an empty delimiter, we fail and + // let the grammar fall back to a delimiter-less rule. + return scanner->delimiter_length > 0; + } + scanner->delimiter[scanner->delimiter_length++] = lexer->lookahead; + advance(lexer); + } +} + +/// Scan the raw string content in R"delimiter(content)delimiter" +static bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) { + // The progress made through the delimiter since the last ')'. + // The delimiter may not contain ')' so a single counter suffices. + for (int delimiter_index = -1;;) { + // If we hit EOF, consider the content to terminate there. + // This forms an incomplete raw_string_literal, and models the code + // well. + if (lexer->eof(lexer)) { + lexer->mark_end(lexer); + return true; + } + + if (delimiter_index >= 0) { + if (delimiter_index == scanner->delimiter_length) { + if (lexer->lookahead == '"') { + return true; + } + delimiter_index = -1; + } else { + if (lexer->lookahead == scanner->delimiter[delimiter_index]) { + delimiter_index += 1; + } else { + delimiter_index = -1; + } + } + } + + if (delimiter_index == -1 && lexer->lookahead == ')') { + // The content doesn't include the )delimiter" part. + // We must still scan through it, but exclude it from the token. + lexer->mark_end(lexer); + delimiter_index = 0; + } + + advance(lexer); + } +} + +void *tree_sitter_cuda_external_scanner_create() { + Scanner *scanner = (Scanner *)ts_calloc(1, sizeof(Scanner)); + memset(scanner, 0, sizeof(Scanner)); + return scanner; +} + +bool tree_sitter_cuda_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + + if (valid_symbols[RAW_STRING_DELIMITER] && valid_symbols[RAW_STRING_CONTENT]) { + // we're in error recovery + return false; + } + + // No skipping leading whitespace: raw-string grammar is space-sensitive. + if (valid_symbols[RAW_STRING_DELIMITER]) { + lexer->result_symbol = RAW_STRING_DELIMITER; + return scan_raw_string_delimiter(scanner, lexer); + } + + if (valid_symbols[RAW_STRING_CONTENT]) { + lexer->result_symbol = RAW_STRING_CONTENT; + return scan_raw_string_content(scanner, lexer); + } + + return false; +} + +unsigned tree_sitter_cuda_external_scanner_serialize(void *payload, char *buffer) { + static_assert(MAX_DELIMITER_LENGTH * sizeof(wchar_t) < TREE_SITTER_SERIALIZATION_BUFFER_SIZE, + "Serialized delimiter is too long!"); + + Scanner *scanner = (Scanner *)payload; + size_t size = scanner->delimiter_length * sizeof(wchar_t); + memcpy(buffer, scanner->delimiter, size); + return (unsigned)size; +} + +void tree_sitter_cuda_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { + assert(length % sizeof(wchar_t) == 0 && "Can't decode serialized delimiter!"); + + Scanner *scanner = (Scanner *)payload; + scanner->delimiter_length = length / sizeof(wchar_t); + if (length > 0) { + memcpy(&scanner->delimiter[0], buffer, length); + } +} + +void tree_sitter_cuda_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + ts_free(scanner); +} |
