From 5a8dbc6347b3541e84fe669b22c17ad3b715e258 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Wed, 21 Jan 2026 20:22:09 +0100 Subject: Engage! --- .../go-tree-sitter/javascript/scanner.c | 316 +++++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c (limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c') diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c new file mode 100644 index 0000000..60ed40c --- /dev/null +++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c @@ -0,0 +1,316 @@ +#include "parser.h" + +#include + +enum TokenType { + AUTOMATIC_SEMICOLON, + TEMPLATE_CHARS, + TERNARY_QMARK, + HTML_COMMENT, + LOGICAL_OR, + ESCAPE_SEQUENCE, + REGEX_PATTERN, +}; + +void *tree_sitter_javascript_external_scanner_create() { return NULL; } + +void tree_sitter_javascript_external_scanner_destroy(void *p) {} + +unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) { return 0; } + +void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {} + +static void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static bool scan_template_chars(TSLexer *lexer) { + lexer->result_symbol = TEMPLATE_CHARS; + for (bool has_content = false;; has_content = true) { + lexer->mark_end(lexer); + switch (lexer->lookahead) { + case '`': + return has_content; + case '\0': + return false; + case '$': + advance(lexer); + if (lexer->lookahead == '{') { + return has_content; + } + break; + case '\\': + return has_content; + default: + advance(lexer); + } + } +} + +typedef enum { + REJECT, // Semicolon is illegal, ie a syntax error occurred + NO_NEWLINE, // Unclear if semicolon will be legal, continue + ACCEPT, // Semicolon is legal, assuming a comment was encountered +} WhitespaceResult; + +/** + * @param consume If false, only consume enough to check if comment indicates semicolon-legality + */ +static WhitespaceResult scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool consume) { + bool saw_block_newline = false; + + for (;;) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead == '/') { + skip(lexer); + + if (lexer->lookahead == '/') { + skip(lexer); + while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 && + lexer->lookahead != 0x2029) { + skip(lexer); + } + *scanned_comment = true; + } else if (lexer->lookahead == '*') { + skip(lexer); + while (lexer->lookahead != 0) { + if (lexer->lookahead == '*') { + skip(lexer); + if (lexer->lookahead == '/') { + skip(lexer); + *scanned_comment = true; + + if (lexer->lookahead != '/' && !consume) { + return saw_block_newline ? ACCEPT : NO_NEWLINE; + } + + break; + } + } else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { + saw_block_newline = true; + skip(lexer); + } else { + skip(lexer); + } + } + } else { + return REJECT; + } + } else { + return ACCEPT; + } + } +} + +static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) { + lexer->result_symbol = AUTOMATIC_SEMICOLON; + lexer->mark_end(lexer); + + for (;;) { + if (lexer->lookahead == 0) { + return true; + } + + if (lexer->lookahead == '/') { + WhitespaceResult result = scan_whitespace_and_comments(lexer, scanned_comment, false); + if (result == REJECT) { + return false; + } + + if (result == ACCEPT && comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { + return true; + } + } + + if (lexer->lookahead == '}') { + return true; + } + + if (lexer->is_at_included_range_start(lexer)) { + return true; + } + + if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { + break; + } + + if (!iswspace(lexer->lookahead)) { + return false; + } + + skip(lexer); + } + + skip(lexer); + + if (scan_whitespace_and_comments(lexer, scanned_comment, true) == REJECT) { + return false; + } + + switch (lexer->lookahead) { + case ',': + case ':': + case ';': + case '*': + case '%': + case '>': + case '<': + case '=': + case '[': + case '(': + case '?': + case '^': + case '|': + case '&': + case '/': + return false; + + // Insert a semicolon before decimals literals but not otherwise. + case '.': + skip(lexer); + return iswdigit(lexer->lookahead); + + // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`. + case '+': + skip(lexer); + return lexer->lookahead == '+'; + case '-': + skip(lexer); + return lexer->lookahead == '-'; + + // Don't insert a semicolon before `!=`, but do insert one before a unary `!`. + case '!': + skip(lexer); + return lexer->lookahead != '='; + + // Don't insert a semicolon before `in` or `instanceof`, but do insert one + // before an identifier. + case 'i': + skip(lexer); + + if (lexer->lookahead != 'n') { + return true; + } + skip(lexer); + + if (!iswalpha(lexer->lookahead)) { + return false; + } + + for (unsigned i = 0; i < 8; i++) { + if (lexer->lookahead != "stanceof"[i]) { + return true; + } + skip(lexer); + } + + if (!iswalpha(lexer->lookahead)) { + return false; + } + break; + + default: + break; + } + + return true; +} + +static bool scan_ternary_qmark(TSLexer *lexer) { + for (;;) { + if (!iswspace(lexer->lookahead)) { + break; + } + skip(lexer); + } + + if (lexer->lookahead == '?') { + advance(lexer); + + if (lexer->lookahead == '?') { + return false; + } + + lexer->mark_end(lexer); + lexer->result_symbol = TERNARY_QMARK; + + if (lexer->lookahead == '.') { + advance(lexer); + if (iswdigit(lexer->lookahead)) { + return true; + } + return false; + } + return true; + } + return false; +} + +static bool scan_html_comment(TSLexer *lexer) { + while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { + skip(lexer); + } + + const char *comment_start = ""; + + if (lexer->lookahead == '<') { + for (unsigned i = 0; i < 4; i++) { + if (lexer->lookahead != comment_start[i]) { + return false; + } + advance(lexer); + } + } else if (lexer->lookahead == '-') { + for (unsigned i = 0; i < 3; i++) { + if (lexer->lookahead != comment_end[i]) { + return false; + } + advance(lexer); + } + } else { + return false; + } + + while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 && + lexer->lookahead != 0x2029) { + advance(lexer); + } + + lexer->result_symbol = HTML_COMMENT; + lexer->mark_end(lexer); + + return true; +} + +bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[TEMPLATE_CHARS]) { + if (valid_symbols[AUTOMATIC_SEMICOLON]) { + return false; + } + return scan_template_chars(lexer); + } + + if (valid_symbols[AUTOMATIC_SEMICOLON]) { + bool scanned_comment = false; + bool ret = scan_automatic_semicolon(lexer, !valid_symbols[LOGICAL_OR], &scanned_comment); + if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') { + return scan_ternary_qmark(lexer); + } + return ret; + } + + if (valid_symbols[TERNARY_QMARK]) { + return scan_ternary_qmark(lexer); + } + + if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] && + !valid_symbols[REGEX_PATTERN]) { + return scan_html_comment(lexer); + } + + return false; +} -- cgit v1.2.3