diff options
Diffstat (limited to 'vendor/tree-sitter-javascript/src/scanner.c')
| -rw-r--r-- | vendor/tree-sitter-javascript/src/scanner.c | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/vendor/tree-sitter-javascript/src/scanner.c b/vendor/tree-sitter-javascript/src/scanner.c new file mode 100644 index 0000000..9bea968 --- /dev/null +++ b/vendor/tree-sitter-javascript/src/scanner.c @@ -0,0 +1,293 @@ +#include "tree_sitter/parser.h" + +#include <wctype.h> + +enum TokenType { + AUTOMATIC_SEMICOLON, + TEMPLATE_CHARS, + TERNARY_QMARK, + HTML_COMMENT, + LOGICAL_OR, + ESCAPE_SEQUENCE, + REGEX_PATTERN, +}; + +void *tree_sitter_javascript_external_scanner_create() { return NULL; } + +void tree_sitter_javascript_external_scanner_destroy(void *p) {} + +void tree_sitter_javascript_external_scanner_reset(void *p) {} + +unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) { return 0; } + +void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {} + +static void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static bool scan_template_chars(TSLexer *lexer) { + lexer->result_symbol = TEMPLATE_CHARS; + for (bool has_content = false;; has_content = true) { + lexer->mark_end(lexer); + switch (lexer->lookahead) { + case '`': + return has_content; + case '\0': + return false; + case '$': + advance(lexer); + if (lexer->lookahead == '{') { + return has_content; + } + break; + case '\\': + return has_content; + default: + advance(lexer); + } + } +} + +static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) { + for (;;) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead == '/') { + skip(lexer); + + if (lexer->lookahead == '/') { + skip(lexer); + while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 && + lexer->lookahead != 0x2029) { + skip(lexer); + } + *scanned_comment = true; + } else if (lexer->lookahead == '*') { + skip(lexer); + while (lexer->lookahead != 0) { + if (lexer->lookahead == '*') { + skip(lexer); + if (lexer->lookahead == '/') { + skip(lexer); + *scanned_comment = true; + break; + } + } else { + skip(lexer); + } + } + } else { + return false; + } + } else { + return true; + } + } +} + +static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) { + lexer->result_symbol = AUTOMATIC_SEMICOLON; + lexer->mark_end(lexer); + + for (;;) { + if (lexer->lookahead == 0) { + return true; + } + + if (lexer->lookahead == '/') { + if (!scan_whitespace_and_comments(lexer, scanned_comment)) { + return false; + } + if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { + return true; + } + } + + if (lexer->lookahead == '}') { + return true; + } + + if (lexer->is_at_included_range_start(lexer)) { + return true; + } + + if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { + break; + } + + if (!iswspace(lexer->lookahead)) { + return false; + } + + skip(lexer); + } + + skip(lexer); + + if (!scan_whitespace_and_comments(lexer, scanned_comment)) { + return false; + } + + switch (lexer->lookahead) { + case ',': + case '.': + case ':': + case ';': + case '*': + case '%': + case '>': + case '<': + case '=': + case '[': + case '(': + case '?': + case '^': + case '|': + case '&': + case '/': + return false; + + // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`. + case '+': + skip(lexer); + return lexer->lookahead == '+'; + case '-': + skip(lexer); + return lexer->lookahead == '-'; + + // Don't insert a semicolon before `!=`, but do insert one before a unary `!`. + case '!': + skip(lexer); + return lexer->lookahead != '='; + + // Don't insert a semicolon before `in` or `instanceof`, but do insert one + // before an identifier. + case 'i': + skip(lexer); + + if (lexer->lookahead != 'n') { + return true; + } + skip(lexer); + + if (!iswalpha(lexer->lookahead)) { + return false; + } + + for (unsigned i = 0; i < 8; i++) { + if (lexer->lookahead != "stanceof"[i]) { + return true; + } + skip(lexer); + } + + if (!iswalpha(lexer->lookahead)) { + return false; + } + break; + + default: + break; + } + + return true; +} + +static bool scan_ternary_qmark(TSLexer *lexer) { + for (;;) { + if (!iswspace(lexer->lookahead)) { + break; + } + skip(lexer); + } + + if (lexer->lookahead == '?') { + advance(lexer); + + if (lexer->lookahead == '?') { + return false; + } + + lexer->mark_end(lexer); + lexer->result_symbol = TERNARY_QMARK; + + if (lexer->lookahead == '.') { + advance(lexer); + if (iswdigit(lexer->lookahead)) { + return true; + } + return false; + } + return true; + } + return false; +} + +static bool scan_html_comment(TSLexer *lexer) { + while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { + skip(lexer); + } + + const char *comment_start = "<!--"; + const char *comment_end = "-->"; + + if (lexer->lookahead == '<') { + for (unsigned i = 0; i < 4; i++) { + if (lexer->lookahead != comment_start[i]) { + return false; + } + advance(lexer); + } + } else if (lexer->lookahead == '-') { + for (unsigned i = 0; i < 3; i++) { + if (lexer->lookahead != comment_end[i]) { + return false; + } + advance(lexer); + } + } else { + return false; + } + + while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 && + lexer->lookahead != 0x2029) { + advance(lexer); + } + + lexer->result_symbol = HTML_COMMENT; + lexer->mark_end(lexer); + + return true; +} + +bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[TEMPLATE_CHARS]) { + if (valid_symbols[AUTOMATIC_SEMICOLON]) { + return false; + } + return scan_template_chars(lexer); + } + + if (valid_symbols[AUTOMATIC_SEMICOLON]) { + bool scanned_comment = false; + bool ret = scan_automatic_semicolon(lexer, !valid_symbols[LOGICAL_OR], &scanned_comment); + if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') { + return scan_ternary_qmark(lexer); + } + return ret; + } + + if (valid_symbols[TERNARY_QMARK]) { + return scan_ternary_qmark(lexer); + } + + if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] && + !valid_symbols[REGEX_PATTERN]) { + return scan_html_comment(lexer); + } + + return false; +} |
