diff options
Diffstat (limited to 'vendor/tree-sitter-kotlin/src/scanner.c')
| -rw-r--r-- | vendor/tree-sitter-kotlin/src/scanner.c | 466 |
1 files changed, 466 insertions, 0 deletions
diff --git a/vendor/tree-sitter-kotlin/src/scanner.c b/vendor/tree-sitter-kotlin/src/scanner.c new file mode 100644 index 0000000..fe4b0d1 --- /dev/null +++ b/vendor/tree-sitter-kotlin/src/scanner.c @@ -0,0 +1,466 @@ +#include "tree_sitter/array.h" +#include "tree_sitter/parser.h" + +#include <wctype.h> + +enum TokenType { + SEMI, + CLASS_MEMBER_SEMI, + BLOCK_COMMENT, + NOT_IS, + IN, + Q_DOT, + MULTILINE_STRING_CONTENT, + CONSTRUCTOR, + GET, + SET, + DOLLAR, +}; + +#define MAX_WORD_SIZE 16 +#define MAX_WORDS 16 + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static inline bool scan_whitespace_and_comments(TSLexer *lexer) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + return lexer->lookahead != '/'; +} + +static bool scan_word(TSLexer *lexer, const char *const word) { + for (uint8_t i = 0; word[i] != '\0'; i++) { + if (lexer->lookahead != word[i]) { + return false; + } + skip(lexer); + } + return true; +} + +static bool scan_words(TSLexer *lexer, const char words[MAX_WORDS][MAX_WORD_SIZE], char scanned_word[16], + uint8_t *index) { + if (!scanned_word[0]) { + for (uint8_t i = 0; i < MAX_WORD_SIZE - 1; i++) { + if (!iswalpha(lexer->lookahead)) { + if (i == 0) { + return false; + } + break; + } + scanned_word[i] = (char)lexer->lookahead; + skip(lexer); + } + } + + for (uint8_t i = 0; i < MAX_WORDS; i++) { + if (strncmp(scanned_word, words[i], MAX_WORD_SIZE) == 0) { + if (index != NULL) { + *index = i; + } + return true; + } + } + + return false; +} + +void *tree_sitter_kotlin_external_scanner_create() { return NULL; } + +void tree_sitter_kotlin_external_scanner_destroy(void *payload) {} + +unsigned tree_sitter_kotlin_external_scanner_serialize(void *payload, char *buffer) { return 0; } + +void tree_sitter_kotlin_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {} + +bool tree_sitter_kotlin_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[MULTILINE_STRING_CONTENT]) { + bool did_advance = false; + lexer->result_symbol = MULTILINE_STRING_CONTENT; + while (!lexer->eof(lexer)) { + switch (lexer->lookahead) { + case '$': + lexer->mark_end(lexer); + advance(lexer); + if (iswalpha(lexer->lookahead) || lexer->lookahead == '{') { + return did_advance; + } + did_advance = true; + break; + case '"': + lexer->mark_end(lexer); + // 3 or 4 quotes means we're done + advance(lexer); + if (lexer->lookahead == '"') { + advance(lexer); + if (lexer->lookahead == '"') { + advance(lexer); + if (lexer->lookahead == '"') { + advance(lexer); + } + return did_advance; + } + } + did_advance = true; + break; + default: + advance(lexer); + did_advance = true; + break; + } + } + } + + if (valid_symbols[SEMI] || valid_symbols[CLASS_MEMBER_SEMI]) { + lexer->result_symbol = valid_symbols[SEMI] ? SEMI : CLASS_MEMBER_SEMI; + lexer->mark_end(lexer); + bool saw_newline = false; + for (;;) { + if (lexer->eof(lexer)) { + return true; + } + + if (lexer->lookahead == ';') { + advance(lexer); + lexer->mark_end(lexer); + return true; + } + + if (!iswspace(lexer->lookahead)) { + break; + } + + if (lexer->lookahead == '\n') { + skip(lexer); + saw_newline = true; + break; + } + + if (lexer->lookahead == '\r') { + skip(lexer); + + if (lexer->lookahead == '\n') { + skip(lexer); + } + + saw_newline = true; + break; + } + + skip(lexer); + } + + // Skip whitespace and comments + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '/') { + goto comment; + } + + if (!saw_newline) { + switch (lexer->lookahead) { + case '!': + skip(lexer); + goto continue_not_is_from_semi; + case '?': + if (valid_symbols[Q_DOT]) { + goto q_dot_from_semi; + } + return false; + case 'i': + return scan_word(lexer, "import"); + case ';': + advance(lexer); + lexer->mark_end(lexer); + return true; + default: + return false; + } + } + + char scanned_word[16] = {0}; + _switch: + switch (lexer->lookahead) { + case ',': + case '.': + case ':': + case '*': + case '%': + case '>': + case '<': + case '=': + case '{': + case '[': + case '|': + case '&': + case '/': + return false; + // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`. + // Insert before +/-{float} + case '+': + skip(lexer); + if (lexer->lookahead == '+') { + return true; + } + return iswdigit(lexer->lookahead); + case '-': + skip(lexer); + if (lexer->lookahead == '-') { + return true; + } + return iswdigit(lexer->lookahead); + // Don't insert a semicolon before `!=`, but do insert one before a unary `!`. + case '!': + skip(lexer); + if (lexer->lookahead == 'i' && valid_symbols[NOT_IS]) { + skip(lexer); + if (lexer->lookahead == 's') { + skip(lexer); + if (!iswalnum(lexer->lookahead)) { + return true; + } + } + } + return lexer->lookahead != '='; + case '?': + if (valid_symbols[Q_DOT]) { + goto q_dot_from_semi; + } + return true; + case 'e': + case 'i': + case 'g': + case 's': + case 'p': + case 'a': + case 'f': + case 'o': + case 'l': + case 'v': + case 'n': + case 'c': + case 'b': + case 'w': + while (scan_words(lexer, + (const char[16][16]){"public", "private", "protected", "internal", "abstract", + "final", "open", "override", "lateinit", "vararg", "noinline", + "crossinline", "external", "suspend", "inline"}, + scanned_word, NULL)) { + memset(scanned_word, 0, MAX_WORD_SIZE); + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + } + + uint8_t index = -1; + bool res = scan_words( + lexer, + (const char[16][16]){"else", "in", "instanceof", "get", "set", "constructor", "by", "as", "where"}, + scanned_word, &index); + + // If `CLASS_MEMBER_SEMI` is valid, we found a secondary constructor and so we want to insert a semi, OR + // we found a variable named constructor whose field is being accessed + if (index == 5) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (valid_symbols[CLASS_MEMBER_SEMI] || lexer->lookahead == '.' || lexer->lookahead == '=') { + return true; + } + } + // Ordinarily, we should not insert a semicolon if there is an `else` on the next line, + // except for when it's a 'when entry', which has a `->` after the `else`. + else if (index == 0) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '-') { + skip(lexer); + if (lexer->lookahead == '>') { + return true; + } + } + } + // If `get` was found and the keyword is not valid, return a semi since it's being used as an identifier + else if (index == 3 && (!valid_symbols[GET] || lexer->lookahead == '[')) { + return true; + } + // If `set` was found and the keyword is not valid, return a semi since it's being used as an identifier + else if (index == 4 && (!valid_symbols[SET] || lexer->lookahead == '[' || lexer->lookahead == '(' || + lexer->lookahead == '.')) { + if (lexer->lookahead == '(' && valid_symbols[SET]) { + // skip until the closing parenthesis + while (lexer->lookahead != ')' && !lexer->eof(lexer)) { + skip(lexer); + } + skip(lexer); + + while (iswspace(lexer->lookahead)) { + if (lexer->lookahead == '\n') { + return true; + } + skip(lexer); + } + return false; + } + return true; + } + // If `in` was found and this specific external keyword is valid, + // return a semi since it's being used in a range test + else if (index == 1 && valid_symbols[IN]) { + return true; + } + return !res; + case ';': + advance(lexer); + lexer->mark_end(lexer); + return true; + case '@': + if (valid_symbols[CONSTRUCTOR]) { + while (!iswspace(lexer->lookahead)) { + skip(lexer); + } + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + char ctor[12] = "constructor"; + for (uint8_t i = 0; i < 11; i++) { + if (lexer->lookahead != ctor[i]) { + return true; + } + skip(lexer); + } + return false; + } + if (valid_symbols[GET] || valid_symbols[SET]) { + bool saw_paren = false; + while ((saw_paren ? lexer->lookahead != '\n' : !iswspace(lexer->lookahead))) { + skip(lexer); + if (lexer->lookahead == '(') { + saw_paren = true; + } + if (lexer->lookahead == ')') { + saw_paren = false; + } + } + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '/') { + return true; + } + goto _switch; + } + return true; + + default: + return true; + } + } + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (valid_symbols[NOT_IS]) { + if (lexer->lookahead == '!') { + advance(lexer); + continue_not_is_from_semi: + if (lexer->lookahead == 'i') { + advance(lexer); + if (lexer->lookahead == 's') { + advance(lexer); + lexer->result_symbol = NOT_IS; + lexer->mark_end(lexer); + return !iswalnum(lexer->lookahead); + } + } + } + } + + if (valid_symbols[IN]) { + if (lexer->lookahead == 'i') { + advance(lexer); + if (lexer->lookahead == 'n') { + advance(lexer); + lexer->result_symbol = IN; + lexer->mark_end(lexer); + return !iswalnum(lexer->lookahead); + } + } + } + +q_dot_from_semi: + if (valid_symbols[Q_DOT]) { + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '?') { + advance(lexer); + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + if (lexer->lookahead == '.') { + advance(lexer); + lexer->result_symbol = Q_DOT; + lexer->mark_end(lexer); + return true; + } + } + } + +comment: + if (valid_symbols[DOLLAR]) { + return false; + } + + if (lexer->lookahead == '/') { + advance(lexer); + if (lexer->lookahead != '*') { + return false; + } + advance(lexer); + + bool after_star = false; + unsigned nesting_depth = 1; + for (;;) { + switch (lexer->lookahead) { + case '\0': + return false; + case '*': + advance(lexer); + after_star = true; + break; + case '/': + if (after_star) { + advance(lexer); + after_star = false; + nesting_depth--; + if (nesting_depth == 0) { + lexer->result_symbol = BLOCK_COMMENT; + lexer->mark_end(lexer); + return true; + } + } else { + advance(lexer); + after_star = false; + if (lexer->lookahead == '*') { + nesting_depth++; + advance(lexer); + } + } + break; + default: + advance(lexer); + after_star = false; + break; + } + } + } + + return false; +} |
