summaryrefslogtreecommitdiff
path: root/vendor/tree-sitter-kotlin/src/scanner.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/tree-sitter-kotlin/src/scanner.c')
-rw-r--r--vendor/tree-sitter-kotlin/src/scanner.c466
1 files changed, 466 insertions, 0 deletions
diff --git a/vendor/tree-sitter-kotlin/src/scanner.c b/vendor/tree-sitter-kotlin/src/scanner.c
new file mode 100644
index 0000000..fe4b0d1
--- /dev/null
+++ b/vendor/tree-sitter-kotlin/src/scanner.c
@@ -0,0 +1,466 @@
+#include "tree_sitter/array.h"
+#include "tree_sitter/parser.h"
+
+#include <wctype.h>
+
+enum TokenType {
+ SEMI,
+ CLASS_MEMBER_SEMI,
+ BLOCK_COMMENT,
+ NOT_IS,
+ IN,
+ Q_DOT,
+ MULTILINE_STRING_CONTENT,
+ CONSTRUCTOR,
+ GET,
+ SET,
+ DOLLAR,
+};
+
+#define MAX_WORD_SIZE 16
+#define MAX_WORDS 16
+
+static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
+
+static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
+
+static inline bool scan_whitespace_and_comments(TSLexer *lexer) {
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ return lexer->lookahead != '/';
+}
+
+static bool scan_word(TSLexer *lexer, const char *const word) {
+ for (uint8_t i = 0; word[i] != '\0'; i++) {
+ if (lexer->lookahead != word[i]) {
+ return false;
+ }
+ skip(lexer);
+ }
+ return true;
+}
+
+static bool scan_words(TSLexer *lexer, const char words[MAX_WORDS][MAX_WORD_SIZE], char scanned_word[16],
+ uint8_t *index) {
+ if (!scanned_word[0]) {
+ for (uint8_t i = 0; i < MAX_WORD_SIZE - 1; i++) {
+ if (!iswalpha(lexer->lookahead)) {
+ if (i == 0) {
+ return false;
+ }
+ break;
+ }
+ scanned_word[i] = (char)lexer->lookahead;
+ skip(lexer);
+ }
+ }
+
+ for (uint8_t i = 0; i < MAX_WORDS; i++) {
+ if (strncmp(scanned_word, words[i], MAX_WORD_SIZE) == 0) {
+ if (index != NULL) {
+ *index = i;
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void *tree_sitter_kotlin_external_scanner_create() { return NULL; }
+
+void tree_sitter_kotlin_external_scanner_destroy(void *payload) {}
+
+unsigned tree_sitter_kotlin_external_scanner_serialize(void *payload, char *buffer) { return 0; }
+
+void tree_sitter_kotlin_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {}
+
+bool tree_sitter_kotlin_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+ if (valid_symbols[MULTILINE_STRING_CONTENT]) {
+ bool did_advance = false;
+ lexer->result_symbol = MULTILINE_STRING_CONTENT;
+ while (!lexer->eof(lexer)) {
+ switch (lexer->lookahead) {
+ case '$':
+ lexer->mark_end(lexer);
+ advance(lexer);
+ if (iswalpha(lexer->lookahead) || lexer->lookahead == '{') {
+ return did_advance;
+ }
+ did_advance = true;
+ break;
+ case '"':
+ lexer->mark_end(lexer);
+ // 3 or 4 quotes means we're done
+ advance(lexer);
+ if (lexer->lookahead == '"') {
+ advance(lexer);
+ if (lexer->lookahead == '"') {
+ advance(lexer);
+ if (lexer->lookahead == '"') {
+ advance(lexer);
+ }
+ return did_advance;
+ }
+ }
+ did_advance = true;
+ break;
+ default:
+ advance(lexer);
+ did_advance = true;
+ break;
+ }
+ }
+ }
+
+ if (valid_symbols[SEMI] || valid_symbols[CLASS_MEMBER_SEMI]) {
+ lexer->result_symbol = valid_symbols[SEMI] ? SEMI : CLASS_MEMBER_SEMI;
+ lexer->mark_end(lexer);
+ bool saw_newline = false;
+ for (;;) {
+ if (lexer->eof(lexer)) {
+ return true;
+ }
+
+ if (lexer->lookahead == ';') {
+ advance(lexer);
+ lexer->mark_end(lexer);
+ return true;
+ }
+
+ if (!iswspace(lexer->lookahead)) {
+ break;
+ }
+
+ if (lexer->lookahead == '\n') {
+ skip(lexer);
+ saw_newline = true;
+ break;
+ }
+
+ if (lexer->lookahead == '\r') {
+ skip(lexer);
+
+ if (lexer->lookahead == '\n') {
+ skip(lexer);
+ }
+
+ saw_newline = true;
+ break;
+ }
+
+ skip(lexer);
+ }
+
+ // Skip whitespace and comments
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ if (lexer->lookahead == '/') {
+ goto comment;
+ }
+
+ if (!saw_newline) {
+ switch (lexer->lookahead) {
+ case '!':
+ skip(lexer);
+ goto continue_not_is_from_semi;
+ case '?':
+ if (valid_symbols[Q_DOT]) {
+ goto q_dot_from_semi;
+ }
+ return false;
+ case 'i':
+ return scan_word(lexer, "import");
+ case ';':
+ advance(lexer);
+ lexer->mark_end(lexer);
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ char scanned_word[16] = {0};
+ _switch:
+ switch (lexer->lookahead) {
+ case ',':
+ case '.':
+ case ':':
+ case '*':
+ case '%':
+ case '>':
+ case '<':
+ case '=':
+ case '{':
+ case '[':
+ case '|':
+ case '&':
+ case '/':
+ return false;
+ // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
+ // Insert before +/-{float}
+ case '+':
+ skip(lexer);
+ if (lexer->lookahead == '+') {
+ return true;
+ }
+ return iswdigit(lexer->lookahead);
+ case '-':
+ skip(lexer);
+ if (lexer->lookahead == '-') {
+ return true;
+ }
+ return iswdigit(lexer->lookahead);
+ // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
+ case '!':
+ skip(lexer);
+ if (lexer->lookahead == 'i' && valid_symbols[NOT_IS]) {
+ skip(lexer);
+ if (lexer->lookahead == 's') {
+ skip(lexer);
+ if (!iswalnum(lexer->lookahead)) {
+ return true;
+ }
+ }
+ }
+ return lexer->lookahead != '=';
+ case '?':
+ if (valid_symbols[Q_DOT]) {
+ goto q_dot_from_semi;
+ }
+ return true;
+ case 'e':
+ case 'i':
+ case 'g':
+ case 's':
+ case 'p':
+ case 'a':
+ case 'f':
+ case 'o':
+ case 'l':
+ case 'v':
+ case 'n':
+ case 'c':
+ case 'b':
+ case 'w':
+ while (scan_words(lexer,
+ (const char[16][16]){"public", "private", "protected", "internal", "abstract",
+ "final", "open", "override", "lateinit", "vararg", "noinline",
+ "crossinline", "external", "suspend", "inline"},
+ scanned_word, NULL)) {
+ memset(scanned_word, 0, MAX_WORD_SIZE);
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ }
+
+ uint8_t index = -1;
+ bool res = scan_words(
+ lexer,
+ (const char[16][16]){"else", "in", "instanceof", "get", "set", "constructor", "by", "as", "where"},
+ scanned_word, &index);
+
+ // If `CLASS_MEMBER_SEMI` is valid, we found a secondary constructor and so we want to insert a semi, OR
+ // we found a variable named constructor whose field is being accessed
+ if (index == 5) {
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ if (valid_symbols[CLASS_MEMBER_SEMI] || lexer->lookahead == '.' || lexer->lookahead == '=') {
+ return true;
+ }
+ }
+ // Ordinarily, we should not insert a semicolon if there is an `else` on the next line,
+ // except for when it's a 'when entry', which has a `->` after the `else`.
+ else if (index == 0) {
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ if (lexer->lookahead == '-') {
+ skip(lexer);
+ if (lexer->lookahead == '>') {
+ return true;
+ }
+ }
+ }
+ // If `get` was found and the keyword is not valid, return a semi since it's being used as an identifier
+ else if (index == 3 && (!valid_symbols[GET] || lexer->lookahead == '[')) {
+ return true;
+ }
+ // If `set` was found and the keyword is not valid, return a semi since it's being used as an identifier
+ else if (index == 4 && (!valid_symbols[SET] || lexer->lookahead == '[' || lexer->lookahead == '(' ||
+ lexer->lookahead == '.')) {
+ if (lexer->lookahead == '(' && valid_symbols[SET]) {
+ // skip until the closing parenthesis
+ while (lexer->lookahead != ')' && !lexer->eof(lexer)) {
+ skip(lexer);
+ }
+ skip(lexer);
+
+ while (iswspace(lexer->lookahead)) {
+ if (lexer->lookahead == '\n') {
+ return true;
+ }
+ skip(lexer);
+ }
+ return false;
+ }
+ return true;
+ }
+ // If `in` was found and this specific external keyword is valid,
+ // return a semi since it's being used in a range test
+ else if (index == 1 && valid_symbols[IN]) {
+ return true;
+ }
+ return !res;
+ case ';':
+ advance(lexer);
+ lexer->mark_end(lexer);
+ return true;
+ case '@':
+ if (valid_symbols[CONSTRUCTOR]) {
+ while (!iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ char ctor[12] = "constructor";
+ for (uint8_t i = 0; i < 11; i++) {
+ if (lexer->lookahead != ctor[i]) {
+ return true;
+ }
+ skip(lexer);
+ }
+ return false;
+ }
+ if (valid_symbols[GET] || valid_symbols[SET]) {
+ bool saw_paren = false;
+ while ((saw_paren ? lexer->lookahead != '\n' : !iswspace(lexer->lookahead))) {
+ skip(lexer);
+ if (lexer->lookahead == '(') {
+ saw_paren = true;
+ }
+ if (lexer->lookahead == ')') {
+ saw_paren = false;
+ }
+ }
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ if (lexer->lookahead == '/') {
+ return true;
+ }
+ goto _switch;
+ }
+ return true;
+
+ default:
+ return true;
+ }
+ }
+
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+
+ if (valid_symbols[NOT_IS]) {
+ if (lexer->lookahead == '!') {
+ advance(lexer);
+ continue_not_is_from_semi:
+ if (lexer->lookahead == 'i') {
+ advance(lexer);
+ if (lexer->lookahead == 's') {
+ advance(lexer);
+ lexer->result_symbol = NOT_IS;
+ lexer->mark_end(lexer);
+ return !iswalnum(lexer->lookahead);
+ }
+ }
+ }
+ }
+
+ if (valid_symbols[IN]) {
+ if (lexer->lookahead == 'i') {
+ advance(lexer);
+ if (lexer->lookahead == 'n') {
+ advance(lexer);
+ lexer->result_symbol = IN;
+ lexer->mark_end(lexer);
+ return !iswalnum(lexer->lookahead);
+ }
+ }
+ }
+
+q_dot_from_semi:
+ if (valid_symbols[Q_DOT]) {
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ if (lexer->lookahead == '?') {
+ advance(lexer);
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+ if (lexer->lookahead == '.') {
+ advance(lexer);
+ lexer->result_symbol = Q_DOT;
+ lexer->mark_end(lexer);
+ return true;
+ }
+ }
+ }
+
+comment:
+ if (valid_symbols[DOLLAR]) {
+ return false;
+ }
+
+ if (lexer->lookahead == '/') {
+ advance(lexer);
+ if (lexer->lookahead != '*') {
+ return false;
+ }
+ advance(lexer);
+
+ bool after_star = false;
+ unsigned nesting_depth = 1;
+ for (;;) {
+ switch (lexer->lookahead) {
+ case '\0':
+ return false;
+ case '*':
+ advance(lexer);
+ after_star = true;
+ break;
+ case '/':
+ if (after_star) {
+ advance(lexer);
+ after_star = false;
+ nesting_depth--;
+ if (nesting_depth == 0) {
+ lexer->result_symbol = BLOCK_COMMENT;
+ lexer->mark_end(lexer);
+ return true;
+ }
+ } else {
+ advance(lexer);
+ after_star = false;
+ if (lexer->lookahead == '*') {
+ nesting_depth++;
+ advance(lexer);
+ }
+ }
+ break;
+ default:
+ advance(lexer);
+ after_star = false;
+ break;
+ }
+ }
+ }
+
+ return false;
+}