summary | refs | log | tree | commit | diff
path: root/vendor/tree-sitter-javascript/src/scanner.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/tree-sitter-javascript/src/scanner.c')
-rw-r--r-- vendor/tree-sitter-javascript/src/scanner.c | 293
1 files changed, 293 insertions, 0 deletions
diff --git a/vendor/tree-sitter-javascript/src/scanner.c b/vendor/tree-sitter-javascript/src/scanner.c
new file mode 100644
index 0000000..9bea968
--- /dev/null
+++ b/vendor/tree-sitter-javascript/src/scanner.c
@@ -0,0 +1,293 @@
+#include "tree_sitter/parser.h"
+
+#include <wctype.h>
+
+enum TokenType { // External token kinds; this file uses them to index valid_symbols and to set lexer->result_symbol.
+ AUTOMATIC_SEMICOLON, // produced by scan_automatic_semicolon
+ TEMPLATE_CHARS, // produced by scan_template_chars
+ TERNARY_QMARK, // produced by scan_ternary_qmark
+ HTML_COMMENT, // produced by scan_html_comment
+ LOGICAL_OR, // never produced in this file; only read from valid_symbols
+ ESCAPE_SEQUENCE, // never produced in this file; only read from valid_symbols
+ REGEX_PATTERN, // never produced in this file; only read from valid_symbols
+}; // NOTE(review): the member order presumably has to mirror the grammar's externals list - confirm before reordering.
+
+void *tree_sitter_javascript_external_scanner_create() { return NULL; } // Allocates no scanner state; always returns NULL. NOTE(review): `()` is not a prototype before C23; `(void)` would be stricter.
+
+void tree_sitter_javascript_external_scanner_destroy(void *p) {} // Intentionally empty; `p` is ignored (presumably because create() allocates nothing).
+
+void tree_sitter_javascript_external_scanner_reset(void *p) {} // Intentionally empty; `p` is ignored.
+
+unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) { return 0; } // Writes nothing to `buffer`; returns 0 (presumably the number of bytes serialized - confirm against the tree-sitter API).
+
+void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {} // Intentionally empty; all three arguments are ignored.
+
+static void advance(TSLexer *lexer) { lexer->advance(lexer, false); } // Moves one character ahead with the skip flag false (per the tree-sitter lexer API the character stays part of the token).
+
+static void skip(TSLexer *lexer) { lexer->advance(lexer, true); } // Moves one character ahead with the skip flag true (per the tree-sitter lexer API the character is treated as whitespace outside the token).
+
+static bool scan_template_chars(TSLexer *lexer) { // Scans a run of plain template-string characters; returns true only when at least one character precedes the stopping point.
+ lexer->result_symbol = TEMPLATE_CHARS;
+ for (bool has_content = false;; has_content = true) { // has_content becomes true once one iteration has completed without returning
+ lexer->mark_end(lexer); // the token end is re-marked before each character, so the stopping character is never included
+ switch (lexer->lookahead) {
+ case '`': // backtick: stop in front of it
+ return has_content;
+ case '\0': // zero lookahead: fail even if characters were already consumed
+ return false;
+ case '$':
+ advance(lexer);
+ if (lexer->lookahead == '{') { // `${`: stop; the end was marked before the `$`, so the `$` is excluded from the token
+ return has_content;
+ }
+ break; // a `$` not followed by `{` counts as ordinary content
+ case '\\': // backslash: stop in front of it without consuming it
+ return has_content;
+ default:
+ advance(lexer);
+ }
+ }
+}
+
+static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) { // Skips whitespace plus line and block comments. Returns true on reaching anything that is neither whitespace nor `/` (a zero lookahead included); false when a `/` is not followed by `/` or `*`.
+ for (;;) {
+ while (iswspace(lexer->lookahead)) {
+ skip(lexer);
+ }
+
+ if (lexer->lookahead == '/') {
+ skip(lexer); // the `/` is skipped before it is known whether a comment follows
+
+ if (lexer->lookahead == '/') {
+ skip(lexer);
+ while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 && // line comment: runs up to '\n', U+2028, U+2029 or a zero lookahead
+ lexer->lookahead != 0x2029) {
+ skip(lexer);
+ }
+ *scanned_comment = true;
+ } else if (lexer->lookahead == '*') {
+ skip(lexer);
+ while (lexer->lookahead != 0) { // block comment: look for the closing star-slash pair
+ if (lexer->lookahead == '*') {
+ skip(lexer);
+ if (lexer->lookahead == '/') {
+ skip(lexer);
+ *scanned_comment = true; // set only when the block comment is actually closed
+ break;
+ }
+ } else {
+ skip(lexer);
+ }
+ }
+ } else {
+ return false; // a lone `/` (already skipped) does not start a comment
+ }
+ } else {
+ return true;
+ }
+ }
+}
+
+static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) { // Decides whether an AUTOMATIC_SEMICOLON token may be produced here; returns true to produce it. *scanned_comment is set when a comment was skipped on the way.
+ lexer->result_symbol = AUTOMATIC_SEMICOLON;
+ lexer->mark_end(lexer); // the end is marked before anything is skipped and never moved again in this function
+
+ for (;;) { // phase 1: look at the remainder of the current line
+ if (lexer->lookahead == 0) { // zero lookahead: accept
+ return true;
+ }
+
+ if (lexer->lookahead == '/') {
+ if (!scan_whitespace_and_comments(lexer, scanned_comment)) { // the `/` was not the start of a comment
+ return false;
+ }
+ if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { // after the comment(s): accept at once unless `,` or `=` follows
+ return true;
+ }
+ }
+
+ if (lexer->lookahead == '}') { // closing brace: accept
+ return true;
+ }
+
+ if (lexer->is_at_included_range_start(lexer)) { // start of an included range: accept
+ return true;
+ }
+
+ if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { // line terminator: continue with the next-line checks below
+ break;
+ }
+
+ if (!iswspace(lexer->lookahead)) { // any other non-whitespace character on the same line: reject
+ return false;
+ }
+
+ skip(lexer);
+ }
+
+ skip(lexer); // step over the line terminator that ended the loop
+
+ if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
+ return false;
+ }
+
+ switch (lexer->lookahead) { // phase 2: the first significant character after the line break decides
+ case ',':
+ case '.':
+ case ':':
+ case ';':
+ case '*':
+ case '%':
+ case '>':
+ case '<':
+ case '=':
+ case '[':
+ case '(':
+ case '?':
+ case '^':
+ case '|':
+ case '&':
+ case '/':
+ return false; // every character listed above rejects the semicolon
+
+ // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
+ case '+':
+ skip(lexer);
+ return lexer->lookahead == '+';
+ case '-':
+ skip(lexer);
+ return lexer->lookahead == '-';
+
+ // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
+ case '!':
+ skip(lexer);
+ return lexer->lookahead != '=';
+
+ // Don't insert a semicolon before `in` or `instanceof`, but do insert one
+ // before an identifier.
+ case 'i':
+ skip(lexer);
+
+ if (lexer->lookahead != 'n') {
+ return true;
+ }
+ skip(lexer);
+
+ if (!iswalpha(lexer->lookahead)) { // `in` followed by a non-letter is taken as the keyword. NOTE(review): `in_x` or `in1` also land here because only letters are checked.
+ return false;
+ }
+
+ for (unsigned i = 0; i < 8; i++) { // match the remaining eight characters of `instanceof`
+ if (lexer->lookahead != "stanceof"[i]) {
+ return true;
+ }
+ skip(lexer);
+ }
+
+ if (!iswalpha(lexer->lookahead)) { // `instanceof` followed by a non-letter is taken as the keyword
+ return false;
+ }
+ break; // a longer word that merely starts with `instanceof`: accept below
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+// Recognizes the `?` of a conditional expression. A `?` followed by another
+// `?`, or by a `.` that does not start a digit, is rejected.
+// Returns true with lexer->result_symbol set to TERNARY_QMARK on a match.
+static bool scan_ternary_qmark(TSLexer *lexer) {
+    // Leading whitespace is skipped rather than advanced over.
+    while (iswspace(lexer->lookahead)) {
+        skip(lexer);
+    }
+
+    if (lexer->lookahead != '?') {
+        return false;
+    }
+    advance(lexer);
+
+    // A doubled `?` is not a ternary question mark.
+    if (lexer->lookahead == '?') {
+        return false;
+    }
+
+    // The token ends immediately after the single `?`.
+    lexer->mark_end(lexer);
+    lexer->result_symbol = TERNARY_QMARK;
+
+    if (lexer->lookahead != '.') {
+        return true;
+    }
+    advance(lexer);
+    return iswdigit(lexer->lookahead) != 0;
+}
+
+// Scans a one-line HTML-style comment introduced by `<!--` or `-->`.
+// The token runs to the end of the line. Returns false (with characters
+// possibly already consumed) when neither marker is matched in full.
+static bool scan_html_comment(TSLexer *lexer) {
+    while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
+        skip(lexer);
+    }
+
+    // Pick the marker from its first character, then match it in full.
+    const char *marker;
+    if (lexer->lookahead == '<') {
+        marker = "<!--";
+    } else if (lexer->lookahead == '-') {
+        marker = "-->";
+    } else {
+        return false;
+    }
+    for (; *marker != '\0'; marker++) {
+        if (lexer->lookahead != *marker) {
+            return false;
+        }
+        advance(lexer);
+    }
+
+    // Consume the rest of the line, stopping in front of the terminator.
+    for (;;) {
+        const int32_t ch = lexer->lookahead;
+        if (ch == 0 || ch == '\n' || ch == 0x2028 || ch == 0x2029) {
+            break;
+        }
+        advance(lexer);
+    }
+    lexer->result_symbol = HTML_COMMENT;
+    lexer->mark_end(lexer);
+    return true;
+}
+
+// Entry point of the external scanner: picks one scanning routine based on
+// which external tokens are valid here. `payload` is unused. Returns true when
+// a token was recognized (its kind is stored in lexer->result_symbol).
+bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+    if (valid_symbols[TEMPLATE_CHARS]) {
+        // Nothing is scanned when an automatic semicolon is valid as well.
+        return !valid_symbols[AUTOMATIC_SEMICOLON] && scan_template_chars(lexer);
+    }
+
+    if (valid_symbols[AUTOMATIC_SEMICOLON]) {
+        bool saw_comment = false;
+        const bool comment_condition = !valid_symbols[LOGICAL_OR];
+        if (scan_automatic_semicolon(lexer, comment_condition, &saw_comment)) {
+            return true;
+        }
+        // No semicolon: fall back to a ternary `?` only if no comment was skipped.
+        const bool try_qmark = !saw_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?';
+        return try_qmark && scan_ternary_qmark(lexer);
+    }
+
+    if (valid_symbols[TERNARY_QMARK]) {
+        return scan_ternary_qmark(lexer);
+    }
+
+    return valid_symbols[HTML_COMMENT] &&
+           !(valid_symbols[LOGICAL_OR] || valid_symbols[ESCAPE_SEQUENCE] || valid_symbols[REGEX_PATTERN]) &&
+           scan_html_comment(lexer);
+}