1#include "tree_sitter/parser.h"
  2
  3#include <wctype.h>
  4
  5enum TokenType {
  6    AUTOMATIC_SEMICOLON,
  7    TEMPLATE_CHARS,
  8    TERNARY_QMARK,
  9    HTML_COMMENT,
 10    LOGICAL_OR,
 11    ESCAPE_SEQUENCE,
 12    REGEX_PATTERN,
 13};
 14
 15void *tree_sitter_javascript_external_scanner_create() { return NULL; }
 16
 17void tree_sitter_javascript_external_scanner_destroy(void *p) {}
 18
 19void tree_sitter_javascript_external_scanner_reset(void *p) {}
 20
 21unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) { return 0; }
 22
 23void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {}
 24
 25static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
 26
 27static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
 28
 29static bool scan_template_chars(TSLexer *lexer) {
 30    lexer->result_symbol = TEMPLATE_CHARS;
 31    for (bool has_content = false;; has_content = true) {
 32        lexer->mark_end(lexer);
 33        switch (lexer->lookahead) {
 34            case '`':
 35                return has_content;
 36            case '\0':
 37                return false;
 38            case '$':
 39                advance(lexer);
 40                if (lexer->lookahead == '{') {
 41                    return has_content;
 42                }
 43                break;
 44            case '\\':
 45                return has_content;
 46            default:
 47                advance(lexer);
 48        }
 49    }
 50}
 51
 52static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) {
 53    for (;;) {
 54        while (iswspace(lexer->lookahead)) {
 55            skip(lexer);
 56        }
 57
 58        if (lexer->lookahead == '/') {
 59            skip(lexer);
 60
 61            if (lexer->lookahead == '/') {
 62                skip(lexer);
 63                while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
 64                       lexer->lookahead != 0x2029) {
 65                    skip(lexer);
 66                }
 67                *scanned_comment = true;
 68            } else if (lexer->lookahead == '*') {
 69                skip(lexer);
 70                while (lexer->lookahead != 0) {
 71                    if (lexer->lookahead == '*') {
 72                        skip(lexer);
 73                        if (lexer->lookahead == '/') {
 74                            skip(lexer);
 75                            *scanned_comment = true;
 76                            break;
 77                        }
 78                    } else {
 79                        skip(lexer);
 80                    }
 81                }
 82            } else {
 83                return false;
 84            }
 85        } else {
 86            return true;
 87        }
 88    }
 89}
 90
 91static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) {
 92    lexer->result_symbol = AUTOMATIC_SEMICOLON;
 93    lexer->mark_end(lexer);
 94
 95    for (;;) {
 96        if (lexer->lookahead == 0) {
 97            return true;
 98        }
 99
100        if (lexer->lookahead == '/') {
101            if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
102                return false;
103            }
104            if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') {
105                return true;
106            }
107        }
108
109        if (lexer->lookahead == '}') {
110            return true;
111        }
112
113        if (lexer->is_at_included_range_start(lexer)) {
114            return true;
115        }
116
117        if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
118            break;
119        }
120
121        if (!iswspace(lexer->lookahead)) {
122            return false;
123        }
124
125        skip(lexer);
126    }
127
128    skip(lexer);
129
130    if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
131        return false;
132    }
133
134    switch (lexer->lookahead) {
135        case ',':
136        case '.':
137        case ':':
138        case ';':
139        case '*':
140        case '%':
141        case '>':
142        case '<':
143        case '=':
144        case '[':
145        case '(':
146        case '?':
147        case '^':
148        case '|':
149        case '&':
150        case '/':
151            return false;
152
153        // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
154        case '+':
155            skip(lexer);
156            return lexer->lookahead == '+';
157        case '-':
158            skip(lexer);
159            return lexer->lookahead == '-';
160
161        // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
162        case '!':
163            skip(lexer);
164            return lexer->lookahead != '=';
165
166        // Don't insert a semicolon before `in` or `instanceof`, but do insert one
167        // before an identifier.
168        case 'i':
169            skip(lexer);
170
171            if (lexer->lookahead != 'n') {
172                return true;
173            }
174            skip(lexer);
175
176            if (!iswalpha(lexer->lookahead)) {
177                return false;
178            }
179
180            for (unsigned i = 0; i < 8; i++) {
181                if (lexer->lookahead != "stanceof"[i]) {
182                    return true;
183                }
184                skip(lexer);
185            }
186
187            if (!iswalpha(lexer->lookahead)) {
188                return false;
189            }
190            break;
191
192        default:
193            break;
194    }
195
196    return true;
197}
198
199static bool scan_ternary_qmark(TSLexer *lexer) {
200    for (;;) {
201        if (!iswspace(lexer->lookahead)) {
202            break;
203        }
204        skip(lexer);
205    }
206
207    if (lexer->lookahead == '?') {
208        advance(lexer);
209
210        if (lexer->lookahead == '?') {
211            return false;
212        }
213
214        lexer->mark_end(lexer);
215        lexer->result_symbol = TERNARY_QMARK;
216
217        if (lexer->lookahead == '.') {
218            advance(lexer);
219            if (iswdigit(lexer->lookahead)) {
220                return true;
221            }
222            return false;
223        }
224        return true;
225    }
226    return false;
227}
228
229static bool scan_html_comment(TSLexer *lexer) {
230    while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
231        skip(lexer);
232    }
233
234    const char *comment_start = "<!--";
235    const char *comment_end = "-->";
236
237    if (lexer->lookahead == '<') {
238        for (unsigned i = 0; i < 4; i++) {
239            if (lexer->lookahead != comment_start[i]) {
240                return false;
241            }
242            advance(lexer);
243        }
244    } else if (lexer->lookahead == '-') {
245        for (unsigned i = 0; i < 3; i++) {
246            if (lexer->lookahead != comment_end[i]) {
247                return false;
248            }
249            advance(lexer);
250        }
251    } else {
252        return false;
253    }
254
255    while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
256           lexer->lookahead != 0x2029) {
257        advance(lexer);
258    }
259
260    lexer->result_symbol = HTML_COMMENT;
261    lexer->mark_end(lexer);
262
263    return true;
264}
265
266bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
267    if (valid_symbols[TEMPLATE_CHARS]) {
268        if (valid_symbols[AUTOMATIC_SEMICOLON]) {
269            return false;
270        }
271        return scan_template_chars(lexer);
272    }
273
274    if (valid_symbols[AUTOMATIC_SEMICOLON]) {
275        bool scanned_comment = false;
276        bool ret = scan_automatic_semicolon(lexer, !valid_symbols[LOGICAL_OR], &scanned_comment);
277        if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') {
278            return scan_ternary_qmark(lexer);
279        }
280        return ret;
281    }
282
283    if (valid_symbols[TERNARY_QMARK]) {
284        return scan_ternary_qmark(lexer);
285    }
286
287    if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] &&
288        !valid_symbols[REGEX_PATTERN]) {
289        return scan_html_comment(lexer);
290    }
291
292    return false;
293}