aboutsummaryrefslogtreecommitdiff
path: root/vendor/tree-sitter-javascript/src/scanner.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/tree-sitter-javascript/src/scanner.c')
-rw-r--r--vendor/tree-sitter-javascript/src/scanner.c293
1 files changed, 293 insertions, 0 deletions
diff --git a/vendor/tree-sitter-javascript/src/scanner.c b/vendor/tree-sitter-javascript/src/scanner.c
new file mode 100644
index 0000000..9bea968
--- /dev/null
+++ b/vendor/tree-sitter-javascript/src/scanner.c
@@ -0,0 +1,293 @@
1#include "tree_sitter/parser.h"
2
3#include <wctype.h>
4
/*
 * Token kinds handled by this external scanner.
 *
 * The enumerators are used both as indices into the `valid_symbols` array
 * passed to the scan entry point and as the value stored in
 * `lexer->result_symbol`, so their numeric values matter.
 *
 * NOTE(review): the order presumably has to mirror the `externals` list of
 * the grammar -- confirm against grammar.js before reordering anything.
 */
enum TokenType {
  AUTOMATIC_SEMICOLON = 0,  // emitted by scan_automatic_semicolon
  TEMPLATE_CHARS,           // emitted by scan_template_chars
  TERNARY_QMARK,            // emitted by scan_ternary_qmark
  HTML_COMMENT,             // emitted by scan_html_comment
  LOGICAL_OR,               // never emitted in this file; only read from valid_symbols
  ESCAPE_SEQUENCE,          // never emitted in this file; only read from valid_symbols
  REGEX_PATTERN,            // never emitted in this file; only read from valid_symbols
};
14
/*
 * Create the scanner payload.
 *
 * This scanner keeps no state between calls (the serialize hook writes zero
 * bytes and the other lifecycle hooks are empty), so no payload is allocated.
 *
 * The parameter list is spelled `(void)` so that this is a real prototype;
 * an empty `()` in C declares a function with unspecified parameters.
 *
 * @return always NULL.
 */
void *tree_sitter_javascript_external_scanner_create(void) { return NULL; }
16
/*
 * Release the scanner payload.
 *
 * Nothing to free: the payload is never dereferenced here.
 *
 * @param p scanner payload; ignored.
 */
void tree_sitter_javascript_external_scanner_destroy(void *p) {
  (void)p;  // intentionally unused
}
18
/*
 * Reset the scanner to its initial state.
 *
 * A no-op, because this function neither reads nor writes any state.
 *
 * @param p scanner payload; ignored.
 */
void tree_sitter_javascript_external_scanner_reset(void *p) {
  (void)p;  // intentionally unused
}
20
/*
 * Serialize the scanner state into `buffer`.
 *
 * There is no state to save, so the buffer is left untouched.
 *
 * @param p      scanner payload; ignored.
 * @param buffer destination buffer; never written.
 * @return number of bytes written, which is always 0.
 */
unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) {
  (void)p;
  (void)buffer;
  return 0u;
}
22
/*
 * Restore scanner state from a previously serialized buffer.
 *
 * A no-op: the matching serialize hook writes zero bytes, so there is
 * nothing to read back.
 *
 * @param p scanner payload; ignored.
 * @param b serialized bytes; ignored.
 * @param n length of `b`; ignored.
 */
void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {
  (void)p;
  (void)b;
  (void)n;
}
24
25static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
26
27static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
28
29static bool scan_template_chars(TSLexer *lexer) {
30 lexer->result_symbol = TEMPLATE_CHARS;
31 for (bool has_content = false;; has_content = true) {
32 lexer->mark_end(lexer);
33 switch (lexer->lookahead) {
34 case '`':
35 return has_content;
36 case '\0':
37 return false;
38 case '$':
39 advance(lexer);
40 if (lexer->lookahead == '{') {
41 return has_content;
42 }
43 break;
44 case '\\':
45 return has_content;
46 default:
47 advance(lexer);
48 }
49 }
50}
51
52static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) {
53 for (;;) {
54 while (iswspace(lexer->lookahead)) {
55 skip(lexer);
56 }
57
58 if (lexer->lookahead == '/') {
59 skip(lexer);
60
61 if (lexer->lookahead == '/') {
62 skip(lexer);
63 while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
64 lexer->lookahead != 0x2029) {
65 skip(lexer);
66 }
67 *scanned_comment = true;
68 } else if (lexer->lookahead == '*') {
69 skip(lexer);
70 while (lexer->lookahead != 0) {
71 if (lexer->lookahead == '*') {
72 skip(lexer);
73 if (lexer->lookahead == '/') {
74 skip(lexer);
75 *scanned_comment = true;
76 break;
77 }
78 } else {
79 skip(lexer);
80 }
81 }
82 } else {
83 return false;
84 }
85 } else {
86 return true;
87 }
88 }
89}
90
91static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) {
92 lexer->result_symbol = AUTOMATIC_SEMICOLON;
93 lexer->mark_end(lexer);
94
95 for (;;) {
96 if (lexer->lookahead == 0) {
97 return true;
98 }
99
100 if (lexer->lookahead == '/') {
101 if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
102 return false;
103 }
104 if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') {
105 return true;
106 }
107 }
108
109 if (lexer->lookahead == '}') {
110 return true;
111 }
112
113 if (lexer->is_at_included_range_start(lexer)) {
114 return true;
115 }
116
117 if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
118 break;
119 }
120
121 if (!iswspace(lexer->lookahead)) {
122 return false;
123 }
124
125 skip(lexer);
126 }
127
128 skip(lexer);
129
130 if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
131 return false;
132 }
133
134 switch (lexer->lookahead) {
135 case ',':
136 case '.':
137 case ':':
138 case ';':
139 case '*':
140 case '%':
141 case '>':
142 case '<':
143 case '=':
144 case '[':
145 case '(':
146 case '?':
147 case '^':
148 case '|':
149 case '&':
150 case '/':
151 return false;
152
153 // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
154 case '+':
155 skip(lexer);
156 return lexer->lookahead == '+';
157 case '-':
158 skip(lexer);
159 return lexer->lookahead == '-';
160
161 // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
162 case '!':
163 skip(lexer);
164 return lexer->lookahead != '=';
165
166 // Don't insert a semicolon before `in` or `instanceof`, but do insert one
167 // before an identifier.
168 case 'i':
169 skip(lexer);
170
171 if (lexer->lookahead != 'n') {
172 return true;
173 }
174 skip(lexer);
175
176 if (!iswalpha(lexer->lookahead)) {
177 return false;
178 }
179
180 for (unsigned i = 0; i < 8; i++) {
181 if (lexer->lookahead != "stanceof"[i]) {
182 return true;
183 }
184 skip(lexer);
185 }
186
187 if (!iswalpha(lexer->lookahead)) {
188 return false;
189 }
190 break;
191
192 default:
193 break;
194 }
195
196 return true;
197}
198
199static bool scan_ternary_qmark(TSLexer *lexer) {
200 for (;;) {
201 if (!iswspace(lexer->lookahead)) {
202 break;
203 }
204 skip(lexer);
205 }
206
207 if (lexer->lookahead == '?') {
208 advance(lexer);
209
210 if (lexer->lookahead == '?') {
211 return false;
212 }
213
214 lexer->mark_end(lexer);
215 lexer->result_symbol = TERNARY_QMARK;
216
217 if (lexer->lookahead == '.') {
218 advance(lexer);
219 if (iswdigit(lexer->lookahead)) {
220 return true;
221 }
222 return false;
223 }
224 return true;
225 }
226 return false;
227}
228
229static bool scan_html_comment(TSLexer *lexer) {
230 while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
231 skip(lexer);
232 }
233
234 const char *comment_start = "<!--";
235 const char *comment_end = "-->";
236
237 if (lexer->lookahead == '<') {
238 for (unsigned i = 0; i < 4; i++) {
239 if (lexer->lookahead != comment_start[i]) {
240 return false;
241 }
242 advance(lexer);
243 }
244 } else if (lexer->lookahead == '-') {
245 for (unsigned i = 0; i < 3; i++) {
246 if (lexer->lookahead != comment_end[i]) {
247 return false;
248 }
249 advance(lexer);
250 }
251 } else {
252 return false;
253 }
254
255 while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
256 lexer->lookahead != 0x2029) {
257 advance(lexer);
258 }
259
260 lexer->result_symbol = HTML_COMMENT;
261 lexer->mark_end(lexer);
262
263 return true;
264}
265
266bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
267 if (valid_symbols[TEMPLATE_CHARS]) {
268 if (valid_symbols[AUTOMATIC_SEMICOLON]) {
269 return false;
270 }
271 return scan_template_chars(lexer);
272 }
273
274 if (valid_symbols[AUTOMATIC_SEMICOLON]) {
275 bool scanned_comment = false;
276 bool ret = scan_automatic_semicolon(lexer, !valid_symbols[LOGICAL_OR], &scanned_comment);
277 if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') {
278 return scan_ternary_qmark(lexer);
279 }
280 return ret;
281 }
282
283 if (valid_symbols[TERNARY_QMARK]) {
284 return scan_ternary_qmark(lexer);
285 }
286
287 if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] &&
288 !valid_symbols[REGEX_PATTERN]) {
289 return scan_html_comment(lexer);
290 }
291
292 return false;
293}