aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c')
-rw-r--r--vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c316
1 files changed, 316 insertions, 0 deletions
diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c
new file mode 100644
index 0000000..60ed40c
--- /dev/null
+++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/javascript/scanner.c
@@ -0,0 +1,316 @@
1#include "parser.h"
2
3#include <wctype.h>
4
/*
 * External token kinds known to this scanner.
 *
 * Each value is used both as an index into the `valid_symbols` array handed
 * to the scan entry point and as the value stored in `lexer->result_symbol`.
 * NOTE(review): the order presumably has to mirror the externals list of the
 * grammar -- confirm before reordering or inserting entries.
 */
enum TokenType {
  AUTOMATIC_SEMICOLON = 0, // produced by scan_automatic_semicolon
  TEMPLATE_CHARS,          // produced by scan_template_chars
  TERNARY_QMARK,           // produced by scan_ternary_qmark
  HTML_COMMENT,            // produced by scan_html_comment
  LOGICAL_OR,              // never produced here; only read from valid_symbols
  ESCAPE_SEQUENCE,         // never produced here; only read from valid_symbols
  REGEX_PATTERN,           // never produced here; only read from valid_symbols
};
14
/**
 * Creates the scanner payload.
 *
 * This scanner keeps no state, so there is nothing to allocate.
 *
 * The parameter list is spelled `(void)` so the definition is a real
 * prototype; before C23 an empty `()` leaves the parameters unspecified and
 * lets mismatched calls compile silently.
 *
 * @return Always NULL.
 */
void *tree_sitter_javascript_external_scanner_create(void) { return NULL; }
16
/**
 * Releases the scanner payload.
 *
 * Does nothing: the matching create function returns NULL, so there is no
 * payload to free.
 *
 * @param p Scanner payload; ignored.
 */
void tree_sitter_javascript_external_scanner_destroy(void *p) {
  (void)p; // intentionally unused
}
18
/**
 * Serializes the scanner state.
 *
 * The scanner is stateless, so nothing is written to the buffer.
 *
 * @param p      Scanner payload; ignored.
 * @param buffer Destination buffer; left untouched.
 * @return Number of bytes written, always 0.
 */
unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) {
  (void)p;      // intentionally unused
  (void)buffer; // intentionally unused
  return 0;
}
20
/**
 * Restores the scanner state from a serialized buffer.
 *
 * Does nothing: the scanner is stateless and the matching serialize function
 * never writes any bytes.
 *
 * @param p Scanner payload; ignored.
 * @param b Serialized bytes; ignored.
 * @param n Number of bytes in `b`; ignored.
 */
void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {
  (void)p; // intentionally unused
  (void)b; // intentionally unused
  (void)n; // intentionally unused
}
22
23static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
24
25static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
26
27static bool scan_template_chars(TSLexer *lexer) {
28 lexer->result_symbol = TEMPLATE_CHARS;
29 for (bool has_content = false;; has_content = true) {
30 lexer->mark_end(lexer);
31 switch (lexer->lookahead) {
32 case '`':
33 return has_content;
34 case '\0':
35 return false;
36 case '$':
37 advance(lexer);
38 if (lexer->lookahead == '{') {
39 return has_content;
40 }
41 break;
42 case '\\':
43 return has_content;
44 default:
45 advance(lexer);
46 }
47 }
48}
49
/* Outcome of scan_whitespace_and_comments. */
typedef enum {
  REJECT = 0,     // A semicolon is illegal here, i.e. a syntax error occurred
  NO_NEWLINE = 1, // Not yet clear whether a semicolon will be legal; keep scanning
  ACCEPT = 2,     // A semicolon is legal, assuming a comment was encountered
} WhitespaceResult;
55
56/**
57 * @param consume If false, only consume enough to check if comment indicates semicolon-legality
58 */
59static WhitespaceResult scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool consume) {
60 bool saw_block_newline = false;
61
62 for (;;) {
63 while (iswspace(lexer->lookahead)) {
64 skip(lexer);
65 }
66
67 if (lexer->lookahead == '/') {
68 skip(lexer);
69
70 if (lexer->lookahead == '/') {
71 skip(lexer);
72 while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
73 lexer->lookahead != 0x2029) {
74 skip(lexer);
75 }
76 *scanned_comment = true;
77 } else if (lexer->lookahead == '*') {
78 skip(lexer);
79 while (lexer->lookahead != 0) {
80 if (lexer->lookahead == '*') {
81 skip(lexer);
82 if (lexer->lookahead == '/') {
83 skip(lexer);
84 *scanned_comment = true;
85
86 if (lexer->lookahead != '/' && !consume) {
87 return saw_block_newline ? ACCEPT : NO_NEWLINE;
88 }
89
90 break;
91 }
92 } else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
93 saw_block_newline = true;
94 skip(lexer);
95 } else {
96 skip(lexer);
97 }
98 }
99 } else {
100 return REJECT;
101 }
102 } else {
103 return ACCEPT;
104 }
105 }
106}
107
108static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) {
109 lexer->result_symbol = AUTOMATIC_SEMICOLON;
110 lexer->mark_end(lexer);
111
112 for (;;) {
113 if (lexer->lookahead == 0) {
114 return true;
115 }
116
117 if (lexer->lookahead == '/') {
118 WhitespaceResult result = scan_whitespace_and_comments(lexer, scanned_comment, false);
119 if (result == REJECT) {
120 return false;
121 }
122
123 if (result == ACCEPT && comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') {
124 return true;
125 }
126 }
127
128 if (lexer->lookahead == '}') {
129 return true;
130 }
131
132 if (lexer->is_at_included_range_start(lexer)) {
133 return true;
134 }
135
136 if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
137 break;
138 }
139
140 if (!iswspace(lexer->lookahead)) {
141 return false;
142 }
143
144 skip(lexer);
145 }
146
147 skip(lexer);
148
149 if (scan_whitespace_and_comments(lexer, scanned_comment, true) == REJECT) {
150 return false;
151 }
152
153 switch (lexer->lookahead) {
154 case ',':
155 case ':':
156 case ';':
157 case '*':
158 case '%':
159 case '>':
160 case '<':
161 case '=':
162 case '[':
163 case '(':
164 case '?':
165 case '^':
166 case '|':
167 case '&':
168 case '/':
169 return false;
170
171 // Insert a semicolon before decimals literals but not otherwise.
172 case '.':
173 skip(lexer);
174 return iswdigit(lexer->lookahead);
175
176 // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
177 case '+':
178 skip(lexer);
179 return lexer->lookahead == '+';
180 case '-':
181 skip(lexer);
182 return lexer->lookahead == '-';
183
184 // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
185 case '!':
186 skip(lexer);
187 return lexer->lookahead != '=';
188
189 // Don't insert a semicolon before `in` or `instanceof`, but do insert one
190 // before an identifier.
191 case 'i':
192 skip(lexer);
193
194 if (lexer->lookahead != 'n') {
195 return true;
196 }
197 skip(lexer);
198
199 if (!iswalpha(lexer->lookahead)) {
200 return false;
201 }
202
203 for (unsigned i = 0; i < 8; i++) {
204 if (lexer->lookahead != "stanceof"[i]) {
205 return true;
206 }
207 skip(lexer);
208 }
209
210 if (!iswalpha(lexer->lookahead)) {
211 return false;
212 }
213 break;
214
215 default:
216 break;
217 }
218
219 return true;
220}
221
222static bool scan_ternary_qmark(TSLexer *lexer) {
223 for (;;) {
224 if (!iswspace(lexer->lookahead)) {
225 break;
226 }
227 skip(lexer);
228 }
229
230 if (lexer->lookahead == '?') {
231 advance(lexer);
232
233 if (lexer->lookahead == '?') {
234 return false;
235 }
236
237 lexer->mark_end(lexer);
238 lexer->result_symbol = TERNARY_QMARK;
239
240 if (lexer->lookahead == '.') {
241 advance(lexer);
242 if (iswdigit(lexer->lookahead)) {
243 return true;
244 }
245 return false;
246 }
247 return true;
248 }
249 return false;
250}
251
252static bool scan_html_comment(TSLexer *lexer) {
253 while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
254 skip(lexer);
255 }
256
257 const char *comment_start = "<!--";
258 const char *comment_end = "-->";
259
260 if (lexer->lookahead == '<') {
261 for (unsigned i = 0; i < 4; i++) {
262 if (lexer->lookahead != comment_start[i]) {
263 return false;
264 }
265 advance(lexer);
266 }
267 } else if (lexer->lookahead == '-') {
268 for (unsigned i = 0; i < 3; i++) {
269 if (lexer->lookahead != comment_end[i]) {
270 return false;
271 }
272 advance(lexer);
273 }
274 } else {
275 return false;
276 }
277
278 while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
279 lexer->lookahead != 0x2029) {
280 advance(lexer);
281 }
282
283 lexer->result_symbol = HTML_COMMENT;
284 lexer->mark_end(lexer);
285
286 return true;
287}
288
289bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
290 if (valid_symbols[TEMPLATE_CHARS]) {
291 if (valid_symbols[AUTOMATIC_SEMICOLON]) {
292 return false;
293 }
294 return scan_template_chars(lexer);
295 }
296
297 if (valid_symbols[AUTOMATIC_SEMICOLON]) {
298 bool scanned_comment = false;
299 bool ret = scan_automatic_semicolon(lexer, !valid_symbols[LOGICAL_OR], &scanned_comment);
300 if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') {
301 return scan_ternary_qmark(lexer);
302 }
303 return ret;
304 }
305
306 if (valid_symbols[TERNARY_QMARK]) {
307 return scan_ternary_qmark(lexer);
308 }
309
310 if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] &&
311 !valid_symbols[REGEX_PATTERN]) {
312 return scan_html_comment(lexer);
313 }
314
315 return false;
316}