aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/mitjafelicijan/go-tree-sitter/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/parser.c')
-rw-r--r--vendor/github.com/mitjafelicijan/go-tree-sitter/parser.c2151
1 files changed, 2151 insertions, 0 deletions
diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/parser.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/parser.c
new file mode 100644
index 0000000..a72983d
--- /dev/null
+++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/parser.c
@@ -0,0 +1,2151 @@
1#define _POSIX_C_SOURCE 200112L
2
3#include <time.h>
4#include <assert.h>
5#include <stdio.h>
6#include <limits.h>
7#include <stdbool.h>
8#include <inttypes.h>
9#include "api.h"
10#include "./alloc.h"
11#include "./array.h"
12#include "./atomic.h"
13#include "./clock.h"
14#include "./error_costs.h"
15#include "./get_changed_ranges.h"
16#include "./language.h"
17#include "./length.h"
18#include "./lexer.h"
19#include "./reduce_action.h"
20#include "./reusable_node.h"
21#include "./stack.h"
22#include "./subtree.h"
23#include "./tree.h"
24#include "./wasm_store.h"
25
26#define LOG(...) \
27 if (self->lexer.logger.log || self->dot_graph_file) { \
28 snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
29 ts_parser__log(self); \
30 }
31
32#define LOG_LOOKAHEAD(symbol_name, size) \
33 if (self->lexer.logger.log || self->dot_graph_file) { \
34 char *buf = self->lexer.debug_buffer; \
35 const char *symbol = symbol_name; \
36 int off = sprintf(buf, "lexed_lookahead sym:"); \
37 for ( \
38 int i = 0; \
39 symbol[i] != '\0' \
40 && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \
41 i++ \
42 ) { \
43 switch (symbol[i]) { \
44 case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \
45 case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \
46 case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \
47 case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \
48 case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \
49 case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \
50 default: buf[off++] = symbol[i]; break; \
51 } \
52 } \
53 snprintf( \
54 buf + off, \
55 TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \
56 ", size:%u", \
57 size \
58 ); \
59 ts_parser__log(self); \
60 }
61
62#define LOG_STACK() \
63 if (self->dot_graph_file) { \
64 ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
65 fputs("\n\n", self->dot_graph_file); \
66 }
67
68#define LOG_TREE(tree) \
69 if (self->dot_graph_file) { \
70 ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
71 fputs("\n", self->dot_graph_file); \
72 }
73
74#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
75
76#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
77
78static const unsigned MAX_VERSION_COUNT = 6;
79static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
80static const unsigned MAX_SUMMARY_DEPTH = 16;
81static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
82static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
83
84typedef struct {
85 Subtree token;
86 Subtree last_external_token;
87 uint32_t byte_index;
88} TokenCache;
89
90struct TSParser {
91 Lexer lexer;
92 Stack *stack;
93 SubtreePool tree_pool;
94 const TSLanguage *language;
95 TSWasmStore *wasm_store;
96 ReduceActionSet reduce_actions;
97 Subtree finished_tree;
98 SubtreeArray trailing_extras;
99 SubtreeArray trailing_extras2;
100 SubtreeArray scratch_trees;
101 TokenCache token_cache;
102 ReusableNode reusable_node;
103 void *external_scanner_payload;
104 FILE *dot_graph_file;
105 TSClock end_clock;
106 TSDuration timeout_duration;
107 unsigned accept_count;
108 unsigned operation_count;
109 const volatile size_t *cancellation_flag;
110 Subtree old_tree;
111 TSRangeArray included_range_differences;
112 unsigned included_range_difference_index;
113 bool has_scanner_error;
114};
115
116typedef struct {
117 unsigned cost;
118 unsigned node_count;
119 int dynamic_precedence;
120 bool is_in_error;
121} ErrorStatus;
122
123typedef enum {
124 ErrorComparisonTakeLeft,
125 ErrorComparisonPreferLeft,
126 ErrorComparisonNone,
127 ErrorComparisonPreferRight,
128 ErrorComparisonTakeRight,
129} ErrorComparison;
130
131typedef struct {
132 const char *string;
133 uint32_t length;
134} TSStringInput;
135
136// StringInput
137
138static const char *ts_string_input_read(
139 void *_self,
140 uint32_t byte,
141 TSPoint point,
142 uint32_t *length
143) {
144 (void)point;
145 TSStringInput *self = (TSStringInput *)_self;
146 if (byte >= self->length) {
147 *length = 0;
148 return "";
149 } else {
150 *length = self->length - byte;
151 return self->string + byte;
152 }
153}
154
155// Parser - Private
156
157static void ts_parser__log(TSParser *self) {
158 if (self->lexer.logger.log) {
159 self->lexer.logger.log(
160 self->lexer.logger.payload,
161 TSLogTypeParse,
162 self->lexer.debug_buffer
163 );
164 }
165
166 if (self->dot_graph_file) {
167 fprintf(self->dot_graph_file, "graph {\nlabel=\"");
168 for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) {
169 if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file);
170 fputc(*chr, self->dot_graph_file);
171 }
172 fprintf(self->dot_graph_file, "\"\n}\n\n");
173 }
174}
175
176static bool ts_parser__breakdown_top_of_stack(
177 TSParser *self,
178 StackVersion version
179) {
180 bool did_break_down = false;
181 bool pending = false;
182
183 do {
184 StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
185 if (!pop.size) break;
186
187 did_break_down = true;
188 pending = false;
189 for (uint32_t i = 0; i < pop.size; i++) {
190 StackSlice slice = pop.contents[i];
191 TSStateId state = ts_stack_state(self->stack, slice.version);
192 Subtree parent = *array_front(&slice.subtrees);
193
194 for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
195 Subtree child = ts_subtree_children(parent)[j];
196 pending = ts_subtree_child_count(child) > 0;
197
198 if (ts_subtree_is_error(child)) {
199 state = ERROR_STATE;
200 } else if (!ts_subtree_extra(child)) {
201 state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
202 }
203
204 ts_subtree_retain(child);
205 ts_stack_push(self->stack, slice.version, child, pending, state);
206 }
207
208 for (uint32_t j = 1; j < slice.subtrees.size; j++) {
209 Subtree tree = slice.subtrees.contents[j];
210 ts_stack_push(self->stack, slice.version, tree, false, state);
211 }
212
213 ts_subtree_release(&self->tree_pool, parent);
214 array_delete(&slice.subtrees);
215
216 LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
217 LOG_STACK();
218 }
219 } while (pending);
220
221 return did_break_down;
222}
223
224static void ts_parser__breakdown_lookahead(
225 TSParser *self,
226 Subtree *lookahead,
227 TSStateId state,
228 ReusableNode *reusable_node
229) {
230 bool did_descend = false;
231 Subtree tree = reusable_node_tree(reusable_node);
232 while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
233 LOG("state_mismatch sym:%s", TREE_NAME(tree));
234 reusable_node_descend(reusable_node);
235 tree = reusable_node_tree(reusable_node);
236 did_descend = true;
237 }
238
239 if (did_descend) {
240 ts_subtree_release(&self->tree_pool, *lookahead);
241 *lookahead = tree;
242 ts_subtree_retain(*lookahead);
243 }
244}
245
246static ErrorComparison ts_parser__compare_versions(
247 TSParser *self,
248 ErrorStatus a,
249 ErrorStatus b
250) {
251 (void)self;
252 if (!a.is_in_error && b.is_in_error) {
253 if (a.cost < b.cost) {
254 return ErrorComparisonTakeLeft;
255 } else {
256 return ErrorComparisonPreferLeft;
257 }
258 }
259
260 if (a.is_in_error && !b.is_in_error) {
261 if (b.cost < a.cost) {
262 return ErrorComparisonTakeRight;
263 } else {
264 return ErrorComparisonPreferRight;
265 }
266 }
267
268 if (a.cost < b.cost) {
269 if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
270 return ErrorComparisonTakeLeft;
271 } else {
272 return ErrorComparisonPreferLeft;
273 }
274 }
275
276 if (b.cost < a.cost) {
277 if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
278 return ErrorComparisonTakeRight;
279 } else {
280 return ErrorComparisonPreferRight;
281 }
282 }
283
284 if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
285 if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
286 return ErrorComparisonNone;
287}
288
289static ErrorStatus ts_parser__version_status(
290 TSParser *self,
291 StackVersion version
292) {
293 unsigned cost = ts_stack_error_cost(self->stack, version);
294 bool is_paused = ts_stack_is_paused(self->stack, version);
295 if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
296 return (ErrorStatus) {
297 .cost = cost,
298 .node_count = ts_stack_node_count_since_error(self->stack, version),
299 .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
300 .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
301 };
302}
303
304static bool ts_parser__better_version_exists(
305 TSParser *self,
306 StackVersion version,
307 bool is_in_error,
308 unsigned cost
309) {
310 if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
311 return true;
312 }
313
314 Length position = ts_stack_position(self->stack, version);
315 ErrorStatus status = {
316 .cost = cost,
317 .is_in_error = is_in_error,
318 .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
319 .node_count = ts_stack_node_count_since_error(self->stack, version),
320 };
321
322 for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
323 if (i == version ||
324 !ts_stack_is_active(self->stack, i) ||
325 ts_stack_position(self->stack, i).bytes < position.bytes) continue;
326 ErrorStatus status_i = ts_parser__version_status(self, i);
327 switch (ts_parser__compare_versions(self, status, status_i)) {
328 case ErrorComparisonTakeRight:
329 return true;
330 case ErrorComparisonPreferRight:
331 if (ts_stack_can_merge(self->stack, i, version)) return true;
332 break;
333 default:
334 break;
335 }
336 }
337
338 return false;
339}
340
341static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) {
342 if (ts_language_is_wasm(self->language)) {
343 return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
344 } else {
345 return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
346 }
347}
348
349static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) {
350 if (ts_language_is_wasm(self->language)) {
351 return ts_wasm_store_call_lex_keyword(self->wasm_store, 0);
352 } else {
353 return self->language->keyword_lex_fn(&self->lexer.data, 0);
354 }
355}
356
357static void ts_parser__external_scanner_create(
358 TSParser *self
359) {
360 if (self->language && self->language->external_scanner.states) {
361 if (ts_language_is_wasm(self->language)) {
362 self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create(
363 self->wasm_store
364 );
365 if (ts_wasm_store_has_error(self->wasm_store)) {
366 self->has_scanner_error = true;
367 }
368 } else if (self->language->external_scanner.create) {
369 self->external_scanner_payload = self->language->external_scanner.create();
370 }
371 }
372}
373
374static void ts_parser__external_scanner_destroy(
375 TSParser *self
376) {
377 if (
378 self->language &&
379 self->external_scanner_payload &&
380 self->language->external_scanner.destroy &&
381 !ts_language_is_wasm(self->language)
382 ) {
383 self->language->external_scanner.destroy(
384 self->external_scanner_payload
385 );
386 }
387 self->external_scanner_payload = NULL;
388}
389
390static unsigned ts_parser__external_scanner_serialize(
391 TSParser *self
392) {
393 if (ts_language_is_wasm(self->language)) {
394 return ts_wasm_store_call_scanner_serialize(
395 self->wasm_store,
396 (uintptr_t)self->external_scanner_payload,
397 self->lexer.debug_buffer
398 );
399 } else {
400 return self->language->external_scanner.serialize(
401 self->external_scanner_payload,
402 self->lexer.debug_buffer
403 );
404 }
405}
406
407static void ts_parser__external_scanner_deserialize(
408 TSParser *self,
409 Subtree external_token
410) {
411 const char *data = NULL;
412 uint32_t length = 0;
413 if (external_token.ptr) {
414 data = ts_external_scanner_state_data(&external_token.ptr->external_scanner_state);
415 length = external_token.ptr->external_scanner_state.length;
416 }
417
418 if (ts_language_is_wasm(self->language)) {
419 ts_wasm_store_call_scanner_deserialize(
420 self->wasm_store,
421 (uintptr_t)self->external_scanner_payload,
422 data,
423 length
424 );
425 if (ts_wasm_store_has_error(self->wasm_store)) {
426 self->has_scanner_error = true;
427 }
428 } else {
429 self->language->external_scanner.deserialize(
430 self->external_scanner_payload,
431 data,
432 length
433 );
434 }
435}
436
437static bool ts_parser__external_scanner_scan(
438 TSParser *self,
439 TSStateId external_lex_state
440) {
441 if (ts_language_is_wasm(self->language)) {
442 bool result = ts_wasm_store_call_scanner_scan(
443 self->wasm_store,
444 (uintptr_t)self->external_scanner_payload,
445 external_lex_state * self->language->external_token_count
446 );
447 if (ts_wasm_store_has_error(self->wasm_store)) {
448 self->has_scanner_error = true;
449 }
450 return result;
451 } else {
452 const bool *valid_external_tokens = ts_language_enabled_external_tokens(
453 self->language,
454 external_lex_state
455 );
456 return self->language->external_scanner.scan(
457 self->external_scanner_payload,
458 &self->lexer.data,
459 valid_external_tokens
460 );
461 }
462}
463
464static bool ts_parser__can_reuse_first_leaf(
465 TSParser *self,
466 TSStateId state,
467 Subtree tree,
468 TableEntry *table_entry
469) {
470 TSLexMode current_lex_mode = self->language->lex_modes[state];
471 TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
472 TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
473 TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
474
475 // At the end of a non-terminal extra node, the lexer normally returns
476 // NULL, which indicates that the parser should look for a reduce action
477 // at symbol `0`. Avoid reusing tokens in this situation to ensure that
478 // the same thing happens when incrementally reparsing.
479 if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
480
481 // If the token was created in a state with the same set of lookaheads, it is reusable.
482 if (
483 table_entry->action_count > 0 &&
484 memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
485 (
486 leaf_symbol != self->language->keyword_capture_token ||
487 (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state)
488 )
489 ) return true;
490
491 // Empty tokens are not reusable in states with different lookaheads.
492 if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
493
494 // If the current state allows external tokens or other tokens that conflict with this
495 // token, this token is not reusable.
496 return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
497}
498
499static Subtree ts_parser__lex(
500 TSParser *self,
501 StackVersion version,
502 TSStateId parse_state
503) {
504 TSLexMode lex_mode = self->language->lex_modes[parse_state];
505 if (lex_mode.lex_state == (uint16_t)-1) {
506 LOG("no_lookahead_after_non_terminal_extra");
507 return NULL_SUBTREE;
508 }
509
510 const Length start_position = ts_stack_position(self->stack, version);
511 const Subtree external_token = ts_stack_last_external_token(self->stack, version);
512
513 bool found_external_token = false;
514 bool error_mode = parse_state == ERROR_STATE;
515 bool skipped_error = false;
516 bool called_get_column = false;
517 int32_t first_error_character = 0;
518 Length error_start_position = length_zero();
519 Length error_end_position = length_zero();
520 uint32_t lookahead_end_byte = 0;
521 uint32_t external_scanner_state_len = 0;
522 bool external_scanner_state_changed = false;
523 ts_lexer_reset(&self->lexer, start_position);
524
525 for (;;) {
526 bool found_token = false;
527 Length current_position = self->lexer.current_position;
528
529 if (lex_mode.external_lex_state != 0) {
530 LOG(
531 "lex_external state:%d, row:%u, column:%u",
532 lex_mode.external_lex_state,
533 current_position.extent.row,
534 current_position.extent.column
535 );
536 ts_lexer_start(&self->lexer);
537 ts_parser__external_scanner_deserialize(self, external_token);
538 found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state);
539 if (self->has_scanner_error) return NULL_SUBTREE;
540 ts_lexer_finish(&self->lexer, &lookahead_end_byte);
541
542 if (found_token) {
543 external_scanner_state_len = ts_parser__external_scanner_serialize(self);
544 external_scanner_state_changed = !ts_external_scanner_state_eq(
545 ts_subtree_external_scanner_state(external_token),
546 self->lexer.debug_buffer,
547 external_scanner_state_len
548 );
549
550 // When recovering from an error, ignore any zero-length external tokens
551 // unless they have changed the external scanner's state. This helps to
552 // avoid infinite loops which could otherwise occur, because the lexer is
553 // looking for any possible token, instead of looking for the specific set of
554 // tokens that are valid in some parse state.
555 //
556 // Note that it's possible that the token end position may be *before* the
557 // original position of the lexer because of the way that tokens are positioned
558 // at included range boundaries: when a token is terminated at the start of
559 // an included range, it is marked as ending at the *end* of the preceding
560 // included range.
561 if (
562 self->lexer.token_end_position.bytes <= current_position.bytes &&
563 (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
564 !external_scanner_state_changed
565 ) {
566 LOG(
567 "ignore_empty_external_token symbol:%s",
568 SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol])
569 )
570 found_token = false;
571 }
572 }
573
574 if (found_token) {
575 found_external_token = true;
576 called_get_column = self->lexer.did_get_column;
577 break;
578 }
579
580 ts_lexer_reset(&self->lexer, current_position);
581 }
582
583 LOG(
584 "lex_internal state:%d, row:%u, column:%u",
585 lex_mode.lex_state,
586 current_position.extent.row,
587 current_position.extent.column
588 );
589 ts_lexer_start(&self->lexer);
590 found_token = ts_parser__call_main_lex_fn(self, lex_mode);
591 ts_lexer_finish(&self->lexer, &lookahead_end_byte);
592 if (found_token) break;
593
594 if (!error_mode) {
595 error_mode = true;
596 lex_mode = self->language->lex_modes[ERROR_STATE];
597 ts_lexer_reset(&self->lexer, start_position);
598 continue;
599 }
600
601 if (!skipped_error) {
602 LOG("skip_unrecognized_character");
603 skipped_error = true;
604 error_start_position = self->lexer.token_start_position;
605 error_end_position = self->lexer.token_start_position;
606 first_error_character = self->lexer.data.lookahead;
607 }
608
609 if (self->lexer.current_position.bytes == error_end_position.bytes) {
610 if (self->lexer.data.eof(&self->lexer.data)) {
611 self->lexer.data.result_symbol = ts_builtin_sym_error;
612 break;
613 }
614 self->lexer.data.advance(&self->lexer.data, false);
615 }
616
617 error_end_position = self->lexer.current_position;
618 }
619
620 Subtree result;
621 if (skipped_error) {
622 Length padding = length_sub(error_start_position, start_position);
623 Length size = length_sub(error_end_position, error_start_position);
624 uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
625 result = ts_subtree_new_error(
626 &self->tree_pool,
627 first_error_character,
628 padding,
629 size,
630 lookahead_bytes,
631 parse_state,
632 self->language
633 );
634 } else {
635 bool is_keyword = false;
636 TSSymbol symbol = self->lexer.data.result_symbol;
637 Length padding = length_sub(self->lexer.token_start_position, start_position);
638 Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
639 uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
640
641 if (found_external_token) {
642 symbol = self->language->external_scanner.symbol_map[symbol];
643 } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
644 uint32_t end_byte = self->lexer.token_end_position.bytes;
645 ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
646 ts_lexer_start(&self->lexer);
647
648 is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode);
649
650 if (
651 is_keyword &&
652 self->lexer.token_end_position.bytes == end_byte &&
653 ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
654 ) {
655 symbol = self->lexer.data.result_symbol;
656 }
657 }
658
659 result = ts_subtree_new_leaf(
660 &self->tree_pool,
661 symbol,
662 padding,
663 size,
664 lookahead_bytes,
665 parse_state,
666 found_external_token,
667 called_get_column,
668 is_keyword,
669 self->language
670 );
671
672 if (found_external_token) {
673 MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result);
674 ts_external_scanner_state_init(
675 &mut_result.ptr->external_scanner_state,
676 self->lexer.debug_buffer,
677 external_scanner_state_len
678 );
679 mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed;
680 }
681 }
682
683 LOG_LOOKAHEAD(
684 SYM_NAME(ts_subtree_symbol(result)),
685 ts_subtree_total_size(result).bytes
686 );
687 return result;
688}
689
690static Subtree ts_parser__get_cached_token(
691 TSParser *self,
692 TSStateId state,
693 size_t position,
694 Subtree last_external_token,
695 TableEntry *table_entry
696) {
697 TokenCache *cache = &self->token_cache;
698 if (
699 cache->token.ptr && cache->byte_index == position &&
700 ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
701 ) {
702 ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
703 if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
704 ts_subtree_retain(cache->token);
705 return cache->token;
706 }
707 }
708 return NULL_SUBTREE;
709}
710
711static void ts_parser__set_cached_token(
712 TSParser *self,
713 uint32_t byte_index,
714 Subtree last_external_token,
715 Subtree token
716) {
717 TokenCache *cache = &self->token_cache;
718 if (token.ptr) ts_subtree_retain(token);
719 if (last_external_token.ptr) ts_subtree_retain(last_external_token);
720 if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
721 if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token);
722 cache->token = token;
723 cache->byte_index = byte_index;
724 cache->last_external_token = last_external_token;
725}
726
727static bool ts_parser__has_included_range_difference(
728 const TSParser *self,
729 uint32_t start_position,
730 uint32_t end_position
731) {
732 return ts_range_array_intersects(
733 &self->included_range_differences,
734 self->included_range_difference_index,
735 start_position,
736 end_position
737 );
738}
739
740static Subtree ts_parser__reuse_node(
741 TSParser *self,
742 StackVersion version,
743 TSStateId *state,
744 uint32_t position,
745 Subtree last_external_token,
746 TableEntry *table_entry
747) {
748 Subtree result;
749 while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
750 uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
751 uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
752
753 // Do not reuse an EOF node if the included ranges array has changes
754 // later on in the file.
755 if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
756
757 if (byte_offset > position) {
758 LOG("before_reusable_node symbol:%s", TREE_NAME(result));
759 break;
760 }
761
762 if (byte_offset < position) {
763 LOG("past_reusable_node symbol:%s", TREE_NAME(result));
764 if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
765 reusable_node_advance(&self->reusable_node);
766 }
767 continue;
768 }
769
770 if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
771 LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
772 reusable_node_advance(&self->reusable_node);
773 continue;
774 }
775
776 const char *reason = NULL;
777 if (ts_subtree_has_changes(result)) {
778 reason = "has_changes";
779 } else if (ts_subtree_is_error(result)) {
780 reason = "is_error";
781 } else if (ts_subtree_missing(result)) {
782 reason = "is_missing";
783 } else if (ts_subtree_is_fragile(result)) {
784 reason = "is_fragile";
785 } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
786 reason = "contains_different_included_range";
787 }
788
789 if (reason) {
790 LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
791 if (!reusable_node_descend(&self->reusable_node)) {
792 reusable_node_advance(&self->reusable_node);
793 ts_parser__breakdown_top_of_stack(self, version);
794 *state = ts_stack_state(self->stack, version);
795 }
796 continue;
797 }
798
799 TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
800 ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
801 if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
802 LOG(
803 "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
804 TREE_NAME(result),
805 SYM_NAME(leaf_symbol)
806 );
807 reusable_node_advance_past_leaf(&self->reusable_node);
808 break;
809 }
810
811 LOG("reuse_node symbol:%s", TREE_NAME(result));
812 ts_subtree_retain(result);
813 return result;
814 }
815
816 return NULL_SUBTREE;
817}
818
819// Determine if a given tree should be replaced by an alternative tree.
820//
821// The decision is based on the trees' error costs (if any), their dynamic precedence,
822// and finally, as a default, by a recursive comparison of the trees' symbols.
823static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
824 if (!left.ptr) return true;
825 if (!right.ptr) return false;
826
827 if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
828 LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
829 return true;
830 }
831
832 if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
833 LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
834 return false;
835 }
836
837 if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) {
838 LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32,
839 TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
840 ts_subtree_dynamic_precedence(left));
841 return true;
842 }
843
844 if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) {
845 LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32,
846 TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right),
847 ts_subtree_dynamic_precedence(right));
848 return false;
849 }
850
851 if (ts_subtree_error_cost(left) > 0) return true;
852
853 int comparison = ts_subtree_compare(left, right, &self->tree_pool);
854 switch (comparison) {
855 case -1:
856 LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
857 return false;
858 break;
859 case 1:
860 LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
861 return true;
862 default:
863 LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
864 return false;
865 }
866}
867
868// Determine if a given tree's children should be replaced by an alternative
869// array of children.
870static bool ts_parser__select_children(
871 TSParser *self,
872 Subtree left,
873 const SubtreeArray *children
874) {
875 array_assign(&self->scratch_trees, children);
876
877 // Create a temporary subtree using the scratch trees array. This node does
878 // not perform any allocation except for possibly growing the array to make
879 // room for its own heap data. The scratch tree is never explicitly released,
880 // so the same 'scratch trees' array can be reused again later.
881 MutableSubtree scratch_tree = ts_subtree_new_node(
882 ts_subtree_symbol(left),
883 &self->scratch_trees,
884 0,
885 self->language
886 );
887
888 return ts_parser__select_tree(
889 self,
890 left,
891 ts_subtree_from_mut(scratch_tree)
892 );
893}
894
895static void ts_parser__shift(
896 TSParser *self,
897 StackVersion version,
898 TSStateId state,
899 Subtree lookahead,
900 bool extra
901) {
902 bool is_leaf = ts_subtree_child_count(lookahead) == 0;
903 Subtree subtree_to_push = lookahead;
904 if (extra != ts_subtree_extra(lookahead) && is_leaf) {
905 MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
906 ts_subtree_set_extra(&result, extra);
907 subtree_to_push = ts_subtree_from_mut(result);
908 }
909
910 ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
911 if (ts_subtree_has_external_tokens(subtree_to_push)) {
912 ts_stack_set_last_external_token(
913 self->stack, version, ts_subtree_last_external_token(subtree_to_push)
914 );
915 }
916}
917
918static StackVersion ts_parser__reduce(
919 TSParser *self,
920 StackVersion version,
921 TSSymbol symbol,
922 uint32_t count,
923 int dynamic_precedence,
924 uint16_t production_id,
925 bool is_fragile,
926 bool end_of_non_terminal_extra
927) {
928 uint32_t initial_version_count = ts_stack_version_count(self->stack);
929
930 // Pop the given number of nodes from the given version of the parse stack.
931 // If stack versions have previously merged, then there may be more than one
932 // path back through the stack. For each path, create a new parent node to
933 // contain the popped children, and push it onto the stack in place of the
934 // children.
935 StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
936 uint32_t removed_version_count = 0;
937 for (uint32_t i = 0; i < pop.size; i++) {
938 StackSlice slice = pop.contents[i];
939 StackVersion slice_version = slice.version - removed_version_count;
940
941 // This is where new versions are added to the parse stack. The versions
942 // will all be sorted and truncated at the end of the outer parsing loop.
943 // Allow the maximum version count to be temporarily exceeded, but only
944 // by a limited threshold.
945 if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
946 ts_stack_remove_version(self->stack, slice_version);
947 ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
948 removed_version_count++;
949 while (i + 1 < pop.size) {
950 StackSlice next_slice = pop.contents[i + 1];
951 if (next_slice.version != slice.version) break;
952 ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
953 i++;
954 }
955 continue;
956 }
957
958 // Extra tokens on top of the stack should not be included in this new parent
959 // node. They will be re-pushed onto the stack after the parent node is
960 // created and pushed.
961 SubtreeArray children = slice.subtrees;
962 ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
963
964 MutableSubtree parent = ts_subtree_new_node(
965 symbol, &children, production_id, self->language
966 );
967
968 // This pop operation may have caused multiple stack versions to collapse
969 // into one, because they all diverged from a common state. In that case,
970 // choose one of the arrays of trees to be the parent node's children, and
971 // delete the rest of the tree arrays.
972 while (i + 1 < pop.size) {
973 StackSlice next_slice = pop.contents[i + 1];
974 if (next_slice.version != slice.version) break;
975 i++;
976
977 SubtreeArray next_slice_children = next_slice.subtrees;
978 ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2);
979
980 if (ts_parser__select_children(
981 self,
982 ts_subtree_from_mut(parent),
983 &next_slice_children
984 )) {
985 ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
986 ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
987 array_swap(&self->trailing_extras, &self->trailing_extras2);
988 parent = ts_subtree_new_node(
989 symbol, &next_slice_children, production_id, self->language
990 );
991 } else {
992 array_clear(&self->trailing_extras2);
993 ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
994 }
995 }
996
997 TSStateId state = ts_stack_state(self->stack, slice_version);
998 TSStateId next_state = ts_language_next_state(self->language, state, symbol);
999 if (end_of_non_terminal_extra && next_state == state) {
1000 parent.ptr->extra = true;
1001 }
1002 if (is_fragile || pop.size > 1 || initial_version_count > 1) {
1003 parent.ptr->fragile_left = true;
1004 parent.ptr->fragile_right = true;
1005 parent.ptr->parse_state = TS_TREE_STATE_NONE;
1006 } else {
1007 parent.ptr->parse_state = state;
1008 }
1009 parent.ptr->dynamic_precedence += dynamic_precedence;
1010
1011 // Push the parent node onto the stack, along with any extra tokens that
1012 // were previously on top of the stack.
1013 ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
1014 for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
1015 ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
1016 }
1017
1018 for (StackVersion j = 0; j < slice_version; j++) {
1019 if (j == version) continue;
1020 if (ts_stack_merge(self->stack, j, slice_version)) {
1021 removed_version_count++;
1022 break;
1023 }
1024 }
1025 }
1026
1027 // Return the first new stack version that was created.
1028 return ts_stack_version_count(self->stack) > initial_version_count
1029 ? initial_version_count
1030 : STACK_VERSION_NONE;
1031}
1032
1033static void ts_parser__accept(
1034 TSParser *self,
1035 StackVersion version,
1036 Subtree lookahead
1037) {
1038 assert(ts_subtree_is_eof(lookahead));
1039 ts_stack_push(self->stack, version, lookahead, false, 1);
1040
1041 StackSliceArray pop = ts_stack_pop_all(self->stack, version);
1042 for (uint32_t i = 0; i < pop.size; i++) {
1043 SubtreeArray trees = pop.contents[i].subtrees;
1044
1045 Subtree root = NULL_SUBTREE;
1046 for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
1047 Subtree tree = trees.contents[j];
1048 if (!ts_subtree_extra(tree)) {
1049 assert(!tree.data.is_inline);
1050 uint32_t child_count = ts_subtree_child_count(tree);
1051 const Subtree *children = ts_subtree_children(tree);
1052 for (uint32_t k = 0; k < child_count; k++) {
1053 ts_subtree_retain(children[k]);
1054 }
1055 array_splice(&trees, j, 1, child_count, children);
1056 root = ts_subtree_from_mut(ts_subtree_new_node(
1057 ts_subtree_symbol(tree),
1058 &trees,
1059 tree.ptr->production_id,
1060 self->language
1061 ));
1062 ts_subtree_release(&self->tree_pool, tree);
1063 break;
1064 }
1065 }
1066
1067 assert(root.ptr);
1068 self->accept_count++;
1069
1070 if (self->finished_tree.ptr) {
1071 if (ts_parser__select_tree(self, self->finished_tree, root)) {
1072 ts_subtree_release(&self->tree_pool, self->finished_tree);
1073 self->finished_tree = root;
1074 } else {
1075 ts_subtree_release(&self->tree_pool, root);
1076 }
1077 } else {
1078 self->finished_tree = root;
1079 }
1080 }
1081
1082 ts_stack_remove_version(self->stack, pop.contents[0].version);
1083 ts_stack_halt(self->stack, version);
1084}
1085
1086static bool ts_parser__do_all_potential_reductions(
1087 TSParser *self,
1088 StackVersion starting_version,
1089 TSSymbol lookahead_symbol
1090) {
1091 uint32_t initial_version_count = ts_stack_version_count(self->stack);
1092
1093 bool can_shift_lookahead_symbol = false;
1094 StackVersion version = starting_version;
1095 for (unsigned i = 0; true; i++) {
1096 uint32_t version_count = ts_stack_version_count(self->stack);
1097 if (version >= version_count) break;
1098
1099 bool merged = false;
1100 for (StackVersion j = initial_version_count; j < version; j++) {
1101 if (ts_stack_merge(self->stack, j, version)) {
1102 merged = true;
1103 break;
1104 }
1105 }
1106 if (merged) continue;
1107
1108 TSStateId state = ts_stack_state(self->stack, version);
1109 bool has_shift_action = false;
1110 array_clear(&self->reduce_actions);
1111
1112 TSSymbol first_symbol, end_symbol;
1113 if (lookahead_symbol != 0) {
1114 first_symbol = lookahead_symbol;
1115 end_symbol = lookahead_symbol + 1;
1116 } else {
1117 first_symbol = 1;
1118 end_symbol = self->language->token_count;
1119 }
1120
1121 for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
1122 TableEntry entry;
1123 ts_language_table_entry(self->language, state, symbol, &entry);
1124 for (uint32_t j = 0; j < entry.action_count; j++) {
1125 TSParseAction action = entry.actions[j];
1126 switch (action.type) {
1127 case TSParseActionTypeShift:
1128 case TSParseActionTypeRecover:
1129 if (!action.shift.extra && !action.shift.repetition) has_shift_action = true;
1130 break;
1131 case TSParseActionTypeReduce:
1132 if (action.reduce.child_count > 0)
1133 ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) {
1134 .symbol = action.reduce.symbol,
1135 .count = action.reduce.child_count,
1136 .dynamic_precedence = action.reduce.dynamic_precedence,
1137 .production_id = action.reduce.production_id,
1138 });
1139 break;
1140 default:
1141 break;
1142 }
1143 }
1144 }
1145
1146 StackVersion reduction_version = STACK_VERSION_NONE;
1147 for (uint32_t j = 0; j < self->reduce_actions.size; j++) {
1148 ReduceAction action = self->reduce_actions.contents[j];
1149
1150 reduction_version = ts_parser__reduce(
1151 self, version, action.symbol, action.count,
1152 action.dynamic_precedence, action.production_id,
1153 true, false
1154 );
1155 }
1156
1157 if (has_shift_action) {
1158 can_shift_lookahead_symbol = true;
1159 } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
1160 ts_stack_renumber_version(self->stack, reduction_version, version);
1161 continue;
1162 } else if (lookahead_symbol != 0) {
1163 ts_stack_remove_version(self->stack, version);
1164 }
1165
1166 if (version == starting_version) {
1167 version = version_count;
1168 } else {
1169 version++;
1170 }
1171 }
1172
1173 return can_shift_lookahead_symbol;
1174}
1175
1176static bool ts_parser__recover_to_state(
1177 TSParser *self,
1178 StackVersion version,
1179 unsigned depth,
1180 TSStateId goal_state
1181) {
1182 StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
1183 StackVersion previous_version = STACK_VERSION_NONE;
1184
1185 for (unsigned i = 0; i < pop.size; i++) {
1186 StackSlice slice = pop.contents[i];
1187
1188 if (slice.version == previous_version) {
1189 ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1190 array_erase(&pop, i--);
1191 continue;
1192 }
1193
1194 if (ts_stack_state(self->stack, slice.version) != goal_state) {
1195 ts_stack_halt(self->stack, slice.version);
1196 ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1197 array_erase(&pop, i--);
1198 continue;
1199 }
1200
1201 SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
1202 if (error_trees.size > 0) {
1203 assert(error_trees.size == 1);
1204 Subtree error_tree = error_trees.contents[0];
1205 uint32_t error_child_count = ts_subtree_child_count(error_tree);
1206 if (error_child_count > 0) {
1207 array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
1208 for (unsigned j = 0; j < error_child_count; j++) {
1209 ts_subtree_retain(slice.subtrees.contents[j]);
1210 }
1211 }
1212 ts_subtree_array_delete(&self->tree_pool, &error_trees);
1213 }
1214
1215 ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
1216
1217 if (slice.subtrees.size > 0) {
1218 Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
1219 ts_stack_push(self->stack, slice.version, error, false, goal_state);
1220 } else {
1221 array_delete(&slice.subtrees);
1222 }
1223
1224 for (unsigned j = 0; j < self->trailing_extras.size; j++) {
1225 Subtree tree = self->trailing_extras.contents[j];
1226 ts_stack_push(self->stack, slice.version, tree, false, goal_state);
1227 }
1228
1229 previous_version = slice.version;
1230 }
1231
1232 return previous_version != STACK_VERSION_NONE;
1233}
1234
1235static void ts_parser__recover(
1236 TSParser *self,
1237 StackVersion version,
1238 Subtree lookahead
1239) {
1240 bool did_recover = false;
1241 unsigned previous_version_count = ts_stack_version_count(self->stack);
1242 Length position = ts_stack_position(self->stack, version);
1243 StackSummary *summary = ts_stack_get_summary(self->stack, version);
1244 unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
1245 unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
1246
1247 // When the parser is in the error state, there are two strategies for recovering with a
1248 // given lookahead token:
1249 // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
1250 // create a new stack version that is in that state again. This entails popping all of the
1251 // subtrees that have been pushed onto the stack since that previous state, and wrapping
1252 // them in an ERROR node.
1253 // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
1254 // move on to the next lookahead token, remaining in the error state.
1255 //
1256 // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
1257 // of the previous parse states and their depths. Look at each state in the summary, to see
1258 // if the current lookahead token would be valid in that state.
1259 if (summary && !ts_subtree_is_error(lookahead)) {
1260 for (unsigned i = 0; i < summary->size; i++) {
1261 StackSummaryEntry entry = summary->contents[i];
1262
1263 if (entry.state == ERROR_STATE) continue;
1264 if (entry.position.bytes == position.bytes) continue;
1265 unsigned depth = entry.depth;
1266 if (node_count_since_error > 0) depth++;
1267
1268 // Do not recover in ways that create redundant stack versions.
1269 bool would_merge = false;
1270 for (unsigned j = 0; j < previous_version_count; j++) {
1271 if (
1272 ts_stack_state(self->stack, j) == entry.state &&
1273 ts_stack_position(self->stack, j).bytes == position.bytes
1274 ) {
1275 would_merge = true;
1276 break;
1277 }
1278 }
1279 if (would_merge) continue;
1280
1281 // Do not recover if the result would clearly be worse than some existing stack version.
1282 unsigned new_cost =
1283 current_error_cost +
1284 entry.depth * ERROR_COST_PER_SKIPPED_TREE +
1285 (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
1286 (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
1287 if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
1288
1289 // If the current lookahead token is valid in some previous state, recover to that state.
1290 // Then stop looking for further recoveries.
1291 if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
1292 if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
1293 did_recover = true;
1294 LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
1295 LOG_STACK();
1296 break;
1297 }
1298 }
1299 }
1300 }
1301
1302 // In the process of attempting to recover, some stack versions may have been created
1303 // and subsequently halted. Remove those versions.
1304 for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
1305 if (!ts_stack_is_active(self->stack, i)) {
1306 ts_stack_remove_version(self->stack, i--);
1307 }
1308 }
1309
1310 // If strategy 1 succeeded, a new stack version will have been created which is able to handle
1311 // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
1312 // current lookahead token by wrapping it in an ERROR node.
1313
1314 // Don't pursue this additional strategy if there are already too many stack versions.
1315 if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1316 ts_stack_halt(self->stack, version);
1317 ts_subtree_release(&self->tree_pool, lookahead);
1318 return;
1319 }
1320
1321 if (
1322 did_recover &&
1323 ts_subtree_has_external_scanner_state_change(lookahead)
1324 ) {
1325 ts_stack_halt(self->stack, version);
1326 ts_subtree_release(&self->tree_pool, lookahead);
1327 return;
1328 }
1329
1330 // If the parser is still in the error state at the end of the file, just wrap everything
1331 // in an ERROR node and terminate.
1332 if (ts_subtree_is_eof(lookahead)) {
1333 LOG("recover_eof");
1334 SubtreeArray children = array_new();
1335 Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
1336 ts_stack_push(self->stack, version, parent, false, 1);
1337 ts_parser__accept(self, version, lookahead);
1338 return;
1339 }
1340
1341 // Do not recover if the result would clearly be worse than some existing stack version.
1342 unsigned new_cost =
1343 current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
1344 ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
1345 ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
1346 if (ts_parser__better_version_exists(self, version, false, new_cost)) {
1347 ts_stack_halt(self->stack, version);
1348 ts_subtree_release(&self->tree_pool, lookahead);
1349 return;
1350 }
1351
1352 // If the current lookahead token is an extra token, mark it as extra. This means it won't
1353 // be counted in error cost calculations.
1354 unsigned n;
1355 const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
1356 if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) {
1357 MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1358 ts_subtree_set_extra(&mutable_lookahead, true);
1359 lookahead = ts_subtree_from_mut(mutable_lookahead);
1360 }
1361
1362 // Wrap the lookahead token in an ERROR.
1363 LOG("skip_token symbol:%s", TREE_NAME(lookahead));
1364 SubtreeArray children = array_new();
1365 array_reserve(&children, 1);
1366 array_push(&children, lookahead);
1367 MutableSubtree error_repeat = ts_subtree_new_node(
1368 ts_builtin_sym_error_repeat,
1369 &children,
1370 0,
1371 self->language
1372 );
1373
1374 // If other tokens have already been skipped, so there is already an ERROR at the top of the
1375 // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
1376 // ERROR.
1377 if (node_count_since_error > 0) {
1378 StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
1379
1380 // TODO: Figure out how to make this condition occur.
1381 // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
1382 // If multiple stack versions have merged at this point, just pick one of the errors
1383 // arbitrarily and discard the rest.
1384 if (pop.size > 1) {
1385 for (unsigned i = 1; i < pop.size; i++) {
1386 ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
1387 }
1388 while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
1389 ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
1390 }
1391 }
1392
1393 ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
1394 array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
1395 error_repeat = ts_subtree_new_node(
1396 ts_builtin_sym_error_repeat,
1397 &pop.contents[0].subtrees,
1398 0,
1399 self->language
1400 );
1401 }
1402
1403 // Push the new ERROR onto the stack.
1404 ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
1405 if (ts_subtree_has_external_tokens(lookahead)) {
1406 ts_stack_set_last_external_token(
1407 self->stack, version, ts_subtree_last_external_token(lookahead)
1408 );
1409 }
1410}
1411
1412static void ts_parser__handle_error(
1413 TSParser *self,
1414 StackVersion version,
1415 Subtree lookahead
1416) {
1417 uint32_t previous_version_count = ts_stack_version_count(self->stack);
1418
1419 // Perform any reductions that can happen in this state, regardless of the lookahead. After
1420 // skipping one or more invalid tokens, the parser might find a token that would have allowed
1421 // a reduction to take place.
1422 ts_parser__do_all_potential_reductions(self, version, 0);
1423 uint32_t version_count = ts_stack_version_count(self->stack);
1424 Length position = ts_stack_position(self->stack, version);
1425
1426 // Push a discontinuity onto the stack. Merge all of the stack versions that
1427 // were created in the previous step.
1428 bool did_insert_missing_token = false;
1429 for (StackVersion v = version; v < version_count;) {
1430 if (!did_insert_missing_token) {
1431 TSStateId state = ts_stack_state(self->stack, v);
1432 for (
1433 TSSymbol missing_symbol = 1;
1434 missing_symbol < (uint16_t)self->language->token_count;
1435 missing_symbol++
1436 ) {
1437 TSStateId state_after_missing_symbol = ts_language_next_state(
1438 self->language, state, missing_symbol
1439 );
1440 if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
1441 continue;
1442 }
1443
1444 if (ts_language_has_reduce_action(
1445 self->language,
1446 state_after_missing_symbol,
1447 ts_subtree_leaf_symbol(lookahead)
1448 )) {
1449 // In case the parser is currently outside of any included range, the lexer will
1450 // snap to the beginning of the next included range. The missing token's padding
1451 // must be assigned to position it within the next included range.
1452 ts_lexer_reset(&self->lexer, position);
1453 ts_lexer_mark_end(&self->lexer);
1454 Length padding = length_sub(self->lexer.token_end_position, position);
1455 uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead);
1456
1457 StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
1458 Subtree missing_tree = ts_subtree_new_missing_leaf(
1459 &self->tree_pool, missing_symbol,
1460 padding, lookahead_bytes,
1461 self->language
1462 );
1463 ts_stack_push(
1464 self->stack, version_with_missing_tree,
1465 missing_tree, false,
1466 state_after_missing_symbol
1467 );
1468
1469 if (ts_parser__do_all_potential_reductions(
1470 self, version_with_missing_tree,
1471 ts_subtree_leaf_symbol(lookahead)
1472 )) {
1473 LOG(
1474 "recover_with_missing symbol:%s, state:%u",
1475 SYM_NAME(missing_symbol),
1476 ts_stack_state(self->stack, version_with_missing_tree)
1477 );
1478 did_insert_missing_token = true;
1479 break;
1480 }
1481 }
1482 }
1483 }
1484
1485 ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
1486 v = (v == version) ? previous_version_count : v + 1;
1487 }
1488
1489 for (unsigned i = previous_version_count; i < version_count; i++) {
1490 bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
1491 assert(did_merge);
1492 (void)did_merge; // fix warning/error with clang -Os
1493 }
1494
1495 ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
1496
1497 // Begin recovery with the current lookahead node, rather than waiting for the
1498 // next turn of the parse loop. This ensures that the tree accounts for the
1499 // current lookahead token's "lookahead bytes" value, which describes how far
1500 // the lexer needed to look ahead beyond the content of the token in order to
1501 // recognize it.
1502 if (ts_subtree_child_count(lookahead) > 0) {
1503 ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1504 }
1505 ts_parser__recover(self, version, lookahead);
1506
1507 LOG_STACK();
1508}
1509
1510static bool ts_parser__advance(
1511 TSParser *self,
1512 StackVersion version,
1513 bool allow_node_reuse
1514) {
1515 TSStateId state = ts_stack_state(self->stack, version);
1516 uint32_t position = ts_stack_position(self->stack, version).bytes;
1517 Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
1518
1519 bool did_reuse = true;
1520 Subtree lookahead = NULL_SUBTREE;
1521 TableEntry table_entry = {.action_count = 0};
1522
1523 // If possible, reuse a node from the previous syntax tree.
1524 if (allow_node_reuse) {
1525 lookahead = ts_parser__reuse_node(
1526 self, version, &state, position, last_external_token, &table_entry
1527 );
1528 }
1529
1530 // If no node from the previous syntax tree could be reused, then try to
1531 // reuse the token previously returned by the lexer.
1532 if (!lookahead.ptr) {
1533 did_reuse = false;
1534 lookahead = ts_parser__get_cached_token(
1535 self, state, position, last_external_token, &table_entry
1536 );
1537 }
1538
1539 bool needs_lex = !lookahead.ptr;
1540 for (;;) {
1541 // Otherwise, re-run the lexer.
1542 if (needs_lex) {
1543 needs_lex = false;
1544 lookahead = ts_parser__lex(self, version, state);
1545 if (self->has_scanner_error) return false;
1546
1547 if (lookahead.ptr) {
1548 ts_parser__set_cached_token(self, position, last_external_token, lookahead);
1549 ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
1550 }
1551
1552 // When parsing a non-terminal extra, a null lookahead indicates the
1553 // end of the rule. The reduction is stored in the EOF table entry.
1554 // After the reduction, the lexer needs to be run again.
1555 else {
1556 ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
1557 }
1558 }
1559
1560 // If a cancellation flag or a timeout was provided, then check every
1561 // time a fixed number of parse actions has been processed.
1562 if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
1563 self->operation_count = 0;
1564 }
1565 if (
1566 self->operation_count == 0 &&
1567 ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1568 (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
1569 ) {
1570 if (lookahead.ptr) {
1571 ts_subtree_release(&self->tree_pool, lookahead);
1572 }
1573 return false;
1574 }
1575
1576 // Process each parse action for the current lookahead token in
1577 // the current state. If there are multiple actions, then this is
1578 // an ambiguous state. REDUCE actions always create a new stack
1579 // version, whereas SHIFT actions update the existing stack version
1580 // and terminate this loop.
1581 StackVersion last_reduction_version = STACK_VERSION_NONE;
1582 for (uint32_t i = 0; i < table_entry.action_count; i++) {
1583 TSParseAction action = table_entry.actions[i];
1584
1585 switch (action.type) {
1586 case TSParseActionTypeShift: {
1587 if (action.shift.repetition) break;
1588 TSStateId next_state;
1589 if (action.shift.extra) {
1590 next_state = state;
1591 LOG("shift_extra");
1592 } else {
1593 next_state = action.shift.state;
1594 LOG("shift state:%u", next_state);
1595 }
1596
1597 if (ts_subtree_child_count(lookahead) > 0) {
1598 ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
1599 next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
1600 }
1601
1602 ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
1603 if (did_reuse) reusable_node_advance(&self->reusable_node);
1604 return true;
1605 }
1606
1607 case TSParseActionTypeReduce: {
1608 bool is_fragile = table_entry.action_count > 1;
1609 bool end_of_non_terminal_extra = lookahead.ptr == NULL;
1610 LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count);
1611 StackVersion reduction_version = ts_parser__reduce(
1612 self, version, action.reduce.symbol, action.reduce.child_count,
1613 action.reduce.dynamic_precedence, action.reduce.production_id,
1614 is_fragile, end_of_non_terminal_extra
1615 );
1616 if (reduction_version != STACK_VERSION_NONE) {
1617 last_reduction_version = reduction_version;
1618 }
1619 break;
1620 }
1621
1622 case TSParseActionTypeAccept: {
1623 LOG("accept");
1624 ts_parser__accept(self, version, lookahead);
1625 return true;
1626 }
1627
1628 case TSParseActionTypeRecover: {
1629 if (ts_subtree_child_count(lookahead) > 0) {
1630 ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1631 }
1632
1633 ts_parser__recover(self, version, lookahead);
1634 if (did_reuse) reusable_node_advance(&self->reusable_node);
1635 return true;
1636 }
1637 }
1638 }
1639
1640 // If a reduction was performed, then replace the current stack version
1641 // with one of the stack versions created by a reduction, and continue
1642 // processing this version of the stack with the same lookahead symbol.
1643 if (last_reduction_version != STACK_VERSION_NONE) {
1644 ts_stack_renumber_version(self->stack, last_reduction_version, version);
1645 LOG_STACK();
1646 state = ts_stack_state(self->stack, version);
1647
1648 // At the end of a non-terminal extra rule, the lexer will return a
1649 // null subtree, because the parser needs to perform a fixed reduction
1650 // regardless of the lookahead node. After performing that reduction,
1651 // (and completing the non-terminal extra rule) run the lexer again based
1652 // on the current parse state.
1653 if (!lookahead.ptr) {
1654 needs_lex = true;
1655 } else {
1656 ts_language_table_entry(
1657 self->language,
1658 state,
1659 ts_subtree_leaf_symbol(lookahead),
1660 &table_entry
1661 );
1662 }
1663
1664 continue;
1665 }
1666
1667 // A non-terminal extra rule was reduced and merged into an existing
1668 // stack version. This version can be discarded.
1669 if (!lookahead.ptr) {
1670 ts_stack_halt(self->stack, version);
1671 return true;
1672 }
1673
1674 // If there were no parse actions for the current lookahead token, then
1675 // it is not valid in this state. If the current lookahead token is a
1676 // keyword, then switch to treating it as the normal word token if that
1677 // token is valid in this state.
1678 if (
1679 ts_subtree_is_keyword(lookahead) &&
1680 ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
1681 ) {
1682 ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
1683 if (table_entry.action_count > 0) {
1684 LOG(
1685 "switch from_keyword:%s, to_word_token:%s",
1686 TREE_NAME(lookahead),
1687 SYM_NAME(self->language->keyword_capture_token)
1688 );
1689
1690 MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1691 ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
1692 lookahead = ts_subtree_from_mut(mutable_lookahead);
1693 continue;
1694 }
1695 }
1696
1697 // If the current lookahead token is not valid and the parser is
1698 // already in the error state, restart the error recovery process.
1699 // TODO - can this be unified with the other `RECOVER` case above?
1700 if (state == ERROR_STATE) {
1701 ts_parser__recover(self, version, lookahead);
1702 return true;
1703 }
1704
1705 // If the current lookahead token is not valid and the previous
1706 // subtree on the stack was reused from an old tree, it isn't actually
1707 // valid to reuse it. Remove it from the stack, and in its place,
1708 // push each of its children. Then try again to process the current
1709 // lookahead.
1710 if (ts_parser__breakdown_top_of_stack(self, version)) {
1711 state = ts_stack_state(self->stack, version);
1712 ts_subtree_release(&self->tree_pool, lookahead);
1713 needs_lex = true;
1714 continue;
1715 }
1716
1717 // At this point, the current lookahead token is definitely not valid
1718 // for this parse stack version. Mark this version as paused and continue
1719 // processing any other stack versions that might exist. If some other
1720 // version advances successfully, then this version can simply be removed.
1721 // But if all versions end up paused, then error recovery is needed.
1722 LOG("detect_error");
1723 ts_stack_pause(self->stack, version, lookahead);
1724 return true;
1725 }
1726}
1727
1728static unsigned ts_parser__condense_stack(TSParser *self) {
1729 bool made_changes = false;
1730 unsigned min_error_cost = UINT_MAX;
1731 for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
1732 // Prune any versions that have been marked for removal.
1733 if (ts_stack_is_halted(self->stack, i)) {
1734 ts_stack_remove_version(self->stack, i);
1735 i--;
1736 continue;
1737 }
1738
1739 // Keep track of the minimum error cost of any stack version so
1740 // that it can be returned.
1741 ErrorStatus status_i = ts_parser__version_status(self, i);
1742 if (!status_i.is_in_error && status_i.cost < min_error_cost) {
1743 min_error_cost = status_i.cost;
1744 }
1745
1746 // Examine each pair of stack versions, removing any versions that
1747 // are clearly worse than another version. Ensure that the versions
1748 // are ordered from most promising to least promising.
1749 for (StackVersion j = 0; j < i; j++) {
1750 ErrorStatus status_j = ts_parser__version_status(self, j);
1751
1752 switch (ts_parser__compare_versions(self, status_j, status_i)) {
1753 case ErrorComparisonTakeLeft:
1754 made_changes = true;
1755 ts_stack_remove_version(self->stack, i);
1756 i--;
1757 j = i;
1758 break;
1759
1760 case ErrorComparisonPreferLeft:
1761 case ErrorComparisonNone:
1762 if (ts_stack_merge(self->stack, j, i)) {
1763 made_changes = true;
1764 i--;
1765 j = i;
1766 }
1767 break;
1768
1769 case ErrorComparisonPreferRight:
1770 made_changes = true;
1771 if (ts_stack_merge(self->stack, j, i)) {
1772 i--;
1773 j = i;
1774 } else {
1775 ts_stack_swap_versions(self->stack, i, j);
1776 }
1777 break;
1778
1779 case ErrorComparisonTakeRight:
1780 made_changes = true;
1781 ts_stack_remove_version(self->stack, j);
1782 i--;
1783 j--;
1784 break;
1785 }
1786 }
1787 }
1788
1789 // Enforce a hard upper bound on the number of stack versions by
1790 // discarding the least promising versions.
1791 while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1792 ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
1793 made_changes = true;
1794 }
1795
1796 // If the best-performing stack version is currently paused, or all
1797 // versions are paused, then resume the best paused version and begin
1798 // the error recovery process. Otherwise, remove the paused versions.
1799 if (ts_stack_version_count(self->stack) > 0) {
1800 bool has_unpaused_version = false;
1801 for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
1802 if (ts_stack_is_paused(self->stack, i)) {
1803 if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
1804 LOG("resume version:%u", i);
1805 min_error_cost = ts_stack_error_cost(self->stack, i);
1806 Subtree lookahead = ts_stack_resume(self->stack, i);
1807 ts_parser__handle_error(self, i, lookahead);
1808 has_unpaused_version = true;
1809 } else {
1810 ts_stack_remove_version(self->stack, i);
1811 i--;
1812 n--;
1813 }
1814 } else {
1815 has_unpaused_version = true;
1816 }
1817 }
1818 }
1819
1820 if (made_changes) {
1821 LOG("condense");
1822 LOG_STACK();
1823 }
1824
1825 return min_error_cost;
1826}
1827
1828static bool ts_parser_has_outstanding_parse(TSParser *self) {
1829 return (
1830 self->external_scanner_payload ||
1831 ts_stack_state(self->stack, 0) != 1 ||
1832 ts_stack_node_count_since_error(self->stack, 0) != 0
1833 );
1834}
1835
1836// Parser - Public
1837
1838TSParser *ts_parser_new(void) {
1839 TSParser *self = ts_calloc(1, sizeof(TSParser));
1840 ts_lexer_init(&self->lexer);
1841 array_init(&self->reduce_actions);
1842 array_reserve(&self->reduce_actions, 4);
1843 self->tree_pool = ts_subtree_pool_new(32);
1844 self->stack = ts_stack_new(&self->tree_pool);
1845 self->finished_tree = NULL_SUBTREE;
1846 self->reusable_node = reusable_node_new();
1847 self->dot_graph_file = NULL;
1848 self->cancellation_flag = NULL;
1849 self->timeout_duration = 0;
1850 self->language = NULL;
1851 self->has_scanner_error = false;
1852 self->external_scanner_payload = NULL;
1853 self->end_clock = clock_null();
1854 self->operation_count = 0;
1855 self->old_tree = NULL_SUBTREE;
1856 self->included_range_differences = (TSRangeArray) array_new();
1857 self->included_range_difference_index = 0;
1858 ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1859 return self;
1860}
1861
1862void ts_parser_delete(TSParser *self) {
1863 if (!self) return;
1864
1865 ts_parser_set_language(self, NULL);
1866 ts_stack_delete(self->stack);
1867 if (self->reduce_actions.contents) {
1868 array_delete(&self->reduce_actions);
1869 }
1870 if (self->included_range_differences.contents) {
1871 array_delete(&self->included_range_differences);
1872 }
1873 if (self->old_tree.ptr) {
1874 ts_subtree_release(&self->tree_pool, self->old_tree);
1875 self->old_tree = NULL_SUBTREE;
1876 }
1877 ts_wasm_store_delete(self->wasm_store);
1878 ts_lexer_delete(&self->lexer);
1879 ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1880 ts_subtree_pool_delete(&self->tree_pool);
1881 reusable_node_delete(&self->reusable_node);
1882 array_delete(&self->trailing_extras);
1883 array_delete(&self->trailing_extras2);
1884 array_delete(&self->scratch_trees);
1885 ts_free(self);
1886}
1887
1888const TSLanguage *ts_parser_language(const TSParser *self) {
1889 return self->language;
1890}
1891
1892bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
1893 ts_parser_reset(self);
1894 ts_language_delete(self->language);
1895 self->language = NULL;
1896
1897 if (language) {
1898 if (
1899 language->version > TREE_SITTER_LANGUAGE_VERSION ||
1900 language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
1901 ) return false;
1902
1903 if (ts_language_is_wasm(language)) {
1904 if (
1905 !self->wasm_store ||
1906 !ts_wasm_store_start(self->wasm_store, &self->lexer.data, language)
1907 ) return false;
1908 }
1909 }
1910
1911 self->language = ts_language_copy(language);
1912 return true;
1913}
1914
1915TSLogger ts_parser_logger(const TSParser *self) {
1916 return self->lexer.logger;
1917}
1918
1919void ts_parser_set_logger(TSParser *self, TSLogger logger) {
1920 self->lexer.logger = logger;
1921}
1922
1923void ts_parser_print_dot_graphs(TSParser *self, int fd) {
1924 if (self->dot_graph_file) {
1925 fclose(self->dot_graph_file);
1926 }
1927
1928 if (fd >= 0) {
1929 #ifdef _WIN32
1930 self->dot_graph_file = _fdopen(fd, "a");
1931 #else
1932 self->dot_graph_file = fdopen(fd, "a");
1933 #endif
1934 } else {
1935 self->dot_graph_file = NULL;
1936 }
1937}
1938
1939const size_t *ts_parser_cancellation_flag(const TSParser *self) {
1940 return (const size_t *)self->cancellation_flag;
1941}
1942
1943void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
1944 self->cancellation_flag = (const volatile size_t *)flag;
1945}
1946
1947uint64_t ts_parser_timeout_micros(const TSParser *self) {
1948 return duration_to_micros(self->timeout_duration);
1949}
1950
1951void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
1952 self->timeout_duration = duration_from_micros(timeout_micros);
1953}
1954
1955bool ts_parser_set_included_ranges(
1956 TSParser *self,
1957 const TSRange *ranges,
1958 uint32_t count
1959) {
1960 return ts_lexer_set_included_ranges(&self->lexer, ranges, count);
1961}
1962
1963const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) {
1964 return ts_lexer_included_ranges(&self->lexer, count);
1965}
1966
1967void ts_parser_reset(TSParser *self) {
1968 ts_parser__external_scanner_destroy(self);
1969 if (self->wasm_store) {
1970 ts_wasm_store_reset(self->wasm_store);
1971 }
1972
1973 if (self->old_tree.ptr) {
1974 ts_subtree_release(&self->tree_pool, self->old_tree);
1975 self->old_tree = NULL_SUBTREE;
1976 }
1977
1978 reusable_node_clear(&self->reusable_node);
1979 ts_lexer_reset(&self->lexer, length_zero());
1980 ts_stack_clear(self->stack);
1981 ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1982 if (self->finished_tree.ptr) {
1983 ts_subtree_release(&self->tree_pool, self->finished_tree);
1984 self->finished_tree = NULL_SUBTREE;
1985 }
1986 self->accept_count = 0;
1987 self->has_scanner_error = false;
1988}
1989
1990TSTree *ts_parser_parse(
1991 TSParser *self,
1992 const TSTree *old_tree,
1993 TSInput input
1994) {
1995 TSTree *result = NULL;
1996 if (!self->language || !input.read) return NULL;
1997
1998 if (ts_language_is_wasm(self->language)) {
1999 if (!self->wasm_store) return NULL;
2000 ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language);
2001 }
2002
2003 ts_lexer_set_input(&self->lexer, input);
2004 array_clear(&self->included_range_differences);
2005 self->included_range_difference_index = 0;
2006
2007 if (ts_parser_has_outstanding_parse(self)) {
2008 LOG("resume_parsing");
2009 } else {
2010 ts_parser__external_scanner_create(self);
2011 if (self->has_scanner_error) goto exit;
2012
2013 if (old_tree) {
2014 ts_subtree_retain(old_tree->root);
2015 self->old_tree = old_tree->root;
2016 ts_range_array_get_changed_ranges(
2017 old_tree->included_ranges, old_tree->included_range_count,
2018 self->lexer.included_ranges, self->lexer.included_range_count,
2019 &self->included_range_differences
2020 );
2021 reusable_node_reset(&self->reusable_node, old_tree->root);
2022 LOG("parse_after_edit");
2023 LOG_TREE(self->old_tree);
2024 for (unsigned i = 0; i < self->included_range_differences.size; i++) {
2025 TSRange *range = &self->included_range_differences.contents[i];
2026 LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
2027 }
2028 } else {
2029 reusable_node_clear(&self->reusable_node);
2030 LOG("new_parse");
2031 }
2032 }
2033
2034 self->operation_count = 0;
2035 if (self->timeout_duration) {
2036 self->end_clock = clock_after(clock_now(), self->timeout_duration);
2037 } else {
2038 self->end_clock = clock_null();
2039 }
2040
2041 uint32_t position = 0, last_position = 0, version_count = 0;
2042 do {
2043 for (
2044 StackVersion version = 0;
2045 version_count = ts_stack_version_count(self->stack),
2046 version < version_count;
2047 version++
2048 ) {
2049 bool allow_node_reuse = version_count == 1;
2050 while (ts_stack_is_active(self->stack, version)) {
2051 LOG(
2052 "process version:%u, version_count:%u, state:%d, row:%u, col:%u",
2053 version,
2054 ts_stack_version_count(self->stack),
2055 ts_stack_state(self->stack, version),
2056 ts_stack_position(self->stack, version).extent.row,
2057 ts_stack_position(self->stack, version).extent.column
2058 );
2059
2060 if (!ts_parser__advance(self, version, allow_node_reuse)) {
2061 if (self->has_scanner_error) goto exit;
2062 return NULL;
2063 }
2064
2065 LOG_STACK();
2066
2067 position = ts_stack_position(self->stack, version).bytes;
2068 if (position > last_position || (version > 0 && position == last_position)) {
2069 last_position = position;
2070 break;
2071 }
2072 }
2073 }
2074
2075 // After advancing each version of the stack, re-sort the versions by their cost,
2076 // removing any versions that are no longer worth pursuing.
2077 unsigned min_error_cost = ts_parser__condense_stack(self);
2078
2079 // If there's already a finished parse tree that's better than any in-progress version,
2080 // then terminate parsing. Clear the parse stack to remove any extra references to subtrees
2081 // within the finished tree, ensuring that these subtrees can be safely mutated in-place
2082 // for rebalancing.
2083 if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
2084 ts_stack_clear(self->stack);
2085 break;
2086 }
2087
2088 while (self->included_range_difference_index < self->included_range_differences.size) {
2089 TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
2090 if (range->end_byte <= position) {
2091 self->included_range_difference_index++;
2092 } else {
2093 break;
2094 }
2095 }
2096 } while (version_count != 0);
2097
2098 assert(self->finished_tree.ptr);
2099 ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
2100 LOG("done");
2101 LOG_TREE(self->finished_tree);
2102
2103 result = ts_tree_new(
2104 self->finished_tree,
2105 self->language,
2106 self->lexer.included_ranges,
2107 self->lexer.included_range_count
2108 );
2109 self->finished_tree = NULL_SUBTREE;
2110
2111exit:
2112 ts_parser_reset(self);
2113 return result;
2114}
2115
2116TSTree *ts_parser_parse_string(
2117 TSParser *self,
2118 const TSTree *old_tree,
2119 const char *string,
2120 uint32_t length
2121) {
2122 return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8);
2123}
2124
2125TSTree *ts_parser_parse_string_encoding(
2126 TSParser *self,
2127 const TSTree *old_tree,
2128 const char *string,
2129 uint32_t length,
2130 TSInputEncoding encoding
2131) {
2132 TSStringInput input = {string, length};
2133 return ts_parser_parse(self, old_tree, (TSInput) {
2134 &input,
2135 ts_string_input_read,
2136 encoding,
2137 });
2138}
2139
2140void ts_parser_set_wasm_store(TSParser *self, TSWasmStore *store) {
2141 ts_wasm_store_delete(self->wasm_store);
2142 self->wasm_store = store;
2143}
2144
2145TSWasmStore *ts_parser_take_wasm_store(TSParser *self) {
2146 TSWasmStore *result = self->wasm_store;
2147 self->wasm_store = NULL;
2148 return result;
2149}
2150
2151#undef LOG