aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/mitjafelicijan/go-tree-sitter/query.c
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/query.c')
-rw-r--r--vendor/github.com/mitjafelicijan/go-tree-sitter/query.c4134
1 files changed, 4134 insertions, 0 deletions
diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/query.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/query.c
new file mode 100644
index 0000000..1b6e04b
--- /dev/null
+++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/query.c
@@ -0,0 +1,4134 @@
1#include "api.h"
2#include "./alloc.h"
3#include "./array.h"
4#include "./language.h"
5#include "./point.h"
6#include "./tree_cursor.h"
7#include "./unicode.h"
8#include <wctype.h>
9
10// #define DEBUG_ANALYZE_QUERY
11// #define DEBUG_EXECUTE_QUERY
12
13#define MAX_STEP_CAPTURE_COUNT 3
14#define MAX_NEGATED_FIELD_COUNT 8
15#define MAX_STATE_PREDECESSOR_COUNT 256
16#define MAX_ANALYSIS_STATE_DEPTH 8
17#define MAX_ANALYSIS_ITERATION_COUNT 256
18
19/*
20 * Stream - A sequence of unicode characters derived from a UTF8 string.
21 * This struct is used in parsing queries from S-expressions.
22 */
23typedef struct {
24 const char *input;
25 const char *start;
26 const char *end;
27 int32_t next;
28 uint8_t next_size;
29} Stream;
30
31/*
32 * QueryStep - A step in the process of matching a query. Each node within
33 * a query S-expression corresponds to one of these steps. An entire pattern
34 * is represented as a sequence of these steps. The basic properties of a
35 * node are represented by these fields:
36 * - `symbol` - The grammar symbol to match. A zero value represents the
37 * wildcard symbol, '_'.
38 * - `field` - The field name to match. A zero value means that a field name
39 * was not specified.
40 * - `capture_ids` - An array of integers representing the names of captures
41 * associated with this node in the pattern, terminated by a `NONE` value.
42 * - `depth` - The depth where this node occurs in the pattern. The root node
43 * of the pattern has depth zero.
44 * - `negated_field_list_id` - An id representing a set of fields that must
45 * not be present on a node matching this step.
46 *
47 * Steps have some additional fields in order to handle the `.` (or "anchor") operator,
48 * which forbids additional child nodes:
49 * - `is_immediate` - Indicates that the node matching this step cannot be preceded
50 * by other sibling nodes that weren't specified in the pattern.
51 * - `is_last_child` - Indicates that the node matching this step cannot have any
52 * subsequent named siblings.
53 *
54 * For simple patterns, steps are matched in sequential order. But in order to
55 * handle alternative/repeated/optional sub-patterns, query steps are not always
56 * structured as a linear sequence; they sometimes need to split and merge. This
57 * is done using the following fields:
58 * - `alternative_index` - The index of a different query step that serves as
59 * an alternative to this step. A `NONE` value represents no alternative.
60 * When a query state reaches a step with an alternative index, the state
61 * is duplicated, with one copy remaining at the original step, and one copy
62 * moving to the alternative step. The alternative may have its own alternative
63 * step, so this splitting is an iterative process.
64 * - `is_dead_end` - Indicates that this state cannot be passed directly, and
65 * exists only in order to redirect to an alternative index, with no splitting.
66 * - `is_pass_through` - Indicates that state has no matching logic of its own,
67 * and exists only to split a state. One copy of the state advances immediately
68 * to the next step, and one moves to the alternative step.
69 * - `alternative_is_immediate` - Indicates that this step's alternative step
70 * should be treated as if `is_immediate` is true.
71 *
72 * Steps also store some derived state that summarizes how they relate to other
73 * steps within the same pattern. This is used to optimize the matching process:
74 * - `contains_captures` - Indicates that this step or one of its child steps
75 * has a non-empty `capture_ids` list.
76 * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
77 * it and all of its subsequent sibling steps within the same parent pattern
78 * are guaranteed to match.
79 * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
80 * for the entire top-level pattern. When iterating through a query's
81 * captures using `ts_query_cursor_next_capture`, this field is used to
82 * detect that a capture can safely be returned from a match that has not
83 * even completed yet.
84 */
85typedef struct {
86 TSSymbol symbol;
87 TSSymbol supertype_symbol;
88 TSFieldId field;
89 uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
90 uint16_t depth;
91 uint16_t alternative_index;
92 uint16_t negated_field_list_id;
93 bool is_named: 1;
94 bool is_immediate: 1;
95 bool is_last_child: 1;
96 bool is_pass_through: 1;
97 bool is_dead_end: 1;
98 bool alternative_is_immediate: 1;
99 bool contains_captures: 1;
100 bool root_pattern_guaranteed: 1;
101 bool parent_pattern_guaranteed: 1;
102} QueryStep;
103
104/*
105 * Slice - A slice of an external array. Within a query, capture names,
106 * literal string values, and predicate step information are stored in three
107 * contiguous arrays. Individual captures, string values, and predicates are
108 * represented as slices of these three arrays.
109 */
110typedef struct {
111 uint32_t offset;
112 uint32_t length;
113} Slice;
114
115/*
116 * SymbolTable - a two-way mapping of strings to ids.
117 */
118typedef struct {
119 Array(char) characters;
120 Array(Slice) slices;
121} SymbolTable;
122
123/**
124 * CaptureQuantififers - a data structure holding the quantifiers of pattern captures.
125 */
126typedef Array(uint8_t) CaptureQuantifiers;
127
128/*
129 * PatternEntry - Information about the starting point for matching a particular
130 * pattern. These entries are stored in a 'pattern map' - a sorted array that
131 * makes it possible to efficiently lookup patterns based on the symbol for their
132 * first step. The entry consists of the following fields:
133 * - `pattern_index` - the index of the pattern within the query
134 * - `step_index` - the index of the pattern's first step in the shared `steps` array
135 * - `is_rooted` - whether or not the pattern has a single root node. This property
136 * affects decisions about whether or not to start the pattern for nodes outside
137 * of a QueryCursor's range restriction.
138 */
139typedef struct {
140 uint16_t step_index;
141 uint16_t pattern_index;
142 bool is_rooted;
143} PatternEntry;
144
145typedef struct {
146 Slice steps;
147 Slice predicate_steps;
148 uint32_t start_byte;
149 bool is_non_local;
150} QueryPattern;
151
152typedef struct {
153 uint32_t byte_offset;
154 uint16_t step_index;
155} StepOffset;
156
157/*
158 * QueryState - The state of an in-progress match of a particular pattern
159 * in a query. While executing, a `TSQueryCursor` must keep track of a number
160 * of possible in-progress matches. Each of those possible matches is
161 * represented as one of these states. Fields:
162 * - `id` - A numeric id that is exposed to the public API. This allows the
163 * caller to remove a given match, preventing any more of its captures
164 * from being returned.
165 * - `start_depth` - The depth in the tree where the first step of the state's
166 * pattern was matched.
167 * - `pattern_index` - The pattern that the state is matching.
168 * - `consumed_capture_count` - The number of captures from this match that
169 * have already been returned.
170 * - `capture_list_id` - A numeric id that can be used to retrieve the state's
171 * list of captures from the `CaptureListPool`.
172 * - `seeking_immediate_match` - A flag that indicates that the state's next
173 * step must be matched by the very next sibling. This is used when
174 * processing repetitions.
175 * - `has_in_progress_alternatives` - A flag that indicates that there is are
176 * other states that have the same captures as this state, but are at
177 * different steps in their pattern. This means that in order to obey the
178 * 'longest-match' rule, this state should not be returned as a match until
179 * it is clear that there can be no other alternative match with more captures.
180 */
181typedef struct {
182 uint32_t id;
183 uint32_t capture_list_id;
184 uint16_t start_depth;
185 uint16_t step_index;
186 uint16_t pattern_index;
187 uint16_t consumed_capture_count: 12;
188 bool seeking_immediate_match: 1;
189 bool has_in_progress_alternatives: 1;
190 bool dead: 1;
191 bool needs_parent: 1;
192} QueryState;
193
194typedef Array(TSQueryCapture) CaptureList;
195
196/*
197 * CaptureListPool - A collection of *lists* of captures. Each query state needs
198 * to maintain its own list of captures. To avoid repeated allocations, this struct
199 * maintains a fixed set of capture lists, and keeps track of which ones are
200 * currently in use by a query state.
201 */
202typedef struct {
203 Array(CaptureList) list;
204 CaptureList empty_list;
205 // The maximum number of capture lists that we are allowed to allocate. We
206 // never allow `list` to allocate more entries than this, dropping pending
207 // matches if needed to stay under the limit.
208 uint32_t max_capture_list_count;
209 // The number of capture lists allocated in `list` that are not currently in
210 // use. We reuse those existing-but-unused capture lists before trying to
211 // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
212 // list's length to indicate that it's not in use.
213 uint32_t free_capture_list_count;
214} CaptureListPool;
215
216/*
217 * AnalysisState - The state needed for walking the parse table when analyzing
218 * a query pattern, to determine at which steps the pattern might fail to match.
219 */
220typedef struct {
221 TSStateId parse_state;
222 TSSymbol parent_symbol;
223 uint16_t child_index;
224 TSFieldId field_id: 15;
225 bool done: 1;
226} AnalysisStateEntry;
227
228typedef struct {
229 AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH];
230 uint16_t depth;
231 uint16_t step_index;
232 TSSymbol root_symbol;
233} AnalysisState;
234
235typedef Array(AnalysisState *) AnalysisStateSet;
236
237typedef struct {
238 AnalysisStateSet states;
239 AnalysisStateSet next_states;
240 AnalysisStateSet deeper_states;
241 AnalysisStateSet state_pool;
242 Array(uint16_t) final_step_indices;
243 Array(TSSymbol) finished_parent_symbols;
244 bool did_abort;
245} QueryAnalysis;
246
247/*
248 * AnalysisSubgraph - A subset of the states in the parse table that are used
249 * in constructing nodes with a certain symbol. Each state is accompanied by
250 * some information about the possible node that could be produced in
251 * downstream states.
252 */
253typedef struct {
254 TSStateId state;
255 uint16_t production_id;
256 uint8_t child_index: 7;
257 bool done: 1;
258} AnalysisSubgraphNode;
259
260typedef struct {
261 TSSymbol symbol;
262 Array(TSStateId) start_states;
263 Array(AnalysisSubgraphNode) nodes;
264} AnalysisSubgraph;
265
266typedef Array(AnalysisSubgraph) AnalysisSubgraphArray;
267
268/*
269 * StatePredecessorMap - A map that stores the predecessors of each parse state.
270 * This is used during query analysis to determine which parse states can lead
271 * to which reduce actions.
272 */
273typedef struct {
274 TSStateId *contents;
275} StatePredecessorMap;
276
277/*
278 * TSQuery - A tree query, compiled from a string of S-expressions. The query
279 * itself is immutable. The mutable state used in the process of executing the
280 * query is stored in a `TSQueryCursor`.
281 */
282struct TSQuery {
283 SymbolTable captures;
284 SymbolTable predicate_values;
285 Array(CaptureQuantifiers) capture_quantifiers;
286 Array(QueryStep) steps;
287 Array(PatternEntry) pattern_map;
288 Array(TSQueryPredicateStep) predicate_steps;
289 Array(QueryPattern) patterns;
290 Array(StepOffset) step_offsets;
291 Array(TSFieldId) negated_fields;
292 Array(char) string_buffer;
293 Array(TSSymbol) repeat_symbols_with_rootless_patterns;
294 const TSLanguage *language;
295 uint16_t wildcard_root_pattern_count;
296};
297
298/*
299 * TSQueryCursor - A stateful struct used to execute a query on a tree.
300 */
301struct TSQueryCursor {
302 const TSQuery *query;
303 TSTreeCursor cursor;
304 Array(QueryState) states;
305 Array(QueryState) finished_states;
306 CaptureListPool capture_list_pool;
307 uint32_t depth;
308 uint32_t max_start_depth;
309 uint32_t start_byte;
310 uint32_t end_byte;
311 TSPoint start_point;
312 TSPoint end_point;
313 uint32_t next_state_id;
314 bool on_visible_node;
315 bool ascending;
316 bool halted;
317 bool did_exceed_match_limit;
318};
319
320static const TSQueryError PARENT_DONE = -1;
321static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
322static const uint16_t NONE = UINT16_MAX;
323static const TSSymbol WILDCARD_SYMBOL = 0;
324
325/**********
326 * Stream
327 **********/
328
329// Advance to the next unicode code point in the stream.
330static bool stream_advance(Stream *self) {
331 self->input += self->next_size;
332 if (self->input < self->end) {
333 uint32_t size = ts_decode_utf8(
334 (const uint8_t *)self->input,
335 (uint32_t)(self->end - self->input),
336 &self->next
337 );
338 if (size > 0) {
339 self->next_size = size;
340 return true;
341 }
342 } else {
343 self->next_size = 0;
344 self->next = '\0';
345 }
346 return false;
347}
348
349// Reset the stream to the given input position, represented as a pointer
350// into the input string.
351static void stream_reset(Stream *self, const char *input) {
352 self->input = input;
353 self->next_size = 0;
354 stream_advance(self);
355}
356
357static Stream stream_new(const char *string, uint32_t length) {
358 Stream self = {
359 .next = 0,
360 .input = string,
361 .start = string,
362 .end = string + length,
363 };
364 stream_advance(&self);
365 return self;
366}
367
368static void stream_skip_whitespace(Stream *self) {
369 for (;;) {
370 if (iswspace(self->next)) {
371 stream_advance(self);
372 } else if (self->next == ';') {
373 // skip over comments
374 stream_advance(self);
375 while (self->next && self->next != '\n') {
376 if (!stream_advance(self)) break;
377 }
378 } else {
379 break;
380 }
381 }
382}
383
384static bool stream_is_ident_start(Stream *self) {
385 return iswalnum(self->next) || self->next == '_' || self->next == '-';
386}
387
388static void stream_scan_identifier(Stream *stream) {
389 do {
390 stream_advance(stream);
391 } while (
392 iswalnum(stream->next) ||
393 stream->next == '_' ||
394 stream->next == '-' ||
395 stream->next == '.' ||
396 stream->next == '?' ||
397 stream->next == '!'
398 );
399}
400
401static uint32_t stream_offset(Stream *self) {
402 return (uint32_t)(self->input - self->start);
403}
404
405/******************
406 * CaptureListPool
407 ******************/
408
409static CaptureListPool capture_list_pool_new(void) {
410 return (CaptureListPool) {
411 .list = array_new(),
412 .empty_list = array_new(),
413 .max_capture_list_count = UINT32_MAX,
414 .free_capture_list_count = 0,
415 };
416}
417
418static void capture_list_pool_reset(CaptureListPool *self) {
419 for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
420 // This invalid size means that the list is not in use.
421 self->list.contents[i].size = UINT32_MAX;
422 }
423 self->free_capture_list_count = self->list.size;
424}
425
426static void capture_list_pool_delete(CaptureListPool *self) {
427 for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
428 array_delete(&self->list.contents[i]);
429 }
430 array_delete(&self->list);
431}
432
433static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
434 if (id >= self->list.size) return &self->empty_list;
435 return &self->list.contents[id];
436}
437
438static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
439 assert(id < self->list.size);
440 return &self->list.contents[id];
441}
442
443static bool capture_list_pool_is_empty(const CaptureListPool *self) {
444 // The capture list pool is empty if all allocated lists are in use, and we
445 // have reached the maximum allowed number of allocated lists.
446 return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
447}
448
449static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
450 // First see if any already allocated capture list is currently unused.
451 if (self->free_capture_list_count > 0) {
452 for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
453 if (self->list.contents[i].size == UINT32_MAX) {
454 array_clear(&self->list.contents[i]);
455 self->free_capture_list_count--;
456 return i;
457 }
458 }
459 }
460
461 // Otherwise allocate and initialize a new capture list, as long as that
462 // doesn't put us over the requested maximum.
463 uint32_t i = self->list.size;
464 if (i >= self->max_capture_list_count) {
465 return NONE;
466 }
467 CaptureList list;
468 array_init(&list);
469 array_push(&self->list, list);
470 return i;
471}
472
473static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
474 if (id >= self->list.size) return;
475 self->list.contents[id].size = UINT32_MAX;
476 self->free_capture_list_count++;
477}
478
479/**************
480 * Quantifiers
481 **************/
482
483static TSQuantifier quantifier_mul(
484 TSQuantifier left,
485 TSQuantifier right
486) {
487 switch (left)
488 {
489 case TSQuantifierZero:
490 return TSQuantifierZero;
491 case TSQuantifierZeroOrOne:
492 switch (right) {
493 case TSQuantifierZero:
494 return TSQuantifierZero;
495 case TSQuantifierZeroOrOne:
496 case TSQuantifierOne:
497 return TSQuantifierZeroOrOne;
498 case TSQuantifierZeroOrMore:
499 case TSQuantifierOneOrMore:
500 return TSQuantifierZeroOrMore;
501 };
502 break;
503 case TSQuantifierZeroOrMore:
504 switch (right) {
505 case TSQuantifierZero:
506 return TSQuantifierZero;
507 case TSQuantifierZeroOrOne:
508 case TSQuantifierZeroOrMore:
509 case TSQuantifierOne:
510 case TSQuantifierOneOrMore:
511 return TSQuantifierZeroOrMore;
512 };
513 break;
514 case TSQuantifierOne:
515 return right;
516 case TSQuantifierOneOrMore:
517 switch (right) {
518 case TSQuantifierZero:
519 return TSQuantifierZero;
520 case TSQuantifierZeroOrOne:
521 case TSQuantifierZeroOrMore:
522 return TSQuantifierZeroOrMore;
523 case TSQuantifierOne:
524 case TSQuantifierOneOrMore:
525 return TSQuantifierOneOrMore;
526 };
527 break;
528 }
529 return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
530}
531
532static TSQuantifier quantifier_join(
533 TSQuantifier left,
534 TSQuantifier right
535) {
536 switch (left)
537 {
538 case TSQuantifierZero:
539 switch (right) {
540 case TSQuantifierZero:
541 return TSQuantifierZero;
542 case TSQuantifierZeroOrOne:
543 case TSQuantifierOne:
544 return TSQuantifierZeroOrOne;
545 case TSQuantifierZeroOrMore:
546 case TSQuantifierOneOrMore:
547 return TSQuantifierZeroOrMore;
548 };
549 break;
550 case TSQuantifierZeroOrOne:
551 switch (right) {
552 case TSQuantifierZero:
553 case TSQuantifierZeroOrOne:
554 case TSQuantifierOne:
555 return TSQuantifierZeroOrOne;
556 break;
557 case TSQuantifierZeroOrMore:
558 case TSQuantifierOneOrMore:
559 return TSQuantifierZeroOrMore;
560 break;
561 };
562 break;
563 case TSQuantifierZeroOrMore:
564 return TSQuantifierZeroOrMore;
565 case TSQuantifierOne:
566 switch (right) {
567 case TSQuantifierZero:
568 case TSQuantifierZeroOrOne:
569 return TSQuantifierZeroOrOne;
570 case TSQuantifierZeroOrMore:
571 return TSQuantifierZeroOrMore;
572 case TSQuantifierOne:
573 return TSQuantifierOne;
574 case TSQuantifierOneOrMore:
575 return TSQuantifierOneOrMore;
576 };
577 break;
578 case TSQuantifierOneOrMore:
579 switch (right) {
580 case TSQuantifierZero:
581 case TSQuantifierZeroOrOne:
582 case TSQuantifierZeroOrMore:
583 return TSQuantifierZeroOrMore;
584 case TSQuantifierOne:
585 case TSQuantifierOneOrMore:
586 return TSQuantifierOneOrMore;
587 };
588 break;
589 }
590 return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
591}
592
593static TSQuantifier quantifier_add(
594 TSQuantifier left,
595 TSQuantifier right
596) {
597 switch (left)
598 {
599 case TSQuantifierZero:
600 return right;
601 case TSQuantifierZeroOrOne:
602 switch (right) {
603 case TSQuantifierZero:
604 return TSQuantifierZeroOrOne;
605 case TSQuantifierZeroOrOne:
606 case TSQuantifierZeroOrMore:
607 return TSQuantifierZeroOrMore;
608 case TSQuantifierOne:
609 case TSQuantifierOneOrMore:
610 return TSQuantifierOneOrMore;
611 };
612 break;
613 case TSQuantifierZeroOrMore:
614 switch (right) {
615 case TSQuantifierZero:
616 return TSQuantifierZeroOrMore;
617 case TSQuantifierZeroOrOne:
618 case TSQuantifierZeroOrMore:
619 return TSQuantifierZeroOrMore;
620 case TSQuantifierOne:
621 case TSQuantifierOneOrMore:
622 return TSQuantifierOneOrMore;
623 };
624 break;
625 case TSQuantifierOne:
626 switch (right) {
627 case TSQuantifierZero:
628 return TSQuantifierOne;
629 case TSQuantifierZeroOrOne:
630 case TSQuantifierZeroOrMore:
631 case TSQuantifierOne:
632 case TSQuantifierOneOrMore:
633 return TSQuantifierOneOrMore;
634 };
635 break;
636 case TSQuantifierOneOrMore:
637 return TSQuantifierOneOrMore;
638 }
639 return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
640}
641
642// Create new capture quantifiers structure
643static CaptureQuantifiers capture_quantifiers_new(void) {
644 return (CaptureQuantifiers) array_new();
645}
646
647// Delete capture quantifiers structure
648static void capture_quantifiers_delete(
649 CaptureQuantifiers *self
650) {
651 array_delete(self);
652}
653
654// Clear capture quantifiers structure
655static void capture_quantifiers_clear(
656 CaptureQuantifiers *self
657) {
658 array_clear(self);
659}
660
661// Replace capture quantifiers with the given quantifiers
662static void capture_quantifiers_replace(
663 CaptureQuantifiers *self,
664 CaptureQuantifiers *quantifiers
665) {
666 array_clear(self);
667 array_push_all(self, quantifiers);
668}
669
670// Return capture quantifier for the given capture id
671static TSQuantifier capture_quantifier_for_id(
672 const CaptureQuantifiers *self,
673 uint16_t id
674) {
675 return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id);
676}
677
678// Add the given quantifier to the current value for id
679static void capture_quantifiers_add_for_id(
680 CaptureQuantifiers *self,
681 uint16_t id,
682 TSQuantifier quantifier
683) {
684 if (self->size <= id) {
685 array_grow_by(self, id + 1 - self->size);
686 }
687 uint8_t *own_quantifier = array_get(self, id);
688 *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier);
689}
690
691// Point-wise add the given quantifiers to the current values
692static void capture_quantifiers_add_all(
693 CaptureQuantifiers *self,
694 CaptureQuantifiers *quantifiers
695) {
696 if (self->size < quantifiers->size) {
697 array_grow_by(self, quantifiers->size - self->size);
698 }
699 for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) {
700 uint8_t *quantifier = array_get(quantifiers, id);
701 uint8_t *own_quantifier = array_get(self, id);
702 *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
703 }
704}
705
706// Join the given quantifier with the current values
707static void capture_quantifiers_mul(
708 CaptureQuantifiers *self,
709 TSQuantifier quantifier
710) {
711 for (uint16_t id = 0; id < (uint16_t)self->size; id++) {
712 uint8_t *own_quantifier = array_get(self, id);
713 *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier);
714 }
715}
716
717// Point-wise join the quantifiers from a list of alternatives with the current values
718static void capture_quantifiers_join_all(
719 CaptureQuantifiers *self,
720 CaptureQuantifiers *quantifiers
721) {
722 if (self->size < quantifiers->size) {
723 array_grow_by(self, quantifiers->size - self->size);
724 }
725 for (uint32_t id = 0; id < quantifiers->size; id++) {
726 uint8_t *quantifier = array_get(quantifiers, id);
727 uint8_t *own_quantifier = array_get(self, id);
728 *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
729 }
730 for (uint32_t id = quantifiers->size; id < self->size; id++) {
731 uint8_t *own_quantifier = array_get(self, id);
732 *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero);
733 }
734}
735
736/**************
737 * SymbolTable
738 **************/
739
740static SymbolTable symbol_table_new(void) {
741 return (SymbolTable) {
742 .characters = array_new(),
743 .slices = array_new(),
744 };
745}
746
747static void symbol_table_delete(SymbolTable *self) {
748 array_delete(&self->characters);
749 array_delete(&self->slices);
750}
751
752static int symbol_table_id_for_name(
753 const SymbolTable *self,
754 const char *name,
755 uint32_t length
756) {
757 for (unsigned i = 0; i < self->slices.size; i++) {
758 Slice slice = self->slices.contents[i];
759 if (
760 slice.length == length &&
761 !strncmp(&self->characters.contents[slice.offset], name, length)
762 ) return i;
763 }
764 return -1;
765}
766
767static const char *symbol_table_name_for_id(
768 const SymbolTable *self,
769 uint16_t id,
770 uint32_t *length
771) {
772 Slice slice = self->slices.contents[id];
773 *length = slice.length;
774 return &self->characters.contents[slice.offset];
775}
776
777static uint16_t symbol_table_insert_name(
778 SymbolTable *self,
779 const char *name,
780 uint32_t length
781) {
782 int id = symbol_table_id_for_name(self, name, length);
783 if (id >= 0) return (uint16_t)id;
784 Slice slice = {
785 .offset = self->characters.size,
786 .length = length,
787 };
788 array_grow_by(&self->characters, length + 1);
789 memcpy(&self->characters.contents[slice.offset], name, length);
790 self->characters.contents[self->characters.size - 1] = 0;
791 array_push(&self->slices, slice);
792 return self->slices.size - 1;
793}
794
795/************
796 * QueryStep
797 ************/
798
799static QueryStep query_step__new(
800 TSSymbol symbol,
801 uint16_t depth,
802 bool is_immediate
803) {
804 QueryStep step = {
805 .symbol = symbol,
806 .depth = depth,
807 .field = 0,
808 .alternative_index = NONE,
809 .negated_field_list_id = 0,
810 .contains_captures = false,
811 .is_last_child = false,
812 .is_named = false,
813 .is_pass_through = false,
814 .is_dead_end = false,
815 .root_pattern_guaranteed = false,
816 .is_immediate = is_immediate,
817 .alternative_is_immediate = false,
818 };
819 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
820 step.capture_ids[i] = NONE;
821 }
822 return step;
823}
824
825static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
826 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
827 if (self->capture_ids[i] == NONE) {
828 self->capture_ids[i] = capture_id;
829 break;
830 }
831 }
832}
833
834static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
835 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
836 if (self->capture_ids[i] == capture_id) {
837 self->capture_ids[i] = NONE;
838 while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
839 if (self->capture_ids[i + 1] == NONE) break;
840 self->capture_ids[i] = self->capture_ids[i + 1];
841 self->capture_ids[i + 1] = NONE;
842 i++;
843 }
844 break;
845 }
846 }
847}
848
849/**********************
850 * StatePredecessorMap
851 **********************/
852
853static inline StatePredecessorMap state_predecessor_map_new(
854 const TSLanguage *language
855) {
856 return (StatePredecessorMap) {
857 .contents = ts_calloc(
858 (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
859 sizeof(TSStateId)
860 ),
861 };
862}
863
864static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
865 ts_free(self->contents);
866}
867
868static inline void state_predecessor_map_add(
869 StatePredecessorMap *self,
870 TSStateId state,
871 TSStateId predecessor
872) {
873 size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
874 TSStateId *count = &self->contents[index];
875 if (
876 *count == 0 ||
877 (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
878 ) {
879 (*count)++;
880 self->contents[index + *count] = predecessor;
881 }
882}
883
884static inline const TSStateId *state_predecessor_map_get(
885 const StatePredecessorMap *self,
886 TSStateId state,
887 unsigned *count
888) {
889 size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
890 *count = self->contents[index];
891 return &self->contents[index + 1];
892}
893
894/****************
895 * AnalysisState
896 ****************/
897
898static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
899 unsigned result = 0;
900 for (unsigned i = 0; i < self->depth; i++) {
901 TSSymbol symbol = self->stack[i].parent_symbol;
902 for (unsigned j = 0; j < i; j++) {
903 if (self->stack[j].parent_symbol == symbol) {
904 result++;
905 break;
906 }
907 }
908 }
909 return result;
910}
911
912static inline int analysis_state__compare_position(
913 AnalysisState *const *self,
914 AnalysisState *const *other
915) {
916 for (unsigned i = 0; i < (*self)->depth; i++) {
917 if (i >= (*other)->depth) return -1;
918 if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1;
919 if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1;
920 }
921 if ((*self)->depth < (*other)->depth) return 1;
922 if ((*self)->step_index < (*other)->step_index) return -1;
923 if ((*self)->step_index > (*other)->step_index) return 1;
924 return 0;
925}
926
927static inline int analysis_state__compare(
928 AnalysisState *const *self,
929 AnalysisState *const *other
930) {
931 int result = analysis_state__compare_position(self, other);
932 if (result != 0) return result;
933 for (unsigned i = 0; i < (*self)->depth; i++) {
934 if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1;
935 if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1;
936 if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1;
937 if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1;
938 if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1;
939 if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1;
940 }
941 return 0;
942}
943
944static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
945 if (self->depth == 0) {
946 return &self->stack[0];
947 }
948 return &self->stack[self->depth - 1];
949}
950
951static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
952 for (unsigned i = 0; i < self->depth; i++) {
953 if (self->stack[i].parent_symbol == symbol) return true;
954 }
955 return false;
956}
957
958/******************
959 * AnalysisStateSet
960 ******************/
961
962// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by
963// cloning one from scratch.
964static inline AnalysisState *analysis_state_pool__clone_or_reuse(
965 AnalysisStateSet *self,
966 AnalysisState *borrowed_item
967) {
968 AnalysisState *new_item;
969 if (self->size) {
970 new_item = array_pop(self);
971 } else {
972 new_item = ts_malloc(sizeof(AnalysisState));
973 }
974 *new_item = *borrowed_item;
975 return new_item;
976}
977
978// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this
979// set. The set does not contain duplicates, so if the item is already present, it will not be
980// inserted, and no clone will be made.
981//
982// The caller retains ownership of the passed-in memory. However, the clone that is created by this
983// function will be managed by the state set.
984static inline void analysis_state_set__insert_sorted(
985 AnalysisStateSet *self,
986 AnalysisStateSet *pool,
987 AnalysisState *borrowed_item
988) {
989 unsigned index, exists;
990 array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists);
991 if (!exists) {
992 AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
993 array_insert(self, index, new_item);
994 }
995}
996
997// Inserts a clone of the passed-in item at the end position of this list.
998//
999// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function
1000// `analysis_state__compare`) than largest item already in this set. If items are inserted in the
1001// wrong order, the set will not function properly for future use.
1002//
1003// The caller retains ownership of the passed-in memory. However, the clone that is created by this
1004// function will be managed by the state set.
1005static inline void analysis_state_set__push(
1006 AnalysisStateSet *self,
1007 AnalysisStateSet *pool,
1008 AnalysisState *borrowed_item
1009) {
1010 AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
1011 array_push(self, new_item);
1012}
1013
1014// Removes all items from this set, returning it to an empty state.
1015static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) {
1016 array_push_all(pool, self);
1017 array_clear(self);
1018}
1019
1020// Releases all memory that is managed with this state set, including any items currently present.
1021// After calling this function, the set is no longer suitable for use.
1022static inline void analysis_state_set__delete(AnalysisStateSet *self) {
1023 for (unsigned i = 0; i < self->size; i++) {
1024 ts_free(self->contents[i]);
1025 }
1026 array_delete(self);
1027}
1028
1029/****************
1030 * QueryAnalyzer
1031 ****************/
1032
1033static inline QueryAnalysis query_analysis__new(void) {
1034 return (QueryAnalysis) {
1035 .states = array_new(),
1036 .next_states = array_new(),
1037 .deeper_states = array_new(),
1038 .state_pool = array_new(),
1039 .final_step_indices = array_new(),
1040 .finished_parent_symbols = array_new(),
1041 .did_abort = false,
1042 };
1043}
1044
1045static inline void query_analysis__delete(QueryAnalysis *self) {
1046 analysis_state_set__delete(&self->states);
1047 analysis_state_set__delete(&self->next_states);
1048 analysis_state_set__delete(&self->deeper_states);
1049 analysis_state_set__delete(&self->state_pool);
1050 array_delete(&self->final_step_indices);
1051 array_delete(&self->finished_parent_symbols);
1052}
1053
1054/***********************
1055 * AnalysisSubgraphNode
1056 ***********************/
1057
1058static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
1059 if (self->state < other->state) return -1;
1060 if (self->state > other->state) return 1;
1061 if (self->child_index < other->child_index) return -1;
1062 if (self->child_index > other->child_index) return 1;
1063 if (self->done < other->done) return -1;
1064 if (self->done > other->done) return 1;
1065 if (self->production_id < other->production_id) return -1;
1066 if (self->production_id > other->production_id) return 1;
1067 return 0;
1068}
1069
1070/*********
1071 * Query
1072 *********/
1073
1074// The `pattern_map` contains a mapping from TSSymbol values to indices in the
1075// `steps` array. For a given syntax node, the `pattern_map` makes it possible
1076// to quickly find the starting steps of all of the patterns whose root matches
1077// that node. Each entry has two fields: a `pattern_index`, which identifies one
1078// of the patterns in the query, and a `step_index`, which indicates the start
1079// offset of that pattern's steps within the `steps` array.
1080//
1081// The entries are sorted by the patterns' root symbols, and lookups use a
1082// binary search. This ensures that the cost of this initial lookup step
1083// scales logarithmically with the number of patterns in the query.
1084//
1085// This returns `true` if the symbol is present and `false` otherwise.
1086// If the symbol is not present `*result` is set to the index where the
1087// symbol should be inserted.
1088static inline bool ts_query__pattern_map_search(
1089 const TSQuery *self,
1090 TSSymbol needle,
1091 uint32_t *result
1092) {
1093 uint32_t base_index = self->wildcard_root_pattern_count;
1094 uint32_t size = self->pattern_map.size - base_index;
1095 if (size == 0) {
1096 *result = base_index;
1097 return false;
1098 }
1099 while (size > 1) {
1100 uint32_t half_size = size / 2;
1101 uint32_t mid_index = base_index + half_size;
1102 TSSymbol mid_symbol = self->steps.contents[
1103 self->pattern_map.contents[mid_index].step_index
1104 ].symbol;
1105 if (needle > mid_symbol) base_index = mid_index;
1106 size -= half_size;
1107 }
1108
1109 TSSymbol symbol = self->steps.contents[
1110 self->pattern_map.contents[base_index].step_index
1111 ].symbol;
1112
1113 if (needle > symbol) {
1114 base_index++;
1115 if (base_index < self->pattern_map.size) {
1116 symbol = self->steps.contents[
1117 self->pattern_map.contents[base_index].step_index
1118 ].symbol;
1119 }
1120 }
1121
1122 *result = base_index;
1123 return needle == symbol;
1124}
1125
1126// Insert a new pattern's start index into the pattern map, maintaining
1127// the pattern map's ordering invariant.
1128static inline void ts_query__pattern_map_insert(
1129 TSQuery *self,
1130 TSSymbol symbol,
1131 PatternEntry new_entry
1132) {
1133 uint32_t index;
1134 ts_query__pattern_map_search(self, symbol, &index);
1135
1136 // Ensure that the entries are sorted not only by symbol, but also
1137 // by pattern_index. This way, states for earlier patterns will be
1138 // initiated first, which allows the ordering of the states array
1139 // to be maintained more efficiently.
1140 while (index < self->pattern_map.size) {
1141 PatternEntry *entry = &self->pattern_map.contents[index];
1142 if (
1143 self->steps.contents[entry->step_index].symbol == symbol &&
1144 entry->pattern_index < new_entry.pattern_index
1145 ) {
1146 index++;
1147 } else {
1148 break;
1149 }
1150 }
1151
1152 array_insert(&self->pattern_map, index, new_entry);
1153}
1154
1155// Walk the subgraph for this non-terminal, tracking all of the possible
1156// sequences of progress within the pattern.
1157static void ts_query__perform_analysis(
1158 TSQuery *self,
1159 const AnalysisSubgraphArray *subgraphs,
1160 QueryAnalysis *analysis
1161) {
1162 unsigned recursion_depth_limit = 0;
1163 unsigned prev_final_step_count = 0;
1164 array_clear(&analysis->final_step_indices);
1165 array_clear(&analysis->finished_parent_symbols);
1166
1167 for (unsigned iteration = 0;; iteration++) {
1168 if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
1169 analysis->did_abort = true;
1170 break;
1171 }
1172
1173 #ifdef DEBUG_ANALYZE_QUERY
1174 printf("Iteration: %u. Final step indices:", iteration);
1175 for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
1176 printf(" %4u", analysis->final_step_indices.contents[j]);
1177 }
1178 printf("\n");
1179 for (unsigned j = 0; j < analysis->states.size; j++) {
1180 AnalysisState *state = analysis->states.contents[j];
1181 printf(" %3u: step: %u, stack: [", j, state->step_index);
1182 for (unsigned k = 0; k < state->depth; k++) {
1183 printf(
1184 " {%s, child: %u, state: %4u",
1185 self->language->symbol_names[state->stack[k].parent_symbol],
1186 state->stack[k].child_index,
1187 state->stack[k].parse_state
1188 );
1189 if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
1190 if (state->stack[k].done) printf(", DONE");
1191 printf("}");
1192 }
1193 printf(" ]\n");
1194 }
1195 #endif
1196
1197 // If no further progress can be made within the current recursion depth limit, then
1198 // bump the depth limit by one, and continue to process the states the exceeded the
1199 // limit. But only allow this if progress has been made since the last time the depth
1200 // limit was increased.
1201 if (analysis->states.size == 0) {
1202 if (
1203 analysis->deeper_states.size > 0 &&
1204 analysis->final_step_indices.size > prev_final_step_count
1205 ) {
1206 #ifdef DEBUG_ANALYZE_QUERY
1207 printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
1208 #endif
1209
1210 prev_final_step_count = analysis->final_step_indices.size;
1211 recursion_depth_limit++;
1212 AnalysisStateSet _states = analysis->states;
1213 analysis->states = analysis->deeper_states;
1214 analysis->deeper_states = _states;
1215 continue;
1216 }
1217
1218 break;
1219 }
1220
1221 analysis_state_set__clear(&analysis->next_states, &analysis->state_pool);
1222 for (unsigned j = 0; j < analysis->states.size; j++) {
1223 AnalysisState * const state = analysis->states.contents[j];
1224
1225 // For efficiency, it's important to avoid processing the same analysis state more
1226 // than once. To achieve this, keep the states in order of ascending position within
1227 // their hypothetical syntax trees. In each iteration of this loop, start by advancing
1228 // the states that have made the least progress. Avoid advancing states that have already
1229 // made more progress.
1230 if (analysis->next_states.size > 0) {
1231 int comparison = analysis_state__compare_position(
1232 &state,
1233 array_back(&analysis->next_states)
1234 );
1235 if (comparison == 0) {
1236 analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state);
1237 continue;
1238 } else if (comparison > 0) {
1239 #ifdef DEBUG_ANALYZE_QUERY
1240 printf("Terminate iteration at state %u\n", j);
1241 #endif
1242 while (j < analysis->states.size) {
1243 analysis_state_set__push(
1244 &analysis->next_states,
1245 &analysis->state_pool,
1246 analysis->states.contents[j]
1247 );
1248 j++;
1249 }
1250 break;
1251 }
1252 }
1253
1254 const TSStateId parse_state = analysis_state__top(state)->parse_state;
1255 const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
1256 const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
1257 const unsigned child_index = analysis_state__top(state)->child_index;
1258 const QueryStep * const step = &self->steps.contents[state->step_index];
1259
1260 unsigned subgraph_index, exists;
1261 array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1262 if (!exists) continue;
1263 const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index];
1264
1265 // Follow every possible path in the parse table, but only visit states that
1266 // are part of the subgraph for the current symbol.
1267 LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
1268 while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1269 TSSymbol sym = lookahead_iterator.symbol;
1270
1271 AnalysisSubgraphNode successor = {
1272 .state = parse_state,
1273 .child_index = child_index,
1274 };
1275 if (lookahead_iterator.action_count) {
1276 const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
1277 if (action->type == TSParseActionTypeShift) {
1278 if (!action->shift.extra) {
1279 successor.state = action->shift.state;
1280 successor.child_index++;
1281 }
1282 } else {
1283 continue;
1284 }
1285 } else if (lookahead_iterator.next_state != 0) {
1286 successor.state = lookahead_iterator.next_state;
1287 successor.child_index++;
1288 } else {
1289 continue;
1290 }
1291
1292 unsigned node_index;
1293 array_search_sorted_with(
1294 &subgraph->nodes,
1295 analysis_subgraph_node__compare, &successor,
1296 &node_index, &exists
1297 );
1298 while (node_index < subgraph->nodes.size) {
1299 AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
1300 if (node->state != successor.state || node->child_index != successor.child_index) break;
1301
1302 // Use the subgraph to determine what alias and field will eventually be applied
1303 // to this child node.
1304 TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
1305 TSSymbol visible_symbol = alias
1306 ? alias
1307 : self->language->symbol_metadata[sym].visible
1308 ? self->language->public_symbol_map[sym]
1309 : 0;
1310 TSFieldId field_id = parent_field_id;
1311 if (!field_id) {
1312 const TSFieldMapEntry *field_map, *field_map_end;
1313 ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
1314 for (; field_map != field_map_end; field_map++) {
1315 if (!field_map->inherited && field_map->child_index == child_index) {
1316 field_id = field_map->field_id;
1317 break;
1318 }
1319 }
1320 }
1321
1322 // Create a new state that has advanced past this hypothetical subtree.
1323 AnalysisState next_state = *state;
1324 AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
1325 next_state_top->child_index = successor.child_index;
1326 next_state_top->parse_state = successor.state;
1327 if (node->done) next_state_top->done = true;
1328
1329 // Determine if this hypothetical child node would match the current step
1330 // of the query pattern.
1331 bool does_match = false;
1332 if (visible_symbol) {
1333 does_match = true;
1334 if (step->symbol == WILDCARD_SYMBOL) {
1335 if (
1336 step->is_named &&
1337 !self->language->symbol_metadata[visible_symbol].named
1338 ) does_match = false;
1339 } else if (step->symbol != visible_symbol) {
1340 does_match = false;
1341 }
1342 if (step->field && step->field != field_id) {
1343 does_match = false;
1344 }
1345 if (
1346 step->supertype_symbol &&
1347 !analysis_state__has_supertype(state, step->supertype_symbol)
1348 ) does_match = false;
1349 }
1350
1351 // If this child is hidden, then descend into it and walk through its children.
1352 // If the top entry of the stack is at the end of its rule, then that entry can
1353 // be replaced. Otherwise, push a new entry onto the stack.
1354 else if (sym >= self->language->token_count) {
1355 if (!next_state_top->done) {
1356 if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
1357 #ifdef DEBUG_ANALYZE_QUERY
1358 printf("Exceeded depth limit for state %u\n", j);
1359 #endif
1360
1361 analysis->did_abort = true;
1362 continue;
1363 }
1364
1365 next_state.depth++;
1366 next_state_top = analysis_state__top(&next_state);
1367 }
1368
1369 *next_state_top = (AnalysisStateEntry) {
1370 .parse_state = parse_state,
1371 .parent_symbol = sym,
1372 .child_index = 0,
1373 .field_id = field_id,
1374 .done = false,
1375 };
1376
1377 if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
1378 analysis_state_set__insert_sorted(
1379 &analysis->deeper_states,
1380 &analysis->state_pool,
1381 &next_state
1382 );
1383 continue;
1384 }
1385 }
1386
1387 // Pop from the stack when this state reached the end of its current syntax node.
1388 while (next_state.depth > 0 && next_state_top->done) {
1389 next_state.depth--;
1390 next_state_top = analysis_state__top(&next_state);
1391 }
1392
1393 // If this hypothetical child did match the current step of the query pattern,
1394 // then advance to the next step at the current depth. This involves skipping
1395 // over any descendant steps of the current child.
1396 const QueryStep *next_step = step;
1397 if (does_match) {
1398 for (;;) {
1399 next_state.step_index++;
1400 next_step = &self->steps.contents[next_state.step_index];
1401 if (
1402 next_step->depth == PATTERN_DONE_MARKER ||
1403 next_step->depth <= step->depth
1404 ) break;
1405 }
1406 } else if (successor.state == parse_state) {
1407 continue;
1408 }
1409
1410 for (;;) {
1411 // Skip pass-through states. Although these states have alternatives, they are only
1412 // used to implement repetitions, and query analysis does not need to process
1413 // repetitions in order to determine whether steps are possible and definite.
1414 if (next_step->is_pass_through) {
1415 next_state.step_index++;
1416 next_step++;
1417 continue;
1418 }
1419
1420 // If the pattern is finished or hypothetical parent node is complete, then
1421 // record that matching can terminate at this step of the pattern. Otherwise,
1422 // add this state to the list of states to process on the next iteration.
1423 if (!next_step->is_dead_end) {
1424 bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth;
1425 if (did_finish_pattern) {
1426 array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol);
1427 } else if (next_state.depth == 0) {
1428 array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index);
1429 } else {
1430 analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state);
1431 }
1432 }
1433
1434 // If the state has advanced to a step with an alternative step, then add another state
1435 // at that alternative step. This process is simpler than the process of actually matching a
1436 // pattern during query execution, because for the purposes of query analysis, there is no
1437 // need to process repetitions.
1438 if (
1439 does_match &&
1440 next_step->alternative_index != NONE &&
1441 next_step->alternative_index > next_state.step_index
1442 ) {
1443 next_state.step_index = next_step->alternative_index;
1444 next_step = &self->steps.contents[next_state.step_index];
1445 } else {
1446 break;
1447 }
1448 }
1449 }
1450 }
1451 }
1452
1453 AnalysisStateSet _states = analysis->states;
1454 analysis->states = analysis->next_states;
1455 analysis->next_states = _states;
1456 }
1457}
1458
1459static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
1460 Array(uint16_t) non_rooted_pattern_start_steps = array_new();
1461 for (unsigned i = 0; i < self->pattern_map.size; i++) {
1462 PatternEntry *pattern = &self->pattern_map.contents[i];
1463 if (!pattern->is_rooted) {
1464 QueryStep *step = &self->steps.contents[pattern->step_index];
1465 if (step->symbol != WILDCARD_SYMBOL) {
1466 array_push(&non_rooted_pattern_start_steps, i);
1467 }
1468 }
1469 }
1470
1471 // Walk forward through all of the steps in the query, computing some
1472 // basic information about each step. Mark all of the steps that contain
1473 // captures, and record the indices of all of the steps that have child steps.
1474 Array(uint32_t) parent_step_indices = array_new();
1475 for (unsigned i = 0; i < self->steps.size; i++) {
1476 QueryStep *step = &self->steps.contents[i];
1477 if (step->depth == PATTERN_DONE_MARKER) {
1478 step->parent_pattern_guaranteed = true;
1479 step->root_pattern_guaranteed = true;
1480 continue;
1481 }
1482
1483 bool has_children = false;
1484 bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
1485 step->contains_captures = step->capture_ids[0] != NONE;
1486 for (unsigned j = i + 1; j < self->steps.size; j++) {
1487 QueryStep *next_step = &self->steps.contents[j];
1488 if (
1489 next_step->depth == PATTERN_DONE_MARKER ||
1490 next_step->depth <= step->depth
1491 ) break;
1492 if (next_step->capture_ids[0] != NONE) {
1493 step->contains_captures = true;
1494 }
1495 if (!is_wildcard) {
1496 next_step->root_pattern_guaranteed = true;
1497 next_step->parent_pattern_guaranteed = true;
1498 }
1499 has_children = true;
1500 }
1501
1502 if (has_children && !is_wildcard) {
1503 array_push(&parent_step_indices, i);
1504 }
1505 }
1506
1507 // For every parent symbol in the query, initialize an 'analysis subgraph'.
1508 // This subgraph lists all of the states in the parse table that are directly
1509 // involved in building subtrees for this symbol.
1510 //
1511 // In addition to the parent symbols in the query, construct subgraphs for all
1512 // of the hidden symbols in the grammar, because these might occur within
1513 // one of the parent nodes, such that their children appear to belong to the
1514 // parent.
1515 AnalysisSubgraphArray subgraphs = array_new();
1516 for (unsigned i = 0; i < parent_step_indices.size; i++) {
1517 uint32_t parent_step_index = parent_step_indices.contents[i];
1518 TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1519 AnalysisSubgraph subgraph = { .symbol = parent_symbol };
1520 array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1521 }
1522 for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) {
1523 if (!ts_language_symbol_metadata(self->language, sym).visible) {
1524 AnalysisSubgraph subgraph = { .symbol = sym };
1525 array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1526 }
1527 }
1528
1529 // Scan the parse table to find the data needed to populate these subgraphs.
1530 // Collect three things during this scan:
1531 // 1) All of the parse states where one of these symbols can start.
1532 // 2) All of the parse states where one of these symbols can end, along
1533 // with information about the node that would be created.
1534 // 3) A list of predecessor states for each state.
1535 StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
1536 for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) {
1537 unsigned subgraph_index, exists;
1538 LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
1539 while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1540 if (lookahead_iterator.action_count) {
1541 for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
1542 const TSParseAction *action = &lookahead_iterator.actions[i];
1543 if (action->type == TSParseActionTypeReduce) {
1544 const TSSymbol *aliases, *aliases_end;
1545 ts_language_aliases_for_symbol(
1546 self->language,
1547 action->reduce.symbol,
1548 &aliases,
1549 &aliases_end
1550 );
1551 for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1552 array_search_sorted_by(
1553 &subgraphs,
1554 .symbol,
1555 *symbol,
1556 &subgraph_index,
1557 &exists
1558 );
1559 if (exists) {
1560 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1561 if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
1562 array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
1563 .state = state,
1564 .production_id = action->reduce.production_id,
1565 .child_index = action->reduce.child_count,
1566 .done = true,
1567 }));
1568 }
1569 }
1570 }
1571 } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
1572 TSStateId next_state = action->shift.state;
1573 state_predecessor_map_add(&predecessor_map, next_state, state);
1574 }
1575 }
1576 } else if (lookahead_iterator.next_state != 0) {
1577 if (lookahead_iterator.next_state != state) {
1578 state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
1579 }
1580 if (ts_language_state_is_primary(self->language, state)) {
1581 const TSSymbol *aliases, *aliases_end;
1582 ts_language_aliases_for_symbol(
1583 self->language,
1584 lookahead_iterator.symbol,
1585 &aliases,
1586 &aliases_end
1587 );
1588 for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1589 array_search_sorted_by(
1590 &subgraphs,
1591 .symbol,
1592 *symbol,
1593 &subgraph_index,
1594 &exists
1595 );
1596 if (exists) {
1597 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1598 if (
1599 subgraph->start_states.size == 0 ||
1600 *array_back(&subgraph->start_states) != state
1601 )
1602 array_push(&subgraph->start_states, state);
1603 }
1604 }
1605 }
1606 }
1607 }
1608 }
1609
1610 // For each subgraph, compute the preceding states by walking backward
1611 // from the end states using the predecessor map.
1612 Array(AnalysisSubgraphNode) next_nodes = array_new();
1613 for (unsigned i = 0; i < subgraphs.size; i++) {
1614 AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1615 if (subgraph->nodes.size == 0) {
1616 array_delete(&subgraph->start_states);
1617 array_erase(&subgraphs, i);
1618 i--;
1619 continue;
1620 }
1621 array_assign(&next_nodes, &subgraph->nodes);
1622 while (next_nodes.size > 0) {
1623 AnalysisSubgraphNode node = array_pop(&next_nodes);
1624 if (node.child_index > 1) {
1625 unsigned predecessor_count;
1626 const TSStateId *predecessors = state_predecessor_map_get(
1627 &predecessor_map,
1628 node.state,
1629 &predecessor_count
1630 );
1631 for (unsigned j = 0; j < predecessor_count; j++) {
1632 AnalysisSubgraphNode predecessor_node = {
1633 .state = predecessors[j],
1634 .child_index = node.child_index - 1,
1635 .production_id = node.production_id,
1636 .done = false,
1637 };
1638 unsigned index, exists;
1639 array_search_sorted_with(
1640 &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
1641 &index, &exists
1642 );
1643 if (!exists) {
1644 array_insert(&subgraph->nodes, index, predecessor_node);
1645 array_push(&next_nodes, predecessor_node);
1646 }
1647 }
1648 }
1649 }
1650 }
1651
1652 #ifdef DEBUG_ANALYZE_QUERY
1653 printf("\nSubgraphs:\n");
1654 for (unsigned i = 0; i < subgraphs.size; i++) {
1655 AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1656 printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
1657 for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1658 printf(
1659 " {state: %u}\n",
1660 subgraph->start_states.contents[j]
1661 );
1662 }
1663 for (unsigned j = 0; j < subgraph->nodes.size; j++) {
1664 AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
1665 printf(
1666 " {state: %u, child_index: %u, production_id: %u, done: %d}\n",
1667 node->state, node->child_index, node->production_id, node->done
1668 );
1669 }
1670 printf("\n");
1671 }
1672 #endif
1673
1674 // For each non-terminal pattern, determine if the pattern can successfully match,
1675 // and identify all of the possible children within the pattern where matching could fail.
1676 bool all_patterns_are_valid = true;
1677 QueryAnalysis analysis = query_analysis__new();
1678 for (unsigned i = 0; i < parent_step_indices.size; i++) {
1679 uint16_t parent_step_index = parent_step_indices.contents[i];
1680 uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
1681 TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1682 if (parent_symbol == ts_builtin_sym_error) continue;
1683
1684 // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
1685 // root symbol is a terminal, then return an error.
1686 unsigned subgraph_index, exists;
1687 array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1688 if (!exists) {
1689 unsigned first_child_step_index = parent_step_index + 1;
1690 uint32_t j, child_exists;
1691 array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists);
1692 assert(child_exists);
1693 *error_offset = self->step_offsets.contents[j].byte_offset;
1694 all_patterns_are_valid = false;
1695 break;
1696 }
1697
1698 // Initialize an analysis state at every parse state in the table where
1699 // this parent symbol can occur.
1700 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1701 analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1702 analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1703 for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1704 TSStateId parse_state = subgraph->start_states.contents[j];
1705 analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1706 .step_index = parent_step_index + 1,
1707 .stack = {
1708 [0] = {
1709 .parse_state = parse_state,
1710 .parent_symbol = parent_symbol,
1711 .child_index = 0,
1712 .field_id = 0,
1713 .done = false,
1714 },
1715 },
1716 .depth = 1,
1717 .root_symbol = parent_symbol,
1718 }));
1719 }
1720
1721 #ifdef DEBUG_ANALYZE_QUERY
1722 printf(
1723 "\nWalk states for %s:\n",
1724 ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol)
1725 );
1726 #endif
1727
1728 analysis.did_abort = false;
1729 ts_query__perform_analysis(self, &subgraphs, &analysis);
1730
1731 // If this pattern could not be fully analyzed, then every step should
1732 // be considered fallible.
1733 if (analysis.did_abort) {
1734 for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
1735 QueryStep *step = &self->steps.contents[j];
1736 if (
1737 step->depth <= parent_depth ||
1738 step->depth == PATTERN_DONE_MARKER
1739 ) break;
1740 if (!step->is_dead_end) {
1741 step->parent_pattern_guaranteed = false;
1742 step->root_pattern_guaranteed = false;
1743 }
1744 }
1745 continue;
1746 }
1747
1748 // If this pattern cannot match, store the pattern index so that it can be
1749 // returned to the caller.
1750 if (analysis.finished_parent_symbols.size == 0) {
1751 assert(analysis.final_step_indices.size > 0);
1752 uint16_t impossible_step_index = *array_back(&analysis.final_step_indices);
1753 uint32_t j, impossible_exists;
1754 array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists);
1755 if (j >= self->step_offsets.size) j = self->step_offsets.size - 1;
1756 *error_offset = self->step_offsets.contents[j].byte_offset;
1757 all_patterns_are_valid = false;
1758 break;
1759 }
1760
1761 // Mark as fallible any step where a match terminated.
1762 // Later, this property will be propagated to all of the step's predecessors.
1763 for (unsigned j = 0; j < analysis.final_step_indices.size; j++) {
1764 uint32_t final_step_index = analysis.final_step_indices.contents[j];
1765 QueryStep *step = &self->steps.contents[final_step_index];
1766 if (
1767 step->depth != PATTERN_DONE_MARKER &&
1768 step->depth > parent_depth &&
1769 !step->is_dead_end
1770 ) {
1771 step->parent_pattern_guaranteed = false;
1772 step->root_pattern_guaranteed = false;
1773 }
1774 }
1775 }
1776
1777 // Mark as indefinite any step with captures that are used in predicates.
1778 Array(uint16_t) predicate_capture_ids = array_new();
1779 for (unsigned i = 0; i < self->patterns.size; i++) {
1780 QueryPattern *pattern = &self->patterns.contents[i];
1781
1782 // Gather all of the captures that are used in predicates for this pattern.
1783 array_clear(&predicate_capture_ids);
1784 for (
1785 unsigned start = pattern->predicate_steps.offset,
1786 end = start + pattern->predicate_steps.length,
1787 j = start; j < end; j++
1788 ) {
1789 TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
1790 if (step->type == TSQueryPredicateStepTypeCapture) {
1791 uint16_t value_id = step->value_id;
1792 array_insert_sorted_by(&predicate_capture_ids, , value_id);
1793 }
1794 }
1795
1796 // Find all of the steps that have these captures.
1797 for (
1798 unsigned start = pattern->steps.offset,
1799 end = start + pattern->steps.length,
1800 j = start; j < end; j++
1801 ) {
1802 QueryStep *step = &self->steps.contents[j];
1803 for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
1804 uint16_t capture_id = step->capture_ids[k];
1805 if (capture_id == NONE) break;
1806 unsigned index, exists;
1807 array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
1808 if (exists) {
1809 step->root_pattern_guaranteed = false;
1810 break;
1811 }
1812 }
1813 }
1814 }
1815
1816 // Propagate fallibility. If a pattern is fallible at a given step, then it is
1817 // fallible at all of its preceding steps.
1818 bool done = self->steps.size == 0;
1819 while (!done) {
1820 done = true;
1821 for (unsigned i = self->steps.size - 1; i > 0; i--) {
1822 QueryStep *step = &self->steps.contents[i];
1823 if (step->depth == PATTERN_DONE_MARKER) continue;
1824
1825 // Determine if this step is definite or has definite alternatives.
1826 bool parent_pattern_guaranteed = false;
1827 for (;;) {
1828 if (step->root_pattern_guaranteed) {
1829 parent_pattern_guaranteed = true;
1830 break;
1831 }
1832 if (step->alternative_index == NONE || step->alternative_index < i) {
1833 break;
1834 }
1835 step = &self->steps.contents[step->alternative_index];
1836 }
1837
1838 // If not, mark its predecessor as indefinite.
1839 if (!parent_pattern_guaranteed) {
1840 QueryStep *prev_step = &self->steps.contents[i - 1];
1841 if (
1842 !prev_step->is_dead_end &&
1843 prev_step->depth != PATTERN_DONE_MARKER &&
1844 prev_step->root_pattern_guaranteed
1845 ) {
1846 prev_step->root_pattern_guaranteed = false;
1847 done = false;
1848 }
1849 }
1850 }
1851 }
1852
1853 #ifdef DEBUG_ANALYZE_QUERY
1854 printf("Steps:\n");
1855 for (unsigned i = 0; i < self->steps.size; i++) {
1856 QueryStep *step = &self->steps.contents[i];
1857 if (step->depth == PATTERN_DONE_MARKER) {
1858 printf(" %u: DONE\n", i);
1859 } else {
1860 printf(
1861 " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
1862 i,
1863 (step->symbol == WILDCARD_SYMBOL)
1864 ? "ANY"
1865 : ts_language_symbol_name(self->language, step->symbol),
1866 (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
1867 step->depth,
1868 step->parent_pattern_guaranteed,
1869 step->root_pattern_guaranteed
1870 );
1871 }
1872 }
1873 #endif
1874
1875 // Determine which repetition symbols in this language have the possibility
1876 // of matching non-rooted patterns in this query. These repetition symbols
1877 // prevent certain optimizations with range restrictions.
1878 analysis.did_abort = false;
1879 for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) {
1880 uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i];
1881 PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index];
1882
1883 analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1884 analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1885 for (unsigned j = 0; j < subgraphs.size; j++) {
1886 AnalysisSubgraph *subgraph = &subgraphs.contents[j];
1887 TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol);
1888 if (metadata.visible || metadata.named) continue;
1889
1890 for (uint32_t k = 0; k < subgraph->start_states.size; k++) {
1891 TSStateId parse_state = subgraph->start_states.contents[k];
1892 analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1893 .step_index = pattern_entry->step_index,
1894 .stack = {
1895 [0] = {
1896 .parse_state = parse_state,
1897 .parent_symbol = subgraph->symbol,
1898 .child_index = 0,
1899 .field_id = 0,
1900 .done = false,
1901 },
1902 },
1903 .root_symbol = subgraph->symbol,
1904 .depth = 1,
1905 }));
1906 }
1907 }
1908
1909 #ifdef DEBUG_ANALYZE_QUERY
1910 printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index);
1911 #endif
1912
1913 ts_query__perform_analysis(
1914 self,
1915 &subgraphs,
1916 &analysis
1917 );
1918
1919 if (analysis.finished_parent_symbols.size > 0) {
1920 self->patterns.contents[pattern_entry->pattern_index].is_non_local = true;
1921 }
1922
1923 for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) {
1924 TSSymbol symbol = analysis.finished_parent_symbols.contents[k];
1925 array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol);
1926 }
1927 }
1928
1929 #ifdef DEBUG_ANALYZE_QUERY
1930 if (self->repeat_symbols_with_rootless_patterns.size > 0) {
1931 printf("\nRepetition symbols with rootless patterns:\n");
1932 printf("aborted analysis: %d\n", analysis.did_abort);
1933 for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
1934 TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i];
1935 printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
1936 }
1937 printf("\n");
1938 }
1939 #endif
1940
1941 // Cleanup
1942 for (unsigned i = 0; i < subgraphs.size; i++) {
1943 array_delete(&subgraphs.contents[i].start_states);
1944 array_delete(&subgraphs.contents[i].nodes);
1945 }
1946 array_delete(&subgraphs);
1947 query_analysis__delete(&analysis);
1948 array_delete(&next_nodes);
1949 array_delete(&non_rooted_pattern_start_steps);
1950 array_delete(&parent_step_indices);
1951 array_delete(&predicate_capture_ids);
1952 state_predecessor_map_delete(&predecessor_map);
1953
1954 return all_patterns_are_valid;
1955}
1956
1957static void ts_query__add_negated_fields(
1958 TSQuery *self,
1959 uint16_t step_index,
1960 TSFieldId *field_ids,
1961 uint16_t field_count
1962) {
1963 QueryStep *step = &self->steps.contents[step_index];
1964
1965 // The negated field array stores a list of field lists, separated by zeros.
1966 // Try to find the start index of an existing list that matches this new list.
1967 bool failed_match = false;
1968 unsigned match_count = 0;
1969 unsigned start_i = 0;
1970 for (unsigned i = 0; i < self->negated_fields.size; i++) {
1971 TSFieldId existing_field_id = self->negated_fields.contents[i];
1972
1973 // At each zero value, terminate the match attempt. If we've exactly
1974 // matched the new field list, then reuse this index. Otherwise,
1975 // start over the matching process.
1976 if (existing_field_id == 0) {
1977 if (match_count == field_count) {
1978 step->negated_field_list_id = start_i;
1979 return;
1980 } else {
1981 start_i = i + 1;
1982 match_count = 0;
1983 failed_match = false;
1984 }
1985 }
1986
1987 // If the existing list matches our new list so far, then advance
1988 // to the next element of the new list.
1989 else if (
1990 match_count < field_count &&
1991 existing_field_id == field_ids[match_count] &&
1992 !failed_match
1993 ) {
1994 match_count++;
1995 }
1996
1997 // Otherwise, this existing list has failed to match.
1998 else {
1999 match_count = 0;
2000 failed_match = true;
2001 }
2002 }
2003
2004 step->negated_field_list_id = self->negated_fields.size;
2005 array_extend(&self->negated_fields, field_count, field_ids);
2006 array_push(&self->negated_fields, 0);
2007}
2008
2009static TSQueryError ts_query__parse_string_literal(
2010 TSQuery *self,
2011 Stream *stream
2012) {
2013 const char *string_start = stream->input;
2014 if (stream->next != '"') return TSQueryErrorSyntax;
2015 stream_advance(stream);
2016 const char *prev_position = stream->input;
2017
2018 bool is_escaped = false;
2019 array_clear(&self->string_buffer);
2020 for (;;) {
2021 if (is_escaped) {
2022 is_escaped = false;
2023 switch (stream->next) {
2024 case 'n':
2025 array_push(&self->string_buffer, '\n');
2026 break;
2027 case 'r':
2028 array_push(&self->string_buffer, '\r');
2029 break;
2030 case 't':
2031 array_push(&self->string_buffer, '\t');
2032 break;
2033 case '0':
2034 array_push(&self->string_buffer, '\0');
2035 break;
2036 default:
2037 array_extend(&self->string_buffer, stream->next_size, stream->input);
2038 break;
2039 }
2040 prev_position = stream->input + stream->next_size;
2041 } else {
2042 if (stream->next == '\\') {
2043 array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2044 prev_position = stream->input + 1;
2045 is_escaped = true;
2046 } else if (stream->next == '"') {
2047 array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2048 stream_advance(stream);
2049 return TSQueryErrorNone;
2050 } else if (stream->next == '\n') {
2051 stream_reset(stream, string_start);
2052 return TSQueryErrorSyntax;
2053 }
2054 }
2055 if (!stream_advance(stream)) {
2056 stream_reset(stream, string_start);
2057 return TSQueryErrorSyntax;
2058 }
2059 }
2060}
2061
2062// Parse a single predicate associated with a pattern, adding it to the
2063// query's internal `predicate_steps` array. Predicates are arbitrary
2064// S-expressions associated with a pattern which are meant to be handled at
2065// a higher level of abstraction, such as the Rust/JavaScript bindings. They
2066// can contain '@'-prefixed capture names, double-quoted strings, and bare
2067// symbols, which also represent strings.
2068static TSQueryError ts_query__parse_predicate(
2069 TSQuery *self,
2070 Stream *stream
2071) {
2072 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2073 const char *predicate_name = stream->input;
2074 stream_scan_identifier(stream);
2075 uint32_t length = (uint32_t)(stream->input - predicate_name);
2076 uint16_t id = symbol_table_insert_name(
2077 &self->predicate_values,
2078 predicate_name,
2079 length
2080 );
2081 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2082 .type = TSQueryPredicateStepTypeString,
2083 .value_id = id,
2084 }));
2085 stream_skip_whitespace(stream);
2086
2087 for (;;) {
2088 if (stream->next == ')') {
2089 stream_advance(stream);
2090 stream_skip_whitespace(stream);
2091 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2092 .type = TSQueryPredicateStepTypeDone,
2093 .value_id = 0,
2094 }));
2095 break;
2096 }
2097
2098 // Parse an '@'-prefixed capture name
2099 else if (stream->next == '@') {
2100 stream_advance(stream);
2101
2102 // Parse the capture name
2103 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2104 const char *capture_name = stream->input;
2105 stream_scan_identifier(stream);
2106 uint32_t capture_length = (uint32_t)(stream->input - capture_name);
2107
2108 // Add the capture id to the first step of the pattern
2109 int capture_id = symbol_table_id_for_name(
2110 &self->captures,
2111 capture_name,
2112 capture_length
2113 );
2114 if (capture_id == -1) {
2115 stream_reset(stream, capture_name);
2116 return TSQueryErrorCapture;
2117 }
2118
2119 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2120 .type = TSQueryPredicateStepTypeCapture,
2121 .value_id = capture_id,
2122 }));
2123 }
2124
2125 // Parse a string literal
2126 else if (stream->next == '"') {
2127 TSQueryError e = ts_query__parse_string_literal(self, stream);
2128 if (e) return e;
2129 uint16_t query_id = symbol_table_insert_name(
2130 &self->predicate_values,
2131 self->string_buffer.contents,
2132 self->string_buffer.size
2133 );
2134 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2135 .type = TSQueryPredicateStepTypeString,
2136 .value_id = query_id,
2137 }));
2138 }
2139
2140 // Parse a bare symbol
2141 else if (stream_is_ident_start(stream)) {
2142 const char *symbol_start = stream->input;
2143 stream_scan_identifier(stream);
2144 uint32_t symbol_length = (uint32_t)(stream->input - symbol_start);
2145 uint16_t query_id = symbol_table_insert_name(
2146 &self->predicate_values,
2147 symbol_start,
2148 symbol_length
2149 );
2150 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2151 .type = TSQueryPredicateStepTypeString,
2152 .value_id = query_id,
2153 }));
2154 }
2155
2156 else {
2157 return TSQueryErrorSyntax;
2158 }
2159
2160 stream_skip_whitespace(stream);
2161 }
2162
2163 return 0;
2164}
2165
2166// Read one S-expression pattern from the stream, and incorporate it into
2167// the query's internal state machine representation. For nested patterns,
2168// this function calls itself recursively.
2169//
2170// The caller is responsible for passing in a dedicated CaptureQuantifiers.
2171// These should not be shared between different calls to ts_query__parse_pattern!
2172static TSQueryError ts_query__parse_pattern(
2173 TSQuery *self,
2174 Stream *stream,
2175 uint32_t depth,
2176 bool is_immediate,
2177 CaptureQuantifiers *capture_quantifiers
2178) {
2179 if (stream->next == 0) return TSQueryErrorSyntax;
2180 if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
2181
2182 const uint32_t starting_step_index = self->steps.size;
2183
2184 // Store the byte offset of each step in the query.
2185 if (
2186 self->step_offsets.size == 0 ||
2187 array_back(&self->step_offsets)->step_index != starting_step_index
2188 ) {
2189 array_push(&self->step_offsets, ((StepOffset) {
2190 .step_index = starting_step_index,
2191 .byte_offset = stream_offset(stream),
2192 }));
2193 }
2194
2195 // An open bracket is the start of an alternation.
2196 if (stream->next == '[') {
2197 stream_advance(stream);
2198 stream_skip_whitespace(stream);
2199
2200 // Parse each branch, and add a placeholder step in between the branches.
2201 Array(uint32_t) branch_step_indices = array_new();
2202 CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
2203 for (;;) {
2204 uint32_t start_index = self->steps.size;
2205 TSQueryError e = ts_query__parse_pattern(
2206 self,
2207 stream,
2208 depth,
2209 is_immediate,
2210 &branch_capture_quantifiers
2211 );
2212
2213 if (e == PARENT_DONE) {
2214 if (stream->next == ']' && branch_step_indices.size > 0) {
2215 stream_advance(stream);
2216 break;
2217 }
2218 e = TSQueryErrorSyntax;
2219 }
2220 if (e) {
2221 capture_quantifiers_delete(&branch_capture_quantifiers);
2222 array_delete(&branch_step_indices);
2223 return e;
2224 }
2225
2226 if (start_index == starting_step_index) {
2227 capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
2228 } else {
2229 capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
2230 }
2231
2232 array_push(&branch_step_indices, start_index);
2233 array_push(&self->steps, query_step__new(0, depth, false));
2234 capture_quantifiers_clear(&branch_capture_quantifiers);
2235 }
2236 (void)array_pop(&self->steps);
2237
2238 // For all of the branches except for the last one, add the subsequent branch as an
2239 // alternative, and link the end of the branch to the current end of the steps.
2240 for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
2241 uint32_t step_index = branch_step_indices.contents[i];
2242 uint32_t next_step_index = branch_step_indices.contents[i + 1];
2243 QueryStep *start_step = &self->steps.contents[step_index];
2244 QueryStep *end_step = &self->steps.contents[next_step_index - 1];
2245 start_step->alternative_index = next_step_index;
2246 end_step->alternative_index = self->steps.size;
2247 end_step->is_dead_end = true;
2248 }
2249
2250 capture_quantifiers_delete(&branch_capture_quantifiers);
2251 array_delete(&branch_step_indices);
2252 }
2253
2254 // An open parenthesis can be the start of three possible constructs:
2255 // * A grouped sequence
2256 // * A predicate
2257 // * A named node
2258 else if (stream->next == '(') {
2259 stream_advance(stream);
2260 stream_skip_whitespace(stream);
2261
2262 // If this parenthesis is followed by a node, then it represents a grouped sequence.
2263 if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
2264 bool child_is_immediate = is_immediate;
2265 CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2266 for (;;) {
2267 if (stream->next == '.') {
2268 child_is_immediate = true;
2269 stream_advance(stream);
2270 stream_skip_whitespace(stream);
2271 }
2272 TSQueryError e = ts_query__parse_pattern(
2273 self,
2274 stream,
2275 depth,
2276 child_is_immediate,
2277 &child_capture_quantifiers
2278 );
2279 if (e == PARENT_DONE) {
2280 if (stream->next == ')') {
2281 stream_advance(stream);
2282 break;
2283 }
2284 e = TSQueryErrorSyntax;
2285 }
2286 if (e) {
2287 capture_quantifiers_delete(&child_capture_quantifiers);
2288 return e;
2289 }
2290
2291 capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2292 capture_quantifiers_clear(&child_capture_quantifiers);
2293 child_is_immediate = false;
2294 }
2295
2296 capture_quantifiers_delete(&child_capture_quantifiers);
2297 }
2298
2299 // A dot/pound character indicates the start of a predicate.
2300 else if (stream->next == '.' || stream->next == '#') {
2301 stream_advance(stream);
2302 return ts_query__parse_predicate(self, stream);
2303 }
2304
2305 // Otherwise, this parenthesis is the start of a named node.
2306 else {
2307 TSSymbol symbol;
2308
2309 // Parse a normal node name
2310 if (stream_is_ident_start(stream)) {
2311 const char *node_name = stream->input;
2312 stream_scan_identifier(stream);
2313 uint32_t length = (uint32_t)(stream->input - node_name);
2314
2315 // Parse the wildcard symbol
2316 if (length == 1 && node_name[0] == '_') {
2317 symbol = WILDCARD_SYMBOL;
2318 }
2319
2320 else {
2321 symbol = ts_language_symbol_for_name(
2322 self->language,
2323 node_name,
2324 length,
2325 true
2326 );
2327 if (!symbol) {
2328 stream_reset(stream, node_name);
2329 return TSQueryErrorNodeType;
2330 }
2331 }
2332 } else {
2333 return TSQueryErrorSyntax;
2334 }
2335
2336 // Add a step for the node.
2337 array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2338 QueryStep *step = array_back(&self->steps);
2339 if (ts_language_symbol_metadata(self->language, symbol).supertype) {
2340 step->supertype_symbol = step->symbol;
2341 step->symbol = WILDCARD_SYMBOL;
2342 }
2343 if (symbol == WILDCARD_SYMBOL) {
2344 step->is_named = true;
2345 }
2346
2347 stream_skip_whitespace(stream);
2348
2349 if (stream->next == '/') {
2350 stream_advance(stream);
2351 if (!stream_is_ident_start(stream)) {
2352 return TSQueryErrorSyntax;
2353 }
2354
2355 const char *node_name = stream->input;
2356 stream_scan_identifier(stream);
2357 uint32_t length = (uint32_t)(stream->input - node_name);
2358
2359 step->symbol = ts_language_symbol_for_name(
2360 self->language,
2361 node_name,
2362 length,
2363 true
2364 );
2365 if (!step->symbol) {
2366 stream_reset(stream, node_name);
2367 return TSQueryErrorNodeType;
2368 }
2369
2370 stream_skip_whitespace(stream);
2371 }
2372
2373 // Parse the child patterns
2374 bool child_is_immediate = false;
2375 uint16_t last_child_step_index = 0;
2376 uint16_t negated_field_count = 0;
2377 TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
2378 CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2379 for (;;) {
2380 // Parse a negated field assertion
2381 if (stream->next == '!') {
2382 stream_advance(stream);
2383 stream_skip_whitespace(stream);
2384 if (!stream_is_ident_start(stream)) {
2385 capture_quantifiers_delete(&child_capture_quantifiers);
2386 return TSQueryErrorSyntax;
2387 }
2388 const char *field_name = stream->input;
2389 stream_scan_identifier(stream);
2390 uint32_t length = (uint32_t)(stream->input - field_name);
2391 stream_skip_whitespace(stream);
2392
2393 TSFieldId field_id = ts_language_field_id_for_name(
2394 self->language,
2395 field_name,
2396 length
2397 );
2398 if (!field_id) {
2399 stream->input = field_name;
2400 capture_quantifiers_delete(&child_capture_quantifiers);
2401 return TSQueryErrorField;
2402 }
2403
2404 // Keep the field ids sorted.
2405 if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
2406 negated_field_ids[negated_field_count] = field_id;
2407 negated_field_count++;
2408 }
2409
2410 continue;
2411 }
2412
2413 // Parse a sibling anchor
2414 if (stream->next == '.') {
2415 child_is_immediate = true;
2416 stream_advance(stream);
2417 stream_skip_whitespace(stream);
2418 }
2419
2420 uint16_t step_index = self->steps.size;
2421 TSQueryError e = ts_query__parse_pattern(
2422 self,
2423 stream,
2424 depth + 1,
2425 child_is_immediate,
2426 &child_capture_quantifiers
2427 );
2428 if (e == PARENT_DONE) {
2429 if (stream->next == ')') {
2430 if (child_is_immediate) {
2431 if (last_child_step_index == 0) {
2432 capture_quantifiers_delete(&child_capture_quantifiers);
2433 return TSQueryErrorSyntax;
2434 }
2435 self->steps.contents[last_child_step_index].is_last_child = true;
2436 }
2437
2438 if (negated_field_count) {
2439 ts_query__add_negated_fields(
2440 self,
2441 starting_step_index,
2442 negated_field_ids,
2443 negated_field_count
2444 );
2445 }
2446
2447 stream_advance(stream);
2448 break;
2449 }
2450 e = TSQueryErrorSyntax;
2451 }
2452 if (e) {
2453 capture_quantifiers_delete(&child_capture_quantifiers);
2454 return e;
2455 }
2456
2457 capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2458
2459 last_child_step_index = step_index;
2460 child_is_immediate = false;
2461 capture_quantifiers_clear(&child_capture_quantifiers);
2462 }
2463 capture_quantifiers_delete(&child_capture_quantifiers);
2464 }
2465 }
2466
2467 // Parse a wildcard pattern
2468 else if (stream->next == '_') {
2469 stream_advance(stream);
2470 stream_skip_whitespace(stream);
2471
2472 // Add a step that matches any kind of node
2473 array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
2474 }
2475
2476 // Parse a double-quoted anonymous leaf node expression
2477 else if (stream->next == '"') {
2478 const char *string_start = stream->input;
2479 TSQueryError e = ts_query__parse_string_literal(self, stream);
2480 if (e) return e;
2481
2482 // Add a step for the node
2483 TSSymbol symbol = ts_language_symbol_for_name(
2484 self->language,
2485 self->string_buffer.contents,
2486 self->string_buffer.size,
2487 false
2488 );
2489 if (!symbol) {
2490 stream_reset(stream, string_start + 1);
2491 return TSQueryErrorNodeType;
2492 }
2493 array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2494 }
2495
2496 // Parse a field-prefixed pattern
2497 else if (stream_is_ident_start(stream)) {
2498 // Parse the field name
2499 const char *field_name = stream->input;
2500 stream_scan_identifier(stream);
2501 uint32_t length = (uint32_t)(stream->input - field_name);
2502 stream_skip_whitespace(stream);
2503
2504 if (stream->next != ':') {
2505 stream_reset(stream, field_name);
2506 return TSQueryErrorSyntax;
2507 }
2508 stream_advance(stream);
2509 stream_skip_whitespace(stream);
2510
2511 // Parse the pattern
2512 CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
2513 TSQueryError e = ts_query__parse_pattern(
2514 self,
2515 stream,
2516 depth,
2517 is_immediate,
2518 &field_capture_quantifiers
2519 );
2520 if (e) {
2521 capture_quantifiers_delete(&field_capture_quantifiers);
2522 if (e == PARENT_DONE) e = TSQueryErrorSyntax;
2523 return e;
2524 }
2525
2526 // Add the field name to the first step of the pattern
2527 TSFieldId field_id = ts_language_field_id_for_name(
2528 self->language,
2529 field_name,
2530 length
2531 );
2532 if (!field_id) {
2533 stream->input = field_name;
2534 return TSQueryErrorField;
2535 }
2536
2537 uint32_t step_index = starting_step_index;
2538 QueryStep *step = &self->steps.contents[step_index];
2539 for (;;) {
2540 step->field = field_id;
2541 if (
2542 step->alternative_index != NONE &&
2543 step->alternative_index > step_index &&
2544 step->alternative_index < self->steps.size
2545 ) {
2546 step_index = step->alternative_index;
2547 step = &self->steps.contents[step_index];
2548 } else {
2549 break;
2550 }
2551 }
2552
2553 capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
2554 capture_quantifiers_delete(&field_capture_quantifiers);
2555 }
2556
2557 else {
2558 return TSQueryErrorSyntax;
2559 }
2560
2561 stream_skip_whitespace(stream);
2562
2563 // Parse suffixes modifiers for this pattern
2564 TSQuantifier quantifier = TSQuantifierOne;
2565 for (;;) {
2566 // Parse the one-or-more operator.
2567 if (stream->next == '+') {
2568 quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);
2569
2570 stream_advance(stream);
2571 stream_skip_whitespace(stream);
2572
2573 QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2574 repeat_step.alternative_index = starting_step_index;
2575 repeat_step.is_pass_through = true;
2576 repeat_step.alternative_is_immediate = true;
2577 array_push(&self->steps, repeat_step);
2578 }
2579
2580 // Parse the zero-or-more repetition operator.
2581 else if (stream->next == '*') {
2582 quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);
2583
2584 stream_advance(stream);
2585 stream_skip_whitespace(stream);
2586
2587 QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2588 repeat_step.alternative_index = starting_step_index;
2589 repeat_step.is_pass_through = true;
2590 repeat_step.alternative_is_immediate = true;
2591 array_push(&self->steps, repeat_step);
2592
2593 // Stop when `step->alternative_index` is `NONE` or it points to
2594 // `repeat_step` or beyond. Note that having just been pushed,
2595 // `repeat_step` occupies slot `self->steps.size - 1`.
2596 QueryStep *step = &self->steps.contents[starting_step_index];
2597 while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) {
2598 step = &self->steps.contents[step->alternative_index];
2599 }
2600 step->alternative_index = self->steps.size;
2601 }
2602
2603 // Parse the optional operator.
2604 else if (stream->next == '?') {
2605 quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);
2606
2607 stream_advance(stream);
2608 stream_skip_whitespace(stream);
2609
2610 QueryStep *step = &self->steps.contents[starting_step_index];
2611 while (step->alternative_index != NONE && step->alternative_index < self->steps.size) {
2612 step = &self->steps.contents[step->alternative_index];
2613 }
2614 step->alternative_index = self->steps.size;
2615 }
2616
2617 // Parse an '@'-prefixed capture pattern
2618 else if (stream->next == '@') {
2619 stream_advance(stream);
2620 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2621 const char *capture_name = stream->input;
2622 stream_scan_identifier(stream);
2623 uint32_t length = (uint32_t)(stream->input - capture_name);
2624 stream_skip_whitespace(stream);
2625
2626 // Add the capture id to the first step of the pattern
2627 uint16_t capture_id = symbol_table_insert_name(
2628 &self->captures,
2629 capture_name,
2630 length
2631 );
2632
2633 // Add the capture quantifier
2634 capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);
2635
2636 uint32_t step_index = starting_step_index;
2637 for (;;) {
2638 QueryStep *step = &self->steps.contents[step_index];
2639 query_step__add_capture(step, capture_id);
2640 if (
2641 step->alternative_index != NONE &&
2642 step->alternative_index > step_index &&
2643 step->alternative_index < self->steps.size
2644 ) {
2645 step_index = step->alternative_index;
2646 } else {
2647 break;
2648 }
2649 }
2650 }
2651
2652 // No more suffix modifiers
2653 else {
2654 break;
2655 }
2656 }
2657
2658 capture_quantifiers_mul(capture_quantifiers, quantifier);
2659
2660 return 0;
2661}
2662
2663TSQuery *ts_query_new(
2664 const TSLanguage *language,
2665 const char *source,
2666 uint32_t source_len,
2667 uint32_t *error_offset,
2668 TSQueryError *error_type
2669) {
2670 if (
2671 !language ||
2672 language->version > TREE_SITTER_LANGUAGE_VERSION ||
2673 language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
2674 ) {
2675 *error_type = TSQueryErrorLanguage;
2676 return NULL;
2677 }
2678
2679 TSQuery *self = ts_malloc(sizeof(TSQuery));
2680 *self = (TSQuery) {
2681 .steps = array_new(),
2682 .pattern_map = array_new(),
2683 .captures = symbol_table_new(),
2684 .capture_quantifiers = array_new(),
2685 .predicate_values = symbol_table_new(),
2686 .predicate_steps = array_new(),
2687 .patterns = array_new(),
2688 .step_offsets = array_new(),
2689 .string_buffer = array_new(),
2690 .negated_fields = array_new(),
2691 .repeat_symbols_with_rootless_patterns = array_new(),
2692 .wildcard_root_pattern_count = 0,
2693 .language = ts_language_copy(language),
2694 };
2695
2696 array_push(&self->negated_fields, 0);
2697
2698 // Parse all of the S-expressions in the given string.
2699 Stream stream = stream_new(source, source_len);
2700 stream_skip_whitespace(&stream);
2701 while (stream.input < stream.end) {
2702 uint32_t pattern_index = self->patterns.size;
2703 uint32_t start_step_index = self->steps.size;
2704 uint32_t start_predicate_step_index = self->predicate_steps.size;
2705 array_push(&self->patterns, ((QueryPattern) {
2706 .steps = (Slice) {.offset = start_step_index},
2707 .predicate_steps = (Slice) {.offset = start_predicate_step_index},
2708 .start_byte = stream_offset(&stream),
2709 .is_non_local = false,
2710 }));
2711 CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
2712 *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
2713 array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
2714
2715 QueryPattern *pattern = array_back(&self->patterns);
2716 pattern->steps.length = self->steps.size - start_step_index;
2717 pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;
2718
2719 // If any pattern could not be parsed, then report the error information
2720 // and terminate.
2721 if (*error_type) {
2722 if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
2723 *error_offset = stream_offset(&stream);
2724 capture_quantifiers_delete(&capture_quantifiers);
2725 ts_query_delete(self);
2726 return NULL;
2727 }
2728
2729 // Maintain a list of capture quantifiers for each pattern
2730 array_push(&self->capture_quantifiers, capture_quantifiers);
2731
2732 // Maintain a map that can look up patterns for a given root symbol.
2733 uint16_t wildcard_root_alternative_index = NONE;
2734 for (;;) {
2735 QueryStep *step = &self->steps.contents[start_step_index];
2736
2737 // If a pattern has a wildcard at its root, but it has a non-wildcard child,
2738 // then optimize the matching process by skipping matching the wildcard.
2739 // Later, during the matching process, the query cursor will check that
2740 // there is a parent node, and capture it if necessary.
2741 if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
2742 QueryStep *second_step = &self->steps.contents[start_step_index + 1];
2743 if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) {
2744 wildcard_root_alternative_index = step->alternative_index;
2745 start_step_index += 1;
2746 step = second_step;
2747 }
2748 }
2749
2750 // Determine whether the pattern has a single root node. This affects
2751 // decisions about whether or not to start matching the pattern when
2752 // a query cursor has a range restriction or when immediately within an
2753 // error node.
2754 uint32_t start_depth = step->depth;
2755 bool is_rooted = start_depth == 0;
2756 for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
2757 QueryStep *child_step = &self->steps.contents[step_index];
2758 if (child_step->is_dead_end) break;
2759 if (child_step->depth == start_depth) {
2760 is_rooted = false;
2761 break;
2762 }
2763 }
2764
2765 ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
2766 .step_index = start_step_index,
2767 .pattern_index = pattern_index,
2768 .is_rooted = is_rooted
2769 });
2770 if (step->symbol == WILDCARD_SYMBOL) {
2771 self->wildcard_root_pattern_count++;
2772 }
2773
2774 // If there are alternatives or options at the root of the pattern,
2775 // then add multiple entries to the pattern map.
2776 if (step->alternative_index != NONE) {
2777 start_step_index = step->alternative_index;
2778 } else if (wildcard_root_alternative_index != NONE) {
2779 start_step_index = wildcard_root_alternative_index;
2780 wildcard_root_alternative_index = NONE;
2781 } else {
2782 break;
2783 }
2784 }
2785 }
2786
2787 if (!ts_query__analyze_patterns(self, error_offset)) {
2788 *error_type = TSQueryErrorStructure;
2789 ts_query_delete(self);
2790 return NULL;
2791 }
2792
2793 array_delete(&self->string_buffer);
2794 return self;
2795}
2796
2797void ts_query_delete(TSQuery *self) {
2798 if (self) {
2799 array_delete(&self->steps);
2800 array_delete(&self->pattern_map);
2801 array_delete(&self->predicate_steps);
2802 array_delete(&self->patterns);
2803 array_delete(&self->step_offsets);
2804 array_delete(&self->string_buffer);
2805 array_delete(&self->negated_fields);
2806 array_delete(&self->repeat_symbols_with_rootless_patterns);
2807 ts_language_delete(self->language);
2808 symbol_table_delete(&self->captures);
2809 symbol_table_delete(&self->predicate_values);
2810 for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) {
2811 CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index);
2812 capture_quantifiers_delete(capture_quantifiers);
2813 }
2814 array_delete(&self->capture_quantifiers);
2815 ts_free(self);
2816 }
2817}
2818
2819uint32_t ts_query_pattern_count(const TSQuery *self) {
2820 return self->patterns.size;
2821}
2822
2823uint32_t ts_query_capture_count(const TSQuery *self) {
2824 return self->captures.slices.size;
2825}
2826
2827uint32_t ts_query_string_count(const TSQuery *self) {
2828 return self->predicate_values.slices.size;
2829}
2830
2831const char *ts_query_capture_name_for_id(
2832 const TSQuery *self,
2833 uint32_t index,
2834 uint32_t *length
2835) {
2836 return symbol_table_name_for_id(&self->captures, index, length);
2837}
2838
2839TSQuantifier ts_query_capture_quantifier_for_id(
2840 const TSQuery *self,
2841 uint32_t pattern_index,
2842 uint32_t capture_index
2843) {
2844 CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
2845 return capture_quantifier_for_id(capture_quantifiers, capture_index);
2846}
2847
2848const char *ts_query_string_value_for_id(
2849 const TSQuery *self,
2850 uint32_t index,
2851 uint32_t *length
2852) {
2853 return symbol_table_name_for_id(&self->predicate_values, index, length);
2854}
2855
2856const TSQueryPredicateStep *ts_query_predicates_for_pattern(
2857 const TSQuery *self,
2858 uint32_t pattern_index,
2859 uint32_t *step_count
2860) {
2861 Slice slice = self->patterns.contents[pattern_index].predicate_steps;
2862 *step_count = slice.length;
2863 if (self->predicate_steps.contents == NULL) {
2864 return NULL;
2865 }
2866 return &self->predicate_steps.contents[slice.offset];
2867}
2868
2869uint32_t ts_query_start_byte_for_pattern(
2870 const TSQuery *self,
2871 uint32_t pattern_index
2872) {
2873 return self->patterns.contents[pattern_index].start_byte;
2874}
2875
2876bool ts_query_is_pattern_rooted(
2877 const TSQuery *self,
2878 uint32_t pattern_index
2879) {
2880 for (unsigned i = 0; i < self->pattern_map.size; i++) {
2881 PatternEntry *entry = &self->pattern_map.contents[i];
2882 if (entry->pattern_index == pattern_index) {
2883 if (!entry->is_rooted) return false;
2884 }
2885 }
2886 return true;
2887}
2888
2889bool ts_query_is_pattern_non_local(
2890 const TSQuery *self,
2891 uint32_t pattern_index
2892) {
2893 if (pattern_index < self->patterns.size) {
2894 return self->patterns.contents[pattern_index].is_non_local;
2895 } else {
2896 return false;
2897 }
2898}
2899
2900bool ts_query_is_pattern_guaranteed_at_step(
2901 const TSQuery *self,
2902 uint32_t byte_offset
2903) {
2904 uint32_t step_index = UINT32_MAX;
2905 for (unsigned i = 0; i < self->step_offsets.size; i++) {
2906 StepOffset *step_offset = &self->step_offsets.contents[i];
2907 if (step_offset->byte_offset > byte_offset) break;
2908 step_index = step_offset->step_index;
2909 }
2910 if (step_index < self->steps.size) {
2911 return self->steps.contents[step_index].root_pattern_guaranteed;
2912 } else {
2913 return false;
2914 }
2915}
2916
2917bool ts_query__step_is_fallible(
2918 const TSQuery *self,
2919 uint16_t step_index
2920) {
2921 assert((uint32_t)step_index + 1 < self->steps.size);
2922 QueryStep *step = &self->steps.contents[step_index];
2923 QueryStep *next_step = &self->steps.contents[step_index + 1];
2924 return (
2925 next_step->depth != PATTERN_DONE_MARKER &&
2926 next_step->depth > step->depth &&
2927 !next_step->parent_pattern_guaranteed
2928 );
2929}
2930
2931void ts_query_disable_capture(
2932 TSQuery *self,
2933 const char *name,
2934 uint32_t length
2935) {
2936 // Remove capture information for any pattern step that previously
2937 // captured with the given name.
2938 int id = symbol_table_id_for_name(&self->captures, name, length);
2939 if (id != -1) {
2940 for (unsigned i = 0; i < self->steps.size; i++) {
2941 QueryStep *step = &self->steps.contents[i];
2942 query_step__remove_capture(step, id);
2943 }
2944 }
2945}
2946
2947void ts_query_disable_pattern(
2948 TSQuery *self,
2949 uint32_t pattern_index
2950) {
2951 // Remove the given pattern from the pattern map. Its steps will still
2952 // be in the `steps` array, but they will never be read.
2953 for (unsigned i = 0; i < self->pattern_map.size; i++) {
2954 PatternEntry *pattern = &self->pattern_map.contents[i];
2955 if (pattern->pattern_index == pattern_index) {
2956 array_erase(&self->pattern_map, i);
2957 i--;
2958 }
2959 }
2960}
2961
2962/***************
2963 * QueryCursor
2964 ***************/
2965
2966TSQueryCursor *ts_query_cursor_new(void) {
2967 TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
2968 *self = (TSQueryCursor) {
2969 .did_exceed_match_limit = false,
2970 .ascending = false,
2971 .halted = false,
2972 .states = array_new(),
2973 .finished_states = array_new(),
2974 .capture_list_pool = capture_list_pool_new(),
2975 .start_byte = 0,
2976 .end_byte = UINT32_MAX,
2977 .start_point = {0, 0},
2978 .end_point = POINT_MAX,
2979 .max_start_depth = UINT32_MAX,
2980 };
2981 array_reserve(&self->states, 8);
2982 array_reserve(&self->finished_states, 8);
2983 return self;
2984}
2985
2986void ts_query_cursor_delete(TSQueryCursor *self) {
2987 array_delete(&self->states);
2988 array_delete(&self->finished_states);
2989 ts_tree_cursor_delete(&self->cursor);
2990 capture_list_pool_delete(&self->capture_list_pool);
2991 ts_free(self);
2992}
2993
2994bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
2995 return self->did_exceed_match_limit;
2996}
2997
2998uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
2999 return self->capture_list_pool.max_capture_list_count;
3000}
3001
3002void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
3003 self->capture_list_pool.max_capture_list_count = limit;
3004}
3005
3006#ifdef DEBUG_EXECUTE_QUERY
3007#define LOG(...) fprintf(stderr, __VA_ARGS__)
3008#else
3009#define LOG(...)
3010#endif
3011
3012void ts_query_cursor_exec(
3013 TSQueryCursor *self,
3014 const TSQuery *query,
3015 TSNode node
3016) {
3017 if (query) {
3018 LOG("query steps:\n");
3019 for (unsigned i = 0; i < query->steps.size; i++) {
3020 QueryStep *step = &query->steps.contents[i];
3021 LOG(" %u: {", i);
3022 if (step->depth == PATTERN_DONE_MARKER) {
3023 LOG("DONE");
3024 } else if (step->is_dead_end) {
3025 LOG("dead_end");
3026 } else if (step->is_pass_through) {
3027 LOG("pass_through");
3028 } else if (step->symbol != WILDCARD_SYMBOL) {
3029 LOG("symbol: %s", query->language->symbol_names[step->symbol]);
3030 } else {
3031 LOG("symbol: *");
3032 }
3033 if (step->field) {
3034 LOG(", field: %s", query->language->field_names[step->field]);
3035 }
3036 if (step->alternative_index != NONE) {
3037 LOG(", alternative: %u", step->alternative_index);
3038 }
3039 LOG("},\n");
3040 }
3041 }
3042
3043 array_clear(&self->states);
3044 array_clear(&self->finished_states);
3045 ts_tree_cursor_reset(&self->cursor, node);
3046 capture_list_pool_reset(&self->capture_list_pool);
3047 self->on_visible_node = true;
3048 self->next_state_id = 0;
3049 self->depth = 0;
3050 self->ascending = false;
3051 self->halted = false;
3052 self->query = query;
3053 self->did_exceed_match_limit = false;
3054}
3055
3056void ts_query_cursor_set_byte_range(
3057 TSQueryCursor *self,
3058 uint32_t start_byte,
3059 uint32_t end_byte
3060) {
3061 if (end_byte == 0) {
3062 end_byte = UINT32_MAX;
3063 }
3064 self->start_byte = start_byte;
3065 self->end_byte = end_byte;
3066}
3067
3068void ts_query_cursor_set_point_range(
3069 TSQueryCursor *self,
3070 TSPoint start_point,
3071 TSPoint end_point
3072) {
3073 if (end_point.row == 0 && end_point.column == 0) {
3074 end_point = POINT_MAX;
3075 }
3076 self->start_point = start_point;
3077 self->end_point = end_point;
3078}
3079
3080// Search through all of the in-progress states, and find the captured
3081// node that occurs earliest in the document.
3082static bool ts_query_cursor__first_in_progress_capture(
3083 TSQueryCursor *self,
3084 uint32_t *state_index,
3085 uint32_t *byte_offset,
3086 uint32_t *pattern_index,
3087 bool *root_pattern_guaranteed
3088) {
3089 bool result = false;
3090 *state_index = UINT32_MAX;
3091 *byte_offset = UINT32_MAX;
3092 *pattern_index = UINT32_MAX;
3093 for (unsigned i = 0; i < self->states.size; i++) {
3094 QueryState *state = &self->states.contents[i];
3095 if (state->dead) continue;
3096
3097 const CaptureList *captures = capture_list_pool_get(
3098 &self->capture_list_pool,
3099 state->capture_list_id
3100 );
3101 if (state->consumed_capture_count >= captures->size) {
3102 continue;
3103 }
3104
3105 TSNode node = captures->contents[state->consumed_capture_count].node;
3106 if (
3107 ts_node_end_byte(node) <= self->start_byte ||
3108 point_lte(ts_node_end_point(node), self->start_point)
3109 ) {
3110 state->consumed_capture_count++;
3111 i--;
3112 continue;
3113 }
3114
3115 uint32_t node_start_byte = ts_node_start_byte(node);
3116 if (
3117 !result ||
3118 node_start_byte < *byte_offset ||
3119 (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
3120 ) {
3121 QueryStep *step = &self->query->steps.contents[state->step_index];
3122 if (root_pattern_guaranteed) {
3123 *root_pattern_guaranteed = step->root_pattern_guaranteed;
3124 } else if (step->root_pattern_guaranteed) {
3125 continue;
3126 }
3127
3128 result = true;
3129 *state_index = i;
3130 *byte_offset = node_start_byte;
3131 *pattern_index = state->pattern_index;
3132 }
3133 }
3134 return result;
3135}
3136
3137// Determine which node is first in a depth-first traversal
3138int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
3139 if (left.id != right.id) {
3140 uint32_t left_start = ts_node_start_byte(left);
3141 uint32_t right_start = ts_node_start_byte(right);
3142 if (left_start < right_start) return -1;
3143 if (left_start > right_start) return 1;
3144 uint32_t left_node_count = ts_node_end_byte(left);
3145 uint32_t right_node_count = ts_node_end_byte(right);
3146 if (left_node_count > right_node_count) return -1;
3147 if (left_node_count < right_node_count) return 1;
3148 }
3149 return 0;
3150}
3151
3152// Determine if either state contains a superset of the other state's captures.
3153void ts_query_cursor__compare_captures(
3154 TSQueryCursor *self,
3155 QueryState *left_state,
3156 QueryState *right_state,
3157 bool *left_contains_right,
3158 bool *right_contains_left
3159) {
3160 const CaptureList *left_captures = capture_list_pool_get(
3161 &self->capture_list_pool,
3162 left_state->capture_list_id
3163 );
3164 const CaptureList *right_captures = capture_list_pool_get(
3165 &self->capture_list_pool,
3166 right_state->capture_list_id
3167 );
3168 *left_contains_right = true;
3169 *right_contains_left = true;
3170 unsigned i = 0, j = 0;
3171 for (;;) {
3172 if (i < left_captures->size) {
3173 if (j < right_captures->size) {
3174 TSQueryCapture *left = &left_captures->contents[i];
3175 TSQueryCapture *right = &right_captures->contents[j];
3176 if (left->node.id == right->node.id && left->index == right->index) {
3177 i++;
3178 j++;
3179 } else {
3180 switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
3181 case -1:
3182 *right_contains_left = false;
3183 i++;
3184 break;
3185 case 1:
3186 *left_contains_right = false;
3187 j++;
3188 break;
3189 default:
3190 *right_contains_left = false;
3191 *left_contains_right = false;
3192 i++;
3193 j++;
3194 break;
3195 }
3196 }
3197 } else {
3198 *right_contains_left = false;
3199 break;
3200 }
3201 } else {
3202 if (j < right_captures->size) {
3203 *left_contains_right = false;
3204 }
3205 break;
3206 }
3207 }
3208}
3209
3210static void ts_query_cursor__add_state(
3211 TSQueryCursor *self,
3212 const PatternEntry *pattern
3213) {
3214 QueryStep *step = &self->query->steps.contents[pattern->step_index];
3215 uint32_t start_depth = self->depth - step->depth;
3216
3217 // Keep the states array in ascending order of start_depth and pattern_index,
3218 // so that it can be processed more efficiently elsewhere. Usually, there is
3219 // no work to do here because of two facts:
3220 // * States with lower start_depth are naturally added first due to the
3221 // order in which nodes are visited.
3222 // * Earlier patterns are naturally added first because of the ordering of the
3223 // pattern_map data structure that's used to initiate matches.
3224 //
3225 // This loop is only needed in cases where two conditions hold:
3226 // * A pattern consists of more than one sibling node, so that its states
3227 // remain in progress after exiting the node that started the match.
3228 // * The first node in the pattern matches against multiple nodes at the
3229 // same depth.
3230 //
3231 // An example of this is the pattern '((comment)* (function))'. If multiple
3232 // `comment` nodes appear in a row, then we may initiate a new state for this
3233 // pattern while another state for the same pattern is already in progress.
3234 // If there are multiple patterns like this in a query, then this loop will
3235 // need to execute in order to keep the states ordered by pattern_index.
3236 uint32_t index = self->states.size;
3237 while (index > 0) {
3238 QueryState *prev_state = &self->states.contents[index - 1];
3239 if (prev_state->start_depth < start_depth) break;
3240 if (prev_state->start_depth == start_depth) {
3241 // Avoid inserting an unnecessary duplicate state, which would be
3242 // immediately pruned by the longest-match criteria.
3243 if (
3244 prev_state->pattern_index == pattern->pattern_index &&
3245 prev_state->step_index == pattern->step_index
3246 ) return;
3247 if (prev_state->pattern_index <= pattern->pattern_index) break;
3248 }
3249 index--;
3250 }
3251
3252 LOG(
3253 " start state. pattern:%u, step:%u\n",
3254 pattern->pattern_index,
3255 pattern->step_index
3256 );
3257 array_insert(&self->states, index, ((QueryState) {
3258 .id = UINT32_MAX,
3259 .capture_list_id = NONE,
3260 .step_index = pattern->step_index,
3261 .pattern_index = pattern->pattern_index,
3262 .start_depth = start_depth,
3263 .consumed_capture_count = 0,
3264 .seeking_immediate_match = true,
3265 .has_in_progress_alternatives = false,
3266 .needs_parent = step->depth == 1,
3267 .dead = false,
3268 }));
3269}
3270
3271// Acquire a capture list for this state. If there are no capture lists left in the
3272// pool, this will steal the capture list from another existing state, and mark that
3273// other state as 'dead'.
3274static CaptureList *ts_query_cursor__prepare_to_capture(
3275 TSQueryCursor *self,
3276 QueryState *state,
3277 unsigned state_index_to_preserve
3278) {
3279 if (state->capture_list_id == NONE) {
3280 state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
3281
3282 // If there are no capture lists left in the pool, then terminate whichever
3283 // state has captured the earliest node in the document, and steal its
3284 // capture list.
3285 if (state->capture_list_id == NONE) {
3286 self->did_exceed_match_limit = true;
3287 uint32_t state_index, byte_offset, pattern_index;
3288 if (
3289 ts_query_cursor__first_in_progress_capture(
3290 self,
3291 &state_index,
3292 &byte_offset,
3293 &pattern_index,
3294 NULL
3295 ) &&
3296 state_index != state_index_to_preserve
3297 ) {
3298 LOG(
3299 " abandon state. index:%u, pattern:%u, offset:%u.\n",
3300 state_index, pattern_index, byte_offset
3301 );
3302 QueryState *other_state = &self->states.contents[state_index];
3303 state->capture_list_id = other_state->capture_list_id;
3304 other_state->capture_list_id = NONE;
3305 other_state->dead = true;
3306 CaptureList *list = capture_list_pool_get_mut(
3307 &self->capture_list_pool,
3308 state->capture_list_id
3309 );
3310 array_clear(list);
3311 return list;
3312 } else {
3313 LOG(" ran out of capture lists");
3314 return NULL;
3315 }
3316 }
3317 }
3318 return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
3319}
3320
3321static void ts_query_cursor__capture(
3322 TSQueryCursor *self,
3323 QueryState *state,
3324 QueryStep *step,
3325 TSNode node
3326) {
3327 if (state->dead) return;
3328 CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
3329 if (!capture_list) {
3330 state->dead = true;
3331 return;
3332 }
3333
3334 for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
3335 uint16_t capture_id = step->capture_ids[j];
3336 if (step->capture_ids[j] == NONE) break;
3337 array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
3338 LOG(
3339 " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
3340 ts_node_type(node),
3341 state->pattern_index,
3342 capture_id,
3343 capture_list->size
3344 );
3345 }
3346}
3347
3348// Duplicate the given state and insert the newly-created state immediately after
3349// the given state in the `states` array. Ensures that the given state reference is
3350// still valid, even if the states array is reallocated.
3351static QueryState *ts_query_cursor__copy_state(
3352 TSQueryCursor *self,
3353 QueryState **state_ref
3354) {
3355 const QueryState *state = *state_ref;
3356 uint32_t state_index = (uint32_t)(state - self->states.contents);
3357 QueryState copy = *state;
3358 copy.capture_list_id = NONE;
3359
3360 // If the state has captures, copy its capture list.
3361 if (state->capture_list_id != NONE) {
3362 CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
3363 if (!new_captures) return NULL;
3364 const CaptureList *old_captures = capture_list_pool_get(
3365 &self->capture_list_pool,
3366 state->capture_list_id
3367 );
3368 array_push_all(new_captures, old_captures);
3369 }
3370
3371 array_insert(&self->states, state_index + 1, copy);
3372 *state_ref = &self->states.contents[state_index];
3373 return &self->states.contents[state_index + 1];
3374}
3375
3376static inline bool ts_query_cursor__should_descend(
3377 TSQueryCursor *self,
3378 bool node_intersects_range
3379) {
3380
3381 if (node_intersects_range && self->depth < self->max_start_depth) {
3382 return true;
3383 }
3384
3385 // If there are in-progress matches whose remaining steps occur
3386 // deeper in the tree, then descend.
3387 for (unsigned i = 0; i < self->states.size; i++) {
3388 QueryState *state = &self->states.contents[i];;
3389 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3390 if (
3391 next_step->depth != PATTERN_DONE_MARKER &&
3392 state->start_depth + next_step->depth > self->depth
3393 ) {
3394 return true;
3395 }
3396 }
3397
3398 if (self->depth >= self->max_start_depth) {
3399 return false;
3400 }
3401
3402 // If the current node is hidden, then a non-rooted pattern might match
3403 // one if its roots inside of this node, and match another of its roots
3404 // as part of a sibling node, so we may need to descend.
3405 if (!self->on_visible_node) {
3406 // Descending into a repetition node outside of the range can be
3407 // expensive, because these nodes can have many visible children.
3408 // Avoid descending into repetition nodes unless we have already
3409 // determined that this query can match rootless patterns inside
3410 // of this type of repetition node.
3411 Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
3412 if (ts_subtree_is_repetition(subtree)) {
3413 bool exists;
3414 uint32_t index;
3415 array_search_sorted_by(
3416 &self->query->repeat_symbols_with_rootless_patterns,,
3417 ts_subtree_symbol(subtree),
3418 &index,
3419 &exists
3420 );
3421 return exists;
3422 }
3423
3424 return true;
3425 }
3426
3427 return false;
3428}
3429
3430// Walk the tree, processing patterns until at least one pattern finishes,
3431// If one or more patterns finish, return `true` and store their states in the
3432// `finished_states` array. Multiple patterns can finish on the same node. If
3433// there are no more matches, return `false`.
3434static inline bool ts_query_cursor__advance(
3435 TSQueryCursor *self,
3436 bool stop_on_definite_step
3437) {
3438 bool did_match = false;
3439 for (;;) {
3440 if (self->halted) {
3441 while (self->states.size > 0) {
3442 QueryState state = array_pop(&self->states);
3443 capture_list_pool_release(
3444 &self->capture_list_pool,
3445 state.capture_list_id
3446 );
3447 }
3448 }
3449
3450 if (did_match || self->halted) return did_match;
3451
3452 // Exit the current node.
3453 if (self->ascending) {
3454 if (self->on_visible_node) {
3455 LOG(
3456 "leave node. depth:%u, type:%s\n",
3457 self->depth,
3458 ts_node_type(ts_tree_cursor_current_node(&self->cursor))
3459 );
3460
3461 // After leaving a node, remove any states that cannot make further progress.
3462 uint32_t deleted_count = 0;
3463 for (unsigned i = 0, n = self->states.size; i < n; i++) {
3464 QueryState *state = &self->states.contents[i];
3465 QueryStep *step = &self->query->steps.contents[state->step_index];
3466
3467 // If a state completed its pattern inside of this node, but was deferred from finishing
3468 // in order to search for longer matches, mark it as finished.
3469 if (
3470 step->depth == PATTERN_DONE_MARKER &&
3471 (state->start_depth > self->depth || self->depth == 0)
3472 ) {
3473 LOG(" finish pattern %u\n", state->pattern_index);
3474 array_push(&self->finished_states, *state);
3475 did_match = true;
3476 deleted_count++;
3477 }
3478
3479 // If a state needed to match something within this node, then remove that state
3480 // as it has failed to match.
3481 else if (
3482 step->depth != PATTERN_DONE_MARKER &&
3483 (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth
3484 ) {
3485 LOG(
3486 " failed to match. pattern:%u, step:%u\n",
3487 state->pattern_index,
3488 state->step_index
3489 );
3490 capture_list_pool_release(
3491 &self->capture_list_pool,
3492 state->capture_list_id
3493 );
3494 deleted_count++;
3495 }
3496
3497 else if (deleted_count > 0) {
3498 self->states.contents[i - deleted_count] = *state;
3499 }
3500 }
3501 self->states.size -= deleted_count;
3502 }
3503
3504 // Leave this node by stepping to its next sibling or to its parent.
3505 switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
3506 case TreeCursorStepVisible:
3507 if (!self->on_visible_node) {
3508 self->depth++;
3509 self->on_visible_node = true;
3510 }
3511 self->ascending = false;
3512 break;
3513 case TreeCursorStepHidden:
3514 if (self->on_visible_node) {
3515 self->depth--;
3516 self->on_visible_node = false;
3517 }
3518 self->ascending = false;
3519 break;
3520 default:
3521 if (ts_tree_cursor_goto_parent(&self->cursor)) {
3522 self->depth--;
3523 } else {
3524 LOG("halt at root\n");
3525 self->halted = true;
3526 }
3527 }
3528 }
3529
3530 // Enter a new node.
3531 else {
3532 // Get the properties of the current node.
3533 TSNode node = ts_tree_cursor_current_node(&self->cursor);
3534 TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
3535 bool parent_precedes_range = !ts_node_is_null(parent_node) && (
3536 ts_node_end_byte(parent_node) <= self->start_byte ||
3537 point_lte(ts_node_end_point(parent_node), self->start_point)
3538 );
3539 bool parent_follows_range = !ts_node_is_null(parent_node) && (
3540 ts_node_start_byte(parent_node) >= self->end_byte ||
3541 point_gte(ts_node_start_point(parent_node), self->end_point)
3542 );
3543 bool node_precedes_range = parent_precedes_range || (
3544 ts_node_end_byte(node) <= self->start_byte ||
3545 point_lte(ts_node_end_point(node), self->start_point)
3546 );
3547 bool node_follows_range = parent_follows_range || (
3548 ts_node_start_byte(node) >= self->end_byte ||
3549 point_gte(ts_node_start_point(node), self->end_point)
3550 );
3551 bool parent_intersects_range = !parent_precedes_range && !parent_follows_range;
3552 bool node_intersects_range = !node_precedes_range && !node_follows_range;
3553
3554 if (self->on_visible_node) {
3555 TSSymbol symbol = ts_node_symbol(node);
3556 bool is_named = ts_node_is_named(node);
3557 bool has_later_siblings;
3558 bool has_later_named_siblings;
3559 bool can_have_later_siblings_with_this_field;
3560 TSFieldId field_id = 0;
3561 TSSymbol supertypes[8] = {0};
3562 unsigned supertype_count = 8;
3563 ts_tree_cursor_current_status(
3564 &self->cursor,
3565 &field_id,
3566 &has_later_siblings,
3567 &has_later_named_siblings,
3568 &can_have_later_siblings_with_this_field,
3569 supertypes,
3570 &supertype_count
3571 );
3572 LOG(
3573 "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
3574 self->depth,
3575 ts_node_type(node),
3576 ts_language_field_name_for_id(self->query->language, field_id),
3577 ts_node_start_point(node).row,
3578 self->states.size,
3579 self->finished_states.size
3580 );
3581
3582 bool node_is_error = symbol == ts_builtin_sym_error;
3583 bool parent_is_error =
3584 !ts_node_is_null(parent_node) &&
3585 ts_node_symbol(parent_node) == ts_builtin_sym_error;
3586
3587 // Add new states for any patterns whose root node is a wildcard.
3588 if (!node_is_error) {
3589 for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
3590 PatternEntry *pattern = &self->query->pattern_map.contents[i];
3591
3592 // If this node matches the first step of the pattern, then add a new
3593 // state at the start of this pattern.
3594 QueryStep *step = &self->query->steps.contents[pattern->step_index];
3595 uint32_t start_depth = self->depth - step->depth;
3596 if (
3597 (pattern->is_rooted ?
3598 node_intersects_range :
3599 (parent_intersects_range && !parent_is_error)) &&
3600 (!step->field || field_id == step->field) &&
3601 (!step->supertype_symbol || supertype_count > 0) &&
3602 (start_depth <= self->max_start_depth)
3603 ) {
3604 ts_query_cursor__add_state(self, pattern);
3605 }
3606 }
3607 }
3608
3609 // Add new states for any patterns whose root node matches this node.
3610 unsigned i;
3611 if (ts_query__pattern_map_search(self->query, symbol, &i)) {
3612 PatternEntry *pattern = &self->query->pattern_map.contents[i];
3613
3614 QueryStep *step = &self->query->steps.contents[pattern->step_index];
3615 uint32_t start_depth = self->depth - step->depth;
3616 do {
3617 // If this node matches the first step of the pattern, then add a new
3618 // state at the start of this pattern.
3619 if (
3620 (pattern->is_rooted ?
3621 node_intersects_range :
3622 (parent_intersects_range && !parent_is_error)) &&
3623 (!step->field || field_id == step->field) &&
3624 (start_depth <= self->max_start_depth)
3625 ) {
3626 ts_query_cursor__add_state(self, pattern);
3627 }
3628
3629 // Advance to the next pattern whose root node matches this node.
3630 i++;
3631 if (i == self->query->pattern_map.size) break;
3632 pattern = &self->query->pattern_map.contents[i];
3633 step = &self->query->steps.contents[pattern->step_index];
3634 } while (step->symbol == symbol);
3635 }
3636
3637 // Update all of the in-progress states with current node.
3638 for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) {
3639 QueryState *state = &self->states.contents[j];
3640 QueryStep *step = &self->query->steps.contents[state->step_index];
3641 state->has_in_progress_alternatives = false;
3642 copy_count = 0;
3643
3644 // Check that the node matches all of the criteria for the next
3645 // step of the pattern.
3646 if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
3647
3648 // Determine if this node matches this step of the pattern, and also
3649 // if this node can have later siblings that match this step of the
3650 // pattern.
3651 bool node_does_match = false;
3652 if (step->symbol == WILDCARD_SYMBOL) {
3653 node_does_match = !node_is_error && (is_named || !step->is_named);
3654 } else {
3655 node_does_match = symbol == step->symbol;
3656 }
3657 bool later_sibling_can_match = has_later_siblings;
3658 if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
3659 later_sibling_can_match = false;
3660 }
3661 if (step->is_last_child && has_later_named_siblings) {
3662 node_does_match = false;
3663 }
3664 if (step->supertype_symbol) {
3665 bool has_supertype = false;
3666 for (unsigned k = 0; k < supertype_count; k++) {
3667 if (supertypes[k] == step->supertype_symbol) {
3668 has_supertype = true;
3669 break;
3670 }
3671 }
3672 if (!has_supertype) node_does_match = false;
3673 }
3674 if (step->field) {
3675 if (step->field == field_id) {
3676 if (!can_have_later_siblings_with_this_field) {
3677 later_sibling_can_match = false;
3678 }
3679 } else {
3680 node_does_match = false;
3681 }
3682 }
3683
3684 if (step->negated_field_list_id) {
3685 TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
3686 for (;;) {
3687 TSFieldId negated_field_id = *negated_field_ids;
3688 if (negated_field_id) {
3689 negated_field_ids++;
3690 if (ts_node_child_by_field_id(node, negated_field_id).id) {
3691 node_does_match = false;
3692 break;
3693 }
3694 } else {
3695 break;
3696 }
3697 }
3698 }
3699
3700 // Remove states immediately if it is ever clear that they cannot match.
3701 if (!node_does_match) {
3702 if (!later_sibling_can_match) {
3703 LOG(
3704 " discard state. pattern:%u, step:%u\n",
3705 state->pattern_index,
3706 state->step_index
3707 );
3708 capture_list_pool_release(
3709 &self->capture_list_pool,
3710 state->capture_list_id
3711 );
3712 array_erase(&self->states, j);
3713 j--;
3714 }
3715 continue;
3716 }
3717
3718 // Some patterns can match their root node in multiple ways, capturing different
3719 // children. If this pattern step could match later children within the same
3720 // parent, then this query state cannot simply be updated in place. It must be
3721 // split into two states: one that matches this node, and one which skips over
3722 // this node, to preserve the possibility of matching later siblings.
3723 if (later_sibling_can_match && (
3724 step->contains_captures ||
3725 ts_query__step_is_fallible(self->query, state->step_index)
3726 )) {
3727 if (ts_query_cursor__copy_state(self, &state)) {
3728 LOG(
3729 " split state for capture. pattern:%u, step:%u\n",
3730 state->pattern_index,
3731 state->step_index
3732 );
3733 copy_count++;
3734 }
3735 }
3736
3737 // If this pattern started with a wildcard, such that the pattern map
3738 // actually points to the *second* step of the pattern, then check
3739 // that the node has a parent, and capture the parent node if necessary.
3740 if (state->needs_parent) {
3741 TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
3742 if (ts_node_is_null(parent)) {
3743 LOG(" missing parent node\n");
3744 state->dead = true;
3745 } else {
3746 state->needs_parent = false;
3747 QueryStep *skipped_wildcard_step = step;
3748 do {
3749 skipped_wildcard_step--;
3750 } while (
3751 skipped_wildcard_step->is_dead_end ||
3752 skipped_wildcard_step->is_pass_through ||
3753 skipped_wildcard_step->depth > 0
3754 );
3755 if (skipped_wildcard_step->capture_ids[0] != NONE) {
3756 LOG(" capture wildcard parent\n");
3757 ts_query_cursor__capture(
3758 self,
3759 state,
3760 skipped_wildcard_step,
3761 parent
3762 );
3763 }
3764 }
3765 }
3766
3767 // If the current node is captured in this pattern, add it to the capture list.
3768 if (step->capture_ids[0] != NONE) {
3769 ts_query_cursor__capture(self, state, step, node);
3770 }
3771
3772 if (state->dead) {
3773 array_erase(&self->states, j);
3774 j--;
3775 continue;
3776 }
3777
3778 // Advance this state to the next step of its pattern.
3779 state->step_index++;
3780 state->seeking_immediate_match = false;
3781 LOG(
3782 " advance state. pattern:%u, step:%u\n",
3783 state->pattern_index,
3784 state->step_index
3785 );
3786
3787 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3788 if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
3789
3790 // If this state's next step has an alternative step, then copy the state in order
3791 // to pursue both alternatives. The alternative step itself may have an alternative,
3792 // so this is an interactive process.
3793 unsigned end_index = j + 1;
3794 for (unsigned k = j; k < end_index; k++) {
3795 QueryState *child_state = &self->states.contents[k];
3796 QueryStep *child_step = &self->query->steps.contents[child_state->step_index];
3797 if (child_step->alternative_index != NONE) {
3798 // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
3799 // via its alternative index. When a state reaches a dead-end step, it jumps straight
3800 // to the step's alternative.
3801 if (child_step->is_dead_end) {
3802 child_state->step_index = child_step->alternative_index;
3803 k--;
3804 continue;
3805 }
3806
3807 // A "pass-through" step exists only to add a branch into the step sequence,
3808 // via its alternative_index. When a state reaches a pass-through step, it splits
3809 // in order to process the alternative step, and then it advances to the next step.
3810 if (child_step->is_pass_through) {
3811 child_state->step_index++;
3812 k--;
3813 }
3814
3815 QueryState *copy = ts_query_cursor__copy_state(self, &child_state);
3816 if (copy) {
3817 LOG(
3818 " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
3819 copy->pattern_index,
3820 copy->step_index,
3821 next_step->alternative_index,
3822 next_step->alternative_is_immediate,
3823 capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
3824 );
3825 end_index++;
3826 copy_count++;
3827 copy->step_index = child_step->alternative_index;
3828 if (child_step->alternative_is_immediate) {
3829 copy->seeking_immediate_match = true;
3830 }
3831 }
3832 }
3833 }
3834 }
3835
3836 for (unsigned j = 0; j < self->states.size; j++) {
3837 QueryState *state = &self->states.contents[j];
3838 if (state->dead) {
3839 array_erase(&self->states, j);
3840 j--;
3841 continue;
3842 }
3843
3844 // Enforce the longest-match criteria. When a query pattern contains optional or
3845 // repeated nodes, this is necessary to avoid multiple redundant states, where
3846 // one state has a strict subset of another state's captures.
3847 bool did_remove = false;
3848 for (unsigned k = j + 1; k < self->states.size; k++) {
3849 QueryState *other_state = &self->states.contents[k];
3850
3851 // Query states are kept in ascending order of start_depth and pattern_index.
3852 // Since the longest-match criteria is only used for deduping matches of the same
3853 // pattern and root node, we only need to perform pairwise comparisons within a
3854 // small slice of the states array.
3855 if (
3856 other_state->start_depth != state->start_depth ||
3857 other_state->pattern_index != state->pattern_index
3858 ) break;
3859
3860 bool left_contains_right, right_contains_left;
3861 ts_query_cursor__compare_captures(
3862 self,
3863 state,
3864 other_state,
3865 &left_contains_right,
3866 &right_contains_left
3867 );
3868 if (left_contains_right) {
3869 if (state->step_index == other_state->step_index) {
3870 LOG(
3871 " drop shorter state. pattern: %u, step_index: %u\n",
3872 state->pattern_index,
3873 state->step_index
3874 );
3875 capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
3876 array_erase(&self->states, k);
3877 k--;
3878 continue;
3879 }
3880 other_state->has_in_progress_alternatives = true;
3881 }
3882 if (right_contains_left) {
3883 if (state->step_index == other_state->step_index) {
3884 LOG(
3885 " drop shorter state. pattern: %u, step_index: %u\n",
3886 state->pattern_index,
3887 state->step_index
3888 );
3889 capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3890 array_erase(&self->states, j);
3891 j--;
3892 did_remove = true;
3893 break;
3894 }
3895 state->has_in_progress_alternatives = true;
3896 }
3897 }
3898
3899 // If the state is at the end of its pattern, remove it from the list
3900 // of in-progress states and add it to the list of finished states.
3901 if (!did_remove) {
3902 LOG(
3903 " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
3904 state->pattern_index,
3905 state->start_depth,
3906 state->step_index,
3907 capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
3908 );
3909 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3910 if (next_step->depth == PATTERN_DONE_MARKER) {
3911 if (state->has_in_progress_alternatives) {
3912 LOG(" defer finishing pattern %u\n", state->pattern_index);
3913 } else {
3914 LOG(" finish pattern %u\n", state->pattern_index);
3915 array_push(&self->finished_states, *state);
3916 array_erase(&self->states, (uint32_t)(state - self->states.contents));
3917 did_match = true;
3918 j--;
3919 }
3920 }
3921 }
3922 }
3923 }
3924
3925 if (ts_query_cursor__should_descend(self, node_intersects_range)) {
3926 switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) {
3927 case TreeCursorStepVisible:
3928 self->depth++;
3929 self->on_visible_node = true;
3930 continue;
3931 case TreeCursorStepHidden:
3932 self->on_visible_node = false;
3933 continue;
3934 default:
3935 break;
3936 }
3937 }
3938
3939 self->ascending = true;
3940 }
3941 }
3942}
3943
3944bool ts_query_cursor_next_match(
3945 TSQueryCursor *self,
3946 TSQueryMatch *match
3947) {
3948 if (self->finished_states.size == 0) {
3949 if (!ts_query_cursor__advance(self, false)) {
3950 return false;
3951 }
3952 }
3953
3954 QueryState *state = &self->finished_states.contents[0];
3955 if (state->id == UINT32_MAX) state->id = self->next_state_id++;
3956 match->id = state->id;
3957 match->pattern_index = state->pattern_index;
3958 const CaptureList *captures = capture_list_pool_get(
3959 &self->capture_list_pool,
3960 state->capture_list_id
3961 );
3962 match->captures = captures->contents;
3963 match->capture_count = captures->size;
3964 capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3965 array_erase(&self->finished_states, 0);
3966 return true;
3967}
3968
3969void ts_query_cursor_remove_match(
3970 TSQueryCursor *self,
3971 uint32_t match_id
3972) {
3973 for (unsigned i = 0; i < self->finished_states.size; i++) {
3974 const QueryState *state = &self->finished_states.contents[i];
3975 if (state->id == match_id) {
3976 capture_list_pool_release(
3977 &self->capture_list_pool,
3978 state->capture_list_id
3979 );
3980 array_erase(&self->finished_states, i);
3981 return;
3982 }
3983 }
3984
3985 // Remove unfinished query states as well to prevent future
3986 // captures for a match being removed.
3987 for (unsigned i = 0; i < self->states.size; i++) {
3988 const QueryState *state = &self->states.contents[i];
3989 if (state->id == match_id) {
3990 capture_list_pool_release(
3991 &self->capture_list_pool,
3992 state->capture_list_id
3993 );
3994 array_erase(&self->states, i);
3995 return;
3996 }
3997 }
3998}
3999
4000bool ts_query_cursor_next_capture(
4001 TSQueryCursor *self,
4002 TSQueryMatch *match,
4003 uint32_t *capture_index
4004) {
4005 // The goal here is to return captures in order, even though they may not
4006 // be discovered in order, because patterns can overlap. Search for matches
4007 // until there is a finished capture that is before any unfinished capture.
4008 for (;;) {
4009 // First, find the earliest capture in an unfinished match.
4010 uint32_t first_unfinished_capture_byte;
4011 uint32_t first_unfinished_pattern_index;
4012 uint32_t first_unfinished_state_index;
4013 bool first_unfinished_state_is_definite = false;
4014 ts_query_cursor__first_in_progress_capture(
4015 self,
4016 &first_unfinished_state_index,
4017 &first_unfinished_capture_byte,
4018 &first_unfinished_pattern_index,
4019 &first_unfinished_state_is_definite
4020 );
4021
4022 // Then find the earliest capture in a finished match. It must occur
4023 // before the first capture in an *unfinished* match.
4024 QueryState *first_finished_state = NULL;
4025 uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
4026 uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
4027 for (unsigned i = 0; i < self->finished_states.size;) {
4028 QueryState *state = &self->finished_states.contents[i];
4029 const CaptureList *captures = capture_list_pool_get(
4030 &self->capture_list_pool,
4031 state->capture_list_id
4032 );
4033
4034 // Remove states whose captures are all consumed.
4035 if (state->consumed_capture_count >= captures->size) {
4036 capture_list_pool_release(
4037 &self->capture_list_pool,
4038 state->capture_list_id
4039 );
4040 array_erase(&self->finished_states, i);
4041 continue;
4042 }
4043
4044 TSNode node = captures->contents[state->consumed_capture_count].node;
4045
4046 bool node_precedes_range = (
4047 ts_node_end_byte(node) <= self->start_byte ||
4048 point_lte(ts_node_end_point(node), self->start_point)
4049 );
4050 bool node_follows_range = (
4051 ts_node_start_byte(node) >= self->end_byte ||
4052 point_gte(ts_node_start_point(node), self->end_point)
4053 );
4054 bool node_outside_of_range = node_precedes_range || node_follows_range;
4055
4056 // Skip captures that are outside of the cursor's range.
4057 if (node_outside_of_range) {
4058 state->consumed_capture_count++;
4059 continue;
4060 }
4061
4062 uint32_t node_start_byte = ts_node_start_byte(node);
4063 if (
4064 node_start_byte < first_finished_capture_byte ||
4065 (
4066 node_start_byte == first_finished_capture_byte &&
4067 state->pattern_index < first_finished_pattern_index
4068 )
4069 ) {
4070 first_finished_state = state;
4071 first_finished_capture_byte = node_start_byte;
4072 first_finished_pattern_index = state->pattern_index;
4073 }
4074 i++;
4075 }
4076
4077 // If there is finished capture that is clearly before any unfinished
4078 // capture, then return its match, and its capture index. Internally
4079 // record the fact that the capture has been 'consumed'.
4080 QueryState *state;
4081 if (first_finished_state) {
4082 state = first_finished_state;
4083 } else if (first_unfinished_state_is_definite) {
4084 state = &self->states.contents[first_unfinished_state_index];
4085 } else {
4086 state = NULL;
4087 }
4088
4089 if (state) {
4090 if (state->id == UINT32_MAX) state->id = self->next_state_id++;
4091 match->id = state->id;
4092 match->pattern_index = state->pattern_index;
4093 const CaptureList *captures = capture_list_pool_get(
4094 &self->capture_list_pool,
4095 state->capture_list_id
4096 );
4097 match->captures = captures->contents;
4098 match->capture_count = captures->size;
4099 *capture_index = state->consumed_capture_count;
4100 state->consumed_capture_count++;
4101 return true;
4102 }
4103
4104 if (capture_list_pool_is_empty(&self->capture_list_pool)) {
4105 LOG(
4106 " abandon state. index:%u, pattern:%u, offset:%u.\n",
4107 first_unfinished_state_index,
4108 first_unfinished_pattern_index,
4109 first_unfinished_capture_byte
4110 );
4111 capture_list_pool_release(
4112 &self->capture_list_pool,
4113 self->states.contents[first_unfinished_state_index].capture_list_id
4114 );
4115 array_erase(&self->states, first_unfinished_state_index);
4116 }
4117
4118 // If there are no finished matches that are ready to be returned, then
4119 // continue finding more matches.
4120 if (
4121 !ts_query_cursor__advance(self, true) &&
4122 self->finished_states.size == 0
4123 ) return false;
4124 }
4125}
4126
4127void ts_query_cursor_set_max_start_depth(
4128 TSQueryCursor *self,
4129 uint32_t max_start_depth
4130) {
4131 self->max_start_depth = max_start_depth;
4132}
4133
4134#undef LOG