1#include "api.h"
   2#include "./alloc.h"
   3#include "./array.h"
   4#include "./language.h"
   5#include "./point.h"
   6#include "./tree_cursor.h"
   7#include "./unicode.h"
   8#include <wctype.h>
   9
  10// #define DEBUG_ANALYZE_QUERY
  11// #define DEBUG_EXECUTE_QUERY
  12
  13#define MAX_STEP_CAPTURE_COUNT 3
  14#define MAX_NEGATED_FIELD_COUNT 8
  15#define MAX_STATE_PREDECESSOR_COUNT 256
  16#define MAX_ANALYSIS_STATE_DEPTH 8
  17#define MAX_ANALYSIS_ITERATION_COUNT 256
  18
  19/*
  20 * Stream - A sequence of unicode characters derived from a UTF8 string.
  21 * This struct is used in parsing queries from S-expressions.
  22 */
  23typedef struct {
  24  const char *input;
  25  const char *start;
  26  const char *end;
  27  int32_t next;
  28  uint8_t next_size;
  29} Stream;
  30
  31/*
  32 * QueryStep - A step in the process of matching a query. Each node within
  33 * a query S-expression corresponds to one of these steps. An entire pattern
  34 * is represented as a sequence of these steps. The basic properties of a
  35 * node are represented by these fields:
  36 * - `symbol` - The grammar symbol to match. A zero value represents the
  37 *    wildcard symbol, '_'.
  38 * - `field` - The field name to match. A zero value means that a field name
  39 *    was not specified.
  40 * - `capture_ids` - An array of integers representing the names of captures
  41 *    associated with this node in the pattern, terminated by a `NONE` value.
  42 * - `depth` - The depth where this node occurs in the pattern. The root node
  43 *    of the pattern has depth zero.
  44 * - `negated_field_list_id` - An id representing a set of fields that must
  45 *    not be present on a node matching this step.
  46 *
  47 * Steps have some additional fields in order to handle the `.` (or "anchor") operator,
  48 * which forbids additional child nodes:
  49 * - `is_immediate` - Indicates that the node matching this step cannot be preceded
  50 *   by other sibling nodes that weren't specified in the pattern.
  51 * - `is_last_child` - Indicates that the node matching this step cannot have any
  52 *   subsequent named siblings.
  53 *
  54 * For simple patterns, steps are matched in sequential order. But in order to
  55 * handle alternative/repeated/optional sub-patterns, query steps are not always
  56 * structured as a linear sequence; they sometimes need to split and merge. This
  57 * is done using the following fields:
  58 *  - `alternative_index` - The index of a different query step that serves as
  59 *    an alternative to this step. A `NONE` value represents no alternative.
  60 *    When a query state reaches a step with an alternative index, the state
  61 *    is duplicated, with one copy remaining at the original step, and one copy
  62 *    moving to the alternative step. The alternative may have its own alternative
  63 *    step, so this splitting is an iterative process.
  64 * - `is_dead_end` - Indicates that this state cannot be passed directly, and
  65 *    exists only in order to redirect to an alternative index, with no splitting.
  66 * - `is_pass_through` - Indicates that state has no matching logic of its own,
  67 *    and exists only to split a state. One copy of the state advances immediately
  68 *    to the next step, and one moves to the alternative step.
  69 * - `alternative_is_immediate` - Indicates that this step's alternative step
  70 *    should be treated as if `is_immediate` is true.
  71 *
  72 * Steps also store some derived state that summarizes how they relate to other
  73 * steps within the same pattern. This is used to optimize the matching process:
  74 *  - `contains_captures` - Indicates that this step or one of its child steps
  75 *     has a non-empty `capture_ids` list.
  76 *  - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
  77 *     it and all of its subsequent sibling steps within the same parent pattern
  78 *     are guaranteed to match.
  79 *  - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
  80 *     for the entire top-level pattern. When iterating through a query's
  81 *     captures using `ts_query_cursor_next_capture`, this field is used to
  82 *     detect that a capture can safely be returned from a match that has not
  83 *     even completed  yet.
  84 */
  85typedef struct {
  86  TSSymbol symbol;
  87  TSSymbol supertype_symbol;
  88  TSFieldId field;
  89  uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
  90  uint16_t depth;
  91  uint16_t alternative_index;
  92  uint16_t negated_field_list_id;
  93  bool is_named: 1;
  94  bool is_immediate: 1;
  95  bool is_last_child: 1;
  96  bool is_pass_through: 1;
  97  bool is_dead_end: 1;
  98  bool alternative_is_immediate: 1;
  99  bool contains_captures: 1;
 100  bool root_pattern_guaranteed: 1;
 101  bool parent_pattern_guaranteed: 1;
 102} QueryStep;
 103
 104/*
 105 * Slice - A slice of an external array. Within a query, capture names,
 106 * literal string values, and predicate step information are stored in three
 107 * contiguous arrays. Individual captures, string values, and predicates are
 108 * represented as slices of these three arrays.
 109 */
 110typedef struct {
 111  uint32_t offset;
 112  uint32_t length;
 113} Slice;
 114
 115/*
 116 * SymbolTable - a two-way mapping of strings to ids.
 117 */
 118typedef struct {
 119  Array(char) characters;
 120  Array(Slice) slices;
 121} SymbolTable;
 122
 123/**
 124 * CaptureQuantififers - a data structure holding the quantifiers of pattern captures.
 125 */
 126typedef Array(uint8_t) CaptureQuantifiers;
 127
 128/*
 129 * PatternEntry - Information about the starting point for matching a particular
 130 * pattern. These entries are stored in a 'pattern map' - a sorted array that
 131 * makes it possible to efficiently lookup patterns based on the symbol for their
 132 * first step. The entry consists of the following fields:
 133 * - `pattern_index` - the index of the pattern within the query
 134 * - `step_index` - the index of the pattern's first step in the shared `steps` array
 135 * - `is_rooted` - whether or not the pattern has a single root node. This property
 136 *   affects decisions about whether or not to start the pattern for nodes outside
 137 *   of a QueryCursor's range restriction.
 138 */
 139typedef struct {
 140  uint16_t step_index;
 141  uint16_t pattern_index;
 142  bool is_rooted;
 143} PatternEntry;
 144
 145typedef struct {
 146  Slice steps;
 147  Slice predicate_steps;
 148  uint32_t start_byte;
 149  bool is_non_local;
 150} QueryPattern;
 151
 152typedef struct {
 153  uint32_t byte_offset;
 154  uint16_t step_index;
 155} StepOffset;
 156
 157/*
 158 * QueryState - The state of an in-progress match of a particular pattern
 159 * in a query. While executing, a `TSQueryCursor` must keep track of a number
 160 * of possible in-progress matches. Each of those possible matches is
 161 * represented as one of these states. Fields:
 162 * - `id` - A numeric id that is exposed to the public API. This allows the
 163 *    caller to remove a given match, preventing any more of its captures
 164 *    from being returned.
 165 * - `start_depth` - The depth in the tree where the first step of the state's
 166 *    pattern was matched.
 167 * - `pattern_index` - The pattern that the state is matching.
 168 * - `consumed_capture_count` - The number of captures from this match that
 169 *    have already been returned.
 170 * - `capture_list_id` - A numeric id that can be used to retrieve the state's
 171 *    list of captures from the `CaptureListPool`.
 172 * - `seeking_immediate_match` - A flag that indicates that the state's next
 173 *    step must be matched by the very next sibling. This is used when
 174 *    processing repetitions.
 175 * - `has_in_progress_alternatives` - A flag that indicates that there is are
 176 *    other states that have the same captures as this state, but are at
 177 *    different steps in their pattern. This means that in order to obey the
 178 *    'longest-match' rule, this state should not be returned as a match until
 179 *    it is clear that there can be no other alternative match with more captures.
 180 */
 181typedef struct {
 182  uint32_t id;
 183  uint32_t capture_list_id;
 184  uint16_t start_depth;
 185  uint16_t step_index;
 186  uint16_t pattern_index;
 187  uint16_t consumed_capture_count: 12;
 188  bool seeking_immediate_match: 1;
 189  bool has_in_progress_alternatives: 1;
 190  bool dead: 1;
 191  bool needs_parent: 1;
 192} QueryState;
 193
 194typedef Array(TSQueryCapture) CaptureList;
 195
 196/*
 197 * CaptureListPool - A collection of *lists* of captures. Each query state needs
 198 * to maintain its own list of captures. To avoid repeated allocations, this struct
 199 * maintains a fixed set of capture lists, and keeps track of which ones are
 200 * currently in use by a query state.
 201 */
 202typedef struct {
 203  Array(CaptureList) list;
 204  CaptureList empty_list;
 205  // The maximum number of capture lists that we are allowed to allocate. We
 206  // never allow `list` to allocate more entries than this, dropping pending
 207  // matches if needed to stay under the limit.
 208  uint32_t max_capture_list_count;
 209  // The number of capture lists allocated in `list` that are not currently in
 210  // use. We reuse those existing-but-unused capture lists before trying to
 211  // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
 212  // list's length to indicate that it's not in use.
 213  uint32_t free_capture_list_count;
 214} CaptureListPool;
 215
 216/*
 217 * AnalysisState - The state needed for walking the parse table when analyzing
 218 * a query pattern, to determine at which steps the pattern might fail to match.
 219 */
 220typedef struct {
 221  TSStateId parse_state;
 222  TSSymbol parent_symbol;
 223  uint16_t child_index;
 224  TSFieldId field_id: 15;
 225  bool done: 1;
 226} AnalysisStateEntry;
 227
 228typedef struct {
 229  AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH];
 230  uint16_t depth;
 231  uint16_t step_index;
 232  TSSymbol root_symbol;
 233} AnalysisState;
 234
 235typedef Array(AnalysisState *) AnalysisStateSet;
 236
 237typedef struct {
 238  AnalysisStateSet states;
 239  AnalysisStateSet next_states;
 240  AnalysisStateSet deeper_states;
 241  AnalysisStateSet state_pool;
 242  Array(uint16_t) final_step_indices;
 243  Array(TSSymbol) finished_parent_symbols;
 244  bool did_abort;
 245} QueryAnalysis;
 246
 247/*
 248 * AnalysisSubgraph - A subset of the states in the parse table that are used
 249 * in constructing nodes with a certain symbol. Each state is accompanied by
 250 * some information about the possible node that could be produced in
 251 * downstream states.
 252 */
 253typedef struct {
 254  TSStateId state;
 255  uint16_t production_id;
 256  uint8_t child_index: 7;
 257  bool done: 1;
 258} AnalysisSubgraphNode;
 259
 260typedef struct {
 261  TSSymbol symbol;
 262  Array(TSStateId) start_states;
 263  Array(AnalysisSubgraphNode) nodes;
 264} AnalysisSubgraph;
 265
 266typedef Array(AnalysisSubgraph) AnalysisSubgraphArray;
 267
 268/*
 269 * StatePredecessorMap - A map that stores the predecessors of each parse state.
 270 * This is used during query analysis to determine which parse states can lead
 271 * to which reduce actions.
 272 */
 273typedef struct {
 274  TSStateId *contents;
 275} StatePredecessorMap;
 276
 277/*
 278 * TSQuery - A tree query, compiled from a string of S-expressions. The query
 279 * itself is immutable. The mutable state used in the process of executing the
 280 * query is stored in a `TSQueryCursor`.
 281 */
 282struct TSQuery {
 283  SymbolTable captures;
 284  SymbolTable predicate_values;
 285  Array(CaptureQuantifiers) capture_quantifiers;
 286  Array(QueryStep) steps;
 287  Array(PatternEntry) pattern_map;
 288  Array(TSQueryPredicateStep) predicate_steps;
 289  Array(QueryPattern) patterns;
 290  Array(StepOffset) step_offsets;
 291  Array(TSFieldId) negated_fields;
 292  Array(char) string_buffer;
 293  Array(TSSymbol) repeat_symbols_with_rootless_patterns;
 294  const TSLanguage *language;
 295  uint16_t wildcard_root_pattern_count;
 296};
 297
 298/*
 299 * TSQueryCursor - A stateful struct used to execute a query on a tree.
 300 */
 301struct TSQueryCursor {
 302  const TSQuery *query;
 303  TSTreeCursor cursor;
 304  Array(QueryState) states;
 305  Array(QueryState) finished_states;
 306  CaptureListPool capture_list_pool;
 307  uint32_t depth;
 308  uint32_t max_start_depth;
 309  uint32_t start_byte;
 310  uint32_t end_byte;
 311  TSPoint start_point;
 312  TSPoint end_point;
 313  uint32_t next_state_id;
 314  bool on_visible_node;
 315  bool ascending;
 316  bool halted;
 317  bool did_exceed_match_limit;
 318};
 319
 320static const TSQueryError PARENT_DONE = -1;
 321static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
 322static const uint16_t NONE = UINT16_MAX;
 323static const TSSymbol WILDCARD_SYMBOL = 0;
 324
 325/**********
 326 * Stream
 327 **********/
 328
 329// Advance to the next unicode code point in the stream.
 330static bool stream_advance(Stream *self) {
 331  self->input += self->next_size;
 332  if (self->input < self->end) {
 333    uint32_t size = ts_decode_utf8(
 334      (const uint8_t *)self->input,
 335      (uint32_t)(self->end - self->input),
 336      &self->next
 337    );
 338    if (size > 0) {
 339      self->next_size = size;
 340      return true;
 341    }
 342  } else {
 343    self->next_size = 0;
 344    self->next = '\0';
 345  }
 346  return false;
 347}
 348
 349// Reset the stream to the given input position, represented as a pointer
 350// into the input string.
 351static void stream_reset(Stream *self, const char *input) {
 352  self->input = input;
 353  self->next_size = 0;
 354  stream_advance(self);
 355}
 356
 357static Stream stream_new(const char *string, uint32_t length) {
 358  Stream self = {
 359    .next = 0,
 360    .input = string,
 361    .start = string,
 362    .end = string + length,
 363  };
 364  stream_advance(&self);
 365  return self;
 366}
 367
 368static void stream_skip_whitespace(Stream *self) {
 369  for (;;) {
 370    if (iswspace(self->next)) {
 371      stream_advance(self);
 372    } else if (self->next == ';') {
 373      // skip over comments
 374      stream_advance(self);
 375      while (self->next && self->next != '\n') {
 376        if (!stream_advance(self)) break;
 377      }
 378    } else {
 379      break;
 380    }
 381  }
 382}
 383
 384static bool stream_is_ident_start(Stream *self) {
 385  return iswalnum(self->next) || self->next == '_' || self->next == '-';
 386}
 387
 388static void stream_scan_identifier(Stream *stream) {
 389  do {
 390    stream_advance(stream);
 391  } while (
 392    iswalnum(stream->next) ||
 393    stream->next == '_' ||
 394    stream->next == '-' ||
 395    stream->next == '.' ||
 396    stream->next == '?' ||
 397    stream->next == '!'
 398  );
 399}
 400
 401static uint32_t stream_offset(Stream *self) {
 402  return (uint32_t)(self->input - self->start);
 403}
 404
 405/******************
 406 * CaptureListPool
 407 ******************/
 408
 409static CaptureListPool capture_list_pool_new(void) {
 410  return (CaptureListPool) {
 411    .list = array_new(),
 412    .empty_list = array_new(),
 413    .max_capture_list_count = UINT32_MAX,
 414    .free_capture_list_count = 0,
 415  };
 416}
 417
 418static void capture_list_pool_reset(CaptureListPool *self) {
 419  for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
 420    // This invalid size means that the list is not in use.
 421    self->list.contents[i].size = UINT32_MAX;
 422  }
 423  self->free_capture_list_count = self->list.size;
 424}
 425
 426static void capture_list_pool_delete(CaptureListPool *self) {
 427  for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
 428    array_delete(&self->list.contents[i]);
 429  }
 430  array_delete(&self->list);
 431}
 432
 433static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
 434  if (id >= self->list.size) return &self->empty_list;
 435  return &self->list.contents[id];
 436}
 437
 438static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
 439  assert(id < self->list.size);
 440  return &self->list.contents[id];
 441}
 442
 443static bool capture_list_pool_is_empty(const CaptureListPool *self) {
 444  // The capture list pool is empty if all allocated lists are in use, and we
 445  // have reached the maximum allowed number of allocated lists.
 446  return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
 447}
 448
 449static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
 450  // First see if any already allocated capture list is currently unused.
 451  if (self->free_capture_list_count > 0) {
 452    for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
 453      if (self->list.contents[i].size == UINT32_MAX) {
 454        array_clear(&self->list.contents[i]);
 455        self->free_capture_list_count--;
 456        return i;
 457      }
 458    }
 459  }
 460
 461  // Otherwise allocate and initialize a new capture list, as long as that
 462  // doesn't put us over the requested maximum.
 463  uint32_t i = self->list.size;
 464  if (i >= self->max_capture_list_count) {
 465    return NONE;
 466  }
 467  CaptureList list;
 468  array_init(&list);
 469  array_push(&self->list, list);
 470  return i;
 471}
 472
 473static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
 474  if (id >= self->list.size) return;
 475  self->list.contents[id].size = UINT32_MAX;
 476  self->free_capture_list_count++;
 477}
 478
 479/**************
 480 * Quantifiers
 481 **************/
 482
 483static TSQuantifier quantifier_mul(
 484  TSQuantifier left,
 485  TSQuantifier right
 486) {
 487  switch (left)
 488  {
 489    case TSQuantifierZero:
 490      return TSQuantifierZero;
 491    case TSQuantifierZeroOrOne:
 492      switch (right) {
 493        case TSQuantifierZero:
 494          return TSQuantifierZero;
 495        case TSQuantifierZeroOrOne:
 496        case TSQuantifierOne:
 497          return TSQuantifierZeroOrOne;
 498        case TSQuantifierZeroOrMore:
 499        case TSQuantifierOneOrMore:
 500          return TSQuantifierZeroOrMore;
 501      };
 502      break;
 503    case TSQuantifierZeroOrMore:
 504      switch (right) {
 505        case TSQuantifierZero:
 506          return TSQuantifierZero;
 507        case TSQuantifierZeroOrOne:
 508        case TSQuantifierZeroOrMore:
 509        case TSQuantifierOne:
 510        case TSQuantifierOneOrMore:
 511          return TSQuantifierZeroOrMore;
 512      };
 513      break;
 514    case TSQuantifierOne:
 515      return right;
 516    case TSQuantifierOneOrMore:
 517      switch (right) {
 518        case TSQuantifierZero:
 519          return TSQuantifierZero;
 520        case TSQuantifierZeroOrOne:
 521        case TSQuantifierZeroOrMore:
 522          return TSQuantifierZeroOrMore;
 523        case TSQuantifierOne:
 524        case TSQuantifierOneOrMore:
 525          return TSQuantifierOneOrMore;
 526      };
 527      break;
 528  }
 529  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
 530}
 531
 532static TSQuantifier quantifier_join(
 533  TSQuantifier left,
 534  TSQuantifier right
 535) {
 536  switch (left)
 537  {
 538    case TSQuantifierZero:
 539      switch (right) {
 540        case TSQuantifierZero:
 541          return TSQuantifierZero;
 542        case TSQuantifierZeroOrOne:
 543        case TSQuantifierOne:
 544          return TSQuantifierZeroOrOne;
 545        case TSQuantifierZeroOrMore:
 546        case TSQuantifierOneOrMore:
 547          return TSQuantifierZeroOrMore;
 548      };
 549      break;
 550    case TSQuantifierZeroOrOne:
 551      switch (right) {
 552        case TSQuantifierZero:
 553        case TSQuantifierZeroOrOne:
 554        case TSQuantifierOne:
 555          return TSQuantifierZeroOrOne;
 556          break;
 557        case TSQuantifierZeroOrMore:
 558        case TSQuantifierOneOrMore:
 559          return TSQuantifierZeroOrMore;
 560          break;
 561      };
 562      break;
 563    case TSQuantifierZeroOrMore:
 564      return TSQuantifierZeroOrMore;
 565    case TSQuantifierOne:
 566      switch (right) {
 567        case TSQuantifierZero:
 568        case TSQuantifierZeroOrOne:
 569          return TSQuantifierZeroOrOne;
 570        case TSQuantifierZeroOrMore:
 571          return TSQuantifierZeroOrMore;
 572        case TSQuantifierOne:
 573          return TSQuantifierOne;
 574        case TSQuantifierOneOrMore:
 575          return TSQuantifierOneOrMore;
 576      };
 577      break;
 578    case TSQuantifierOneOrMore:
 579      switch (right) {
 580        case TSQuantifierZero:
 581        case TSQuantifierZeroOrOne:
 582        case TSQuantifierZeroOrMore:
 583          return TSQuantifierZeroOrMore;
 584        case TSQuantifierOne:
 585        case TSQuantifierOneOrMore:
 586          return TSQuantifierOneOrMore;
 587      };
 588      break;
 589  }
 590  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
 591}
 592
 593static TSQuantifier quantifier_add(
 594  TSQuantifier left,
 595  TSQuantifier right
 596) {
 597  switch (left)
 598  {
 599    case TSQuantifierZero:
 600      return right;
 601    case TSQuantifierZeroOrOne:
 602      switch (right) {
 603        case TSQuantifierZero:
 604          return TSQuantifierZeroOrOne;
 605        case TSQuantifierZeroOrOne:
 606        case TSQuantifierZeroOrMore:
 607          return TSQuantifierZeroOrMore;
 608        case TSQuantifierOne:
 609        case TSQuantifierOneOrMore:
 610          return TSQuantifierOneOrMore;
 611      };
 612      break;
 613    case TSQuantifierZeroOrMore:
 614      switch (right) {
 615        case TSQuantifierZero:
 616          return TSQuantifierZeroOrMore;
 617        case TSQuantifierZeroOrOne:
 618        case TSQuantifierZeroOrMore:
 619          return TSQuantifierZeroOrMore;
 620        case TSQuantifierOne:
 621        case TSQuantifierOneOrMore:
 622          return TSQuantifierOneOrMore;
 623      };
 624      break;
 625    case TSQuantifierOne:
 626      switch (right) {
 627        case TSQuantifierZero:
 628          return TSQuantifierOne;
 629        case TSQuantifierZeroOrOne:
 630        case TSQuantifierZeroOrMore:
 631        case TSQuantifierOne:
 632        case TSQuantifierOneOrMore:
 633          return TSQuantifierOneOrMore;
 634      };
 635      break;
 636    case TSQuantifierOneOrMore:
 637      return TSQuantifierOneOrMore;
 638  }
 639  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
 640}
 641
 642// Create new capture quantifiers structure
 643static CaptureQuantifiers capture_quantifiers_new(void) {
 644  return (CaptureQuantifiers) array_new();
 645}
 646
 647// Delete capture quantifiers structure
 648static void capture_quantifiers_delete(
 649  CaptureQuantifiers *self
 650) {
 651  array_delete(self);
 652}
 653
 654// Clear capture quantifiers structure
 655static void capture_quantifiers_clear(
 656  CaptureQuantifiers *self
 657) {
 658  array_clear(self);
 659}
 660
 661// Replace capture quantifiers with the given quantifiers
 662static void capture_quantifiers_replace(
 663  CaptureQuantifiers *self,
 664  CaptureQuantifiers *quantifiers
 665) {
 666  array_clear(self);
 667  array_push_all(self, quantifiers);
 668}
 669
 670// Return capture quantifier for the given capture id
 671static TSQuantifier capture_quantifier_for_id(
 672  const CaptureQuantifiers *self,
 673  uint16_t id
 674) {
 675  return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id);
 676}
 677
 678// Add the given quantifier to the current value for id
 679static void capture_quantifiers_add_for_id(
 680  CaptureQuantifiers *self,
 681  uint16_t id,
 682  TSQuantifier quantifier
 683) {
 684  if (self->size <= id) {
 685    array_grow_by(self, id + 1 - self->size);
 686  }
 687  uint8_t *own_quantifier = array_get(self, id);
 688  *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier);
 689}
 690
 691// Point-wise add the given quantifiers to the current values
 692static void capture_quantifiers_add_all(
 693  CaptureQuantifiers *self,
 694  CaptureQuantifiers *quantifiers
 695) {
 696  if (self->size < quantifiers->size) {
 697    array_grow_by(self, quantifiers->size - self->size);
 698  }
 699  for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) {
 700    uint8_t *quantifier = array_get(quantifiers, id);
 701    uint8_t *own_quantifier = array_get(self, id);
 702    *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
 703  }
 704}
 705
 706// Join the given quantifier with the current values
 707static void capture_quantifiers_mul(
 708  CaptureQuantifiers *self,
 709  TSQuantifier quantifier
 710) {
 711  for (uint16_t id = 0; id < (uint16_t)self->size; id++) {
 712    uint8_t *own_quantifier = array_get(self, id);
 713    *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier);
 714  }
 715}
 716
 717// Point-wise join the quantifiers from a list of alternatives with the current values
 718static void capture_quantifiers_join_all(
 719  CaptureQuantifiers *self,
 720  CaptureQuantifiers *quantifiers
 721) {
 722  if (self->size < quantifiers->size) {
 723    array_grow_by(self, quantifiers->size - self->size);
 724  }
 725  for (uint32_t id = 0; id < quantifiers->size; id++) {
 726    uint8_t *quantifier = array_get(quantifiers, id);
 727    uint8_t *own_quantifier = array_get(self, id);
 728    *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
 729  }
 730  for (uint32_t id = quantifiers->size; id < self->size; id++) {
 731    uint8_t *own_quantifier = array_get(self, id);
 732    *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero);
 733  }
 734}
 735
 736/**************
 737 * SymbolTable
 738 **************/
 739
 740static SymbolTable symbol_table_new(void) {
 741  return (SymbolTable) {
 742    .characters = array_new(),
 743    .slices = array_new(),
 744  };
 745}
 746
 747static void symbol_table_delete(SymbolTable *self) {
 748  array_delete(&self->characters);
 749  array_delete(&self->slices);
 750}
 751
 752static int symbol_table_id_for_name(
 753  const SymbolTable *self,
 754  const char *name,
 755  uint32_t length
 756) {
 757  for (unsigned i = 0; i < self->slices.size; i++) {
 758    Slice slice = self->slices.contents[i];
 759    if (
 760      slice.length == length &&
 761      !strncmp(&self->characters.contents[slice.offset], name, length)
 762    ) return i;
 763  }
 764  return -1;
 765}
 766
 767static const char *symbol_table_name_for_id(
 768  const SymbolTable *self,
 769  uint16_t id,
 770  uint32_t *length
 771) {
 772  Slice slice = self->slices.contents[id];
 773  *length = slice.length;
 774  return &self->characters.contents[slice.offset];
 775}
 776
 777static uint16_t symbol_table_insert_name(
 778  SymbolTable *self,
 779  const char *name,
 780  uint32_t length
 781) {
 782  int id = symbol_table_id_for_name(self, name, length);
 783  if (id >= 0) return (uint16_t)id;
 784  Slice slice = {
 785    .offset = self->characters.size,
 786    .length = length,
 787  };
 788  array_grow_by(&self->characters, length + 1);
 789  memcpy(&self->characters.contents[slice.offset], name, length);
 790  self->characters.contents[self->characters.size - 1] = 0;
 791  array_push(&self->slices, slice);
 792  return self->slices.size - 1;
 793}
 794
 795/************
 796 * QueryStep
 797 ************/
 798
 799static QueryStep query_step__new(
 800  TSSymbol symbol,
 801  uint16_t depth,
 802  bool is_immediate
 803) {
 804  QueryStep step = {
 805    .symbol = symbol,
 806    .depth = depth,
 807    .field = 0,
 808    .alternative_index = NONE,
 809    .negated_field_list_id = 0,
 810    .contains_captures = false,
 811    .is_last_child = false,
 812    .is_named = false,
 813    .is_pass_through = false,
 814    .is_dead_end = false,
 815    .root_pattern_guaranteed = false,
 816    .is_immediate = is_immediate,
 817    .alternative_is_immediate = false,
 818  };
 819  for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
 820    step.capture_ids[i] = NONE;
 821  }
 822  return step;
 823}
 824
 825static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
 826  for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
 827    if (self->capture_ids[i] == NONE) {
 828      self->capture_ids[i] = capture_id;
 829      break;
 830    }
 831  }
 832}
 833
 834static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
 835  for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
 836    if (self->capture_ids[i] == capture_id) {
 837      self->capture_ids[i] = NONE;
 838      while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
 839        if (self->capture_ids[i + 1] == NONE) break;
 840        self->capture_ids[i] = self->capture_ids[i + 1];
 841        self->capture_ids[i + 1] = NONE;
 842        i++;
 843      }
 844      break;
 845    }
 846  }
 847}
 848
 849/**********************
 850 * StatePredecessorMap
 851 **********************/
 852
 853static inline StatePredecessorMap state_predecessor_map_new(
 854  const TSLanguage *language
 855) {
 856  return (StatePredecessorMap) {
 857    .contents = ts_calloc(
 858      (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
 859      sizeof(TSStateId)
 860    ),
 861  };
 862}
 863
 864static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
 865  ts_free(self->contents);
 866}
 867
 868static inline void state_predecessor_map_add(
 869  StatePredecessorMap *self,
 870  TSStateId state,
 871  TSStateId predecessor
 872) {
 873  size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
 874  TSStateId *count = &self->contents[index];
 875  if (
 876    *count == 0 ||
 877    (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
 878  ) {
 879    (*count)++;
 880    self->contents[index + *count] = predecessor;
 881  }
 882}
 883
 884static inline const TSStateId *state_predecessor_map_get(
 885  const StatePredecessorMap *self,
 886  TSStateId state,
 887  unsigned *count
 888) {
 889  size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
 890  *count = self->contents[index];
 891  return &self->contents[index + 1];
 892}
 893
 894/****************
 895 * AnalysisState
 896 ****************/
 897
 898static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
 899  unsigned result = 0;
 900  for (unsigned i = 0; i < self->depth; i++) {
 901    TSSymbol symbol = self->stack[i].parent_symbol;
 902    for (unsigned j = 0; j < i; j++) {
 903      if (self->stack[j].parent_symbol == symbol) {
 904        result++;
 905        break;
 906      }
 907    }
 908  }
 909  return result;
 910}
 911
 912static inline int analysis_state__compare_position(
 913  AnalysisState *const *self,
 914  AnalysisState *const *other
 915) {
 916  for (unsigned i = 0; i < (*self)->depth; i++) {
 917    if (i >= (*other)->depth) return -1;
 918    if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1;
 919    if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1;
 920  }
 921  if ((*self)->depth < (*other)->depth) return 1;
 922  if ((*self)->step_index < (*other)->step_index) return -1;
 923  if ((*self)->step_index > (*other)->step_index) return 1;
 924  return 0;
 925}
 926
 927static inline int analysis_state__compare(
 928  AnalysisState *const *self,
 929  AnalysisState *const *other
 930) {
 931  int result = analysis_state__compare_position(self, other);
 932  if (result != 0) return result;
 933  for (unsigned i = 0; i < (*self)->depth; i++) {
 934    if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1;
 935    if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1;
 936    if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1;
 937    if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1;
 938    if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1;
 939    if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1;
 940  }
 941  return 0;
 942}
 943
 944static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
 945  if (self->depth == 0) {
 946    return &self->stack[0];
 947  }
 948  return &self->stack[self->depth - 1];
 949}
 950
 951static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
 952  for (unsigned i = 0; i < self->depth; i++) {
 953    if (self->stack[i].parent_symbol == symbol) return true;
 954  }
 955  return false;
 956}
 957
 958/******************
 959 * AnalysisStateSet
 960 ******************/
 961
 962// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by
 963// cloning one from scratch.
 964static inline AnalysisState *analysis_state_pool__clone_or_reuse(
 965  AnalysisStateSet *self,
 966  AnalysisState *borrowed_item
 967) {
 968  AnalysisState *new_item;
 969  if (self->size) {
 970    new_item = array_pop(self);
 971  } else {
 972    new_item = ts_malloc(sizeof(AnalysisState));
 973  }
 974  *new_item = *borrowed_item;
 975  return new_item;
 976}
 977
 978// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this
 979// set. The set does not contain duplicates, so if the item is already present, it will not be
 980// inserted, and no clone will be made.
 981//
 982// The caller retains ownership of the passed-in memory. However, the clone that is created by this
 983// function will be managed by the state set.
 984static inline void analysis_state_set__insert_sorted(
 985  AnalysisStateSet *self,
 986  AnalysisStateSet *pool,
 987  AnalysisState *borrowed_item
 988) {
 989  unsigned index, exists;
 990  array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists);
 991  if (!exists) {
 992    AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
 993    array_insert(self, index, new_item);
 994  }
 995}
 996
 997// Inserts a clone of the passed-in item at the end position of this list.
 998//
 999// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function
1000// `analysis_state__compare`) than largest item already in this set. If items are inserted in the
1001// wrong order, the set will not function properly for future use.
1002//
1003// The caller retains ownership of the passed-in memory. However, the clone that is created by this
1004// function will be managed by the state set.
1005static inline void analysis_state_set__push(
1006  AnalysisStateSet *self,
1007  AnalysisStateSet *pool,
1008  AnalysisState *borrowed_item
1009) {
1010  AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
1011  array_push(self, new_item);
1012}
1013
1014// Removes all items from this set, returning it to an empty state.
1015static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) {
1016  array_push_all(pool, self);
1017  array_clear(self);
1018}
1019
1020// Releases all memory that is managed with this state set, including any items currently present.
1021// After calling this function, the set is no longer suitable for use.
1022static inline void analysis_state_set__delete(AnalysisStateSet *self) {
1023  for (unsigned i = 0; i < self->size; i++) {
1024    ts_free(self->contents[i]);
1025  }
1026  array_delete(self);
1027}
1028
1029/****************
1030 * QueryAnalyzer
1031 ****************/
1032
1033static inline QueryAnalysis query_analysis__new(void) {
1034  return (QueryAnalysis) {
1035    .states = array_new(),
1036    .next_states = array_new(),
1037    .deeper_states = array_new(),
1038    .state_pool = array_new(),
1039    .final_step_indices = array_new(),
1040    .finished_parent_symbols = array_new(),
1041    .did_abort = false,
1042  };
1043}
1044
1045static inline void query_analysis__delete(QueryAnalysis *self) {
1046  analysis_state_set__delete(&self->states);
1047  analysis_state_set__delete(&self->next_states);
1048  analysis_state_set__delete(&self->deeper_states);
1049  analysis_state_set__delete(&self->state_pool);
1050  array_delete(&self->final_step_indices);
1051  array_delete(&self->finished_parent_symbols);
1052}
1053
1054/***********************
1055 * AnalysisSubgraphNode
1056 ***********************/
1057
1058static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
1059  if (self->state < other->state) return -1;
1060  if (self->state > other->state) return 1;
1061  if (self->child_index < other->child_index) return -1;
1062  if (self->child_index > other->child_index) return 1;
1063  if (self->done < other->done) return -1;
1064  if (self->done > other->done) return 1;
1065  if (self->production_id < other->production_id) return -1;
1066  if (self->production_id > other->production_id) return 1;
1067  return 0;
1068}
1069
1070/*********
1071 * Query
1072 *********/
1073
1074// The `pattern_map` contains a mapping from TSSymbol values to indices in the
1075// `steps` array. For a given syntax node, the `pattern_map` makes it possible
1076// to quickly find the starting steps of all of the patterns whose root matches
1077// that node. Each entry has two fields: a `pattern_index`, which identifies one
1078// of the patterns in the query, and a `step_index`, which indicates the start
1079// offset of that pattern's steps within the `steps` array.
1080//
1081// The entries are sorted by the patterns' root symbols, and lookups use a
1082// binary search. This ensures that the cost of this initial lookup step
1083// scales logarithmically with the number of patterns in the query.
1084//
1085// This returns `true` if the symbol is present and `false` otherwise.
1086// If the symbol is not present `*result` is set to the index where the
1087// symbol should be inserted.
1088static inline bool ts_query__pattern_map_search(
1089  const TSQuery *self,
1090  TSSymbol needle,
1091  uint32_t *result
1092) {
1093  uint32_t base_index = self->wildcard_root_pattern_count;
1094  uint32_t size = self->pattern_map.size - base_index;
1095  if (size == 0) {
1096    *result = base_index;
1097    return false;
1098  }
1099  while (size > 1) {
1100    uint32_t half_size = size / 2;
1101    uint32_t mid_index = base_index + half_size;
1102    TSSymbol mid_symbol = self->steps.contents[
1103      self->pattern_map.contents[mid_index].step_index
1104    ].symbol;
1105    if (needle > mid_symbol) base_index = mid_index;
1106    size -= half_size;
1107  }
1108
1109  TSSymbol symbol = self->steps.contents[
1110    self->pattern_map.contents[base_index].step_index
1111  ].symbol;
1112
1113  if (needle > symbol) {
1114    base_index++;
1115    if (base_index < self->pattern_map.size) {
1116      symbol = self->steps.contents[
1117        self->pattern_map.contents[base_index].step_index
1118      ].symbol;
1119    }
1120  }
1121
1122  *result = base_index;
1123  return needle == symbol;
1124}
1125
1126// Insert a new pattern's start index into the pattern map, maintaining
1127// the pattern map's ordering invariant.
1128static inline void ts_query__pattern_map_insert(
1129  TSQuery *self,
1130  TSSymbol symbol,
1131  PatternEntry new_entry
1132) {
1133  uint32_t index;
1134  ts_query__pattern_map_search(self, symbol, &index);
1135
1136  // Ensure that the entries are sorted not only by symbol, but also
1137  // by pattern_index. This way, states for earlier patterns will be
1138  // initiated first, which allows the ordering of the states array
1139  // to be maintained more efficiently.
1140  while (index < self->pattern_map.size) {
1141    PatternEntry *entry = &self->pattern_map.contents[index];
1142    if (
1143      self->steps.contents[entry->step_index].symbol == symbol &&
1144      entry->pattern_index < new_entry.pattern_index
1145    ) {
1146      index++;
1147    } else {
1148      break;
1149    }
1150  }
1151
1152  array_insert(&self->pattern_map, index, new_entry);
1153}
1154
1155// Walk the subgraph for this non-terminal, tracking all of the possible
1156// sequences of progress within the pattern.
1157static void ts_query__perform_analysis(
1158  TSQuery *self,
1159  const AnalysisSubgraphArray *subgraphs,
1160  QueryAnalysis *analysis
1161) {
1162  unsigned recursion_depth_limit = 0;
1163  unsigned prev_final_step_count = 0;
1164  array_clear(&analysis->final_step_indices);
1165  array_clear(&analysis->finished_parent_symbols);
1166
1167  for (unsigned iteration = 0;; iteration++) {
1168    if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
1169      analysis->did_abort = true;
1170      break;
1171    }
1172
1173    #ifdef DEBUG_ANALYZE_QUERY
1174      printf("Iteration: %u. Final step indices:", iteration);
1175      for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
1176        printf(" %4u", analysis->final_step_indices.contents[j]);
1177      }
1178      printf("\n");
1179      for (unsigned j = 0; j < analysis->states.size; j++) {
1180        AnalysisState *state = analysis->states.contents[j];
1181        printf("  %3u: step: %u, stack: [", j, state->step_index);
1182        for (unsigned k = 0; k < state->depth; k++) {
1183          printf(
1184            " {%s, child: %u, state: %4u",
1185            self->language->symbol_names[state->stack[k].parent_symbol],
1186            state->stack[k].child_index,
1187            state->stack[k].parse_state
1188          );
1189          if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
1190          if (state->stack[k].done) printf(", DONE");
1191          printf("}");
1192        }
1193        printf(" ]\n");
1194      }
1195    #endif
1196
1197    // If no further progress can be made within the current recursion depth limit, then
1198    // bump the depth limit by one, and continue to process the states the exceeded the
1199    // limit. But only allow this if progress has been made since the last time the depth
1200    // limit was increased.
1201    if (analysis->states.size == 0) {
1202      if (
1203        analysis->deeper_states.size > 0 &&
1204        analysis->final_step_indices.size > prev_final_step_count
1205      ) {
1206        #ifdef DEBUG_ANALYZE_QUERY
1207          printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
1208        #endif
1209
1210        prev_final_step_count = analysis->final_step_indices.size;
1211        recursion_depth_limit++;
1212        AnalysisStateSet _states = analysis->states;
1213        analysis->states = analysis->deeper_states;
1214        analysis->deeper_states = _states;
1215        continue;
1216      }
1217
1218      break;
1219    }
1220
1221    analysis_state_set__clear(&analysis->next_states, &analysis->state_pool);
1222    for (unsigned j = 0; j < analysis->states.size; j++) {
1223      AnalysisState * const state = analysis->states.contents[j];
1224
1225      // For efficiency, it's important to avoid processing the same analysis state more
1226      // than once. To achieve this, keep the states in order of ascending position within
1227      // their hypothetical syntax trees. In each iteration of this loop, start by advancing
1228      // the states that have made the least progress. Avoid advancing states that have already
1229      // made more progress.
1230      if (analysis->next_states.size > 0) {
1231        int comparison = analysis_state__compare_position(
1232          &state,
1233          array_back(&analysis->next_states)
1234        );
1235        if (comparison == 0) {
1236          analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state);
1237          continue;
1238        } else if (comparison > 0) {
1239          #ifdef DEBUG_ANALYZE_QUERY
1240            printf("Terminate iteration at state %u\n", j);
1241          #endif
1242          while (j < analysis->states.size) {
1243            analysis_state_set__push(
1244              &analysis->next_states,
1245              &analysis->state_pool,
1246              analysis->states.contents[j]
1247            );
1248            j++;
1249          }
1250          break;
1251        }
1252      }
1253
1254      const TSStateId parse_state = analysis_state__top(state)->parse_state;
1255      const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
1256      const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
1257      const unsigned child_index = analysis_state__top(state)->child_index;
1258      const QueryStep * const step = &self->steps.contents[state->step_index];
1259
1260      unsigned subgraph_index, exists;
1261      array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1262      if (!exists) continue;
1263      const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index];
1264
1265      // Follow every possible path in the parse table, but only visit states that
1266      // are part of the subgraph for the current symbol.
1267      LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
1268      while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1269        TSSymbol sym = lookahead_iterator.symbol;
1270
1271        AnalysisSubgraphNode successor = {
1272          .state = parse_state,
1273          .child_index = child_index,
1274        };
1275        if (lookahead_iterator.action_count) {
1276          const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
1277          if (action->type == TSParseActionTypeShift) {
1278            if (!action->shift.extra) {
1279              successor.state = action->shift.state;
1280              successor.child_index++;
1281            }
1282          } else {
1283            continue;
1284          }
1285        } else if (lookahead_iterator.next_state != 0) {
1286          successor.state = lookahead_iterator.next_state;
1287          successor.child_index++;
1288        } else {
1289          continue;
1290        }
1291
1292        unsigned node_index;
1293        array_search_sorted_with(
1294          &subgraph->nodes,
1295          analysis_subgraph_node__compare, &successor,
1296          &node_index, &exists
1297        );
1298        while (node_index < subgraph->nodes.size) {
1299          AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
1300          if (node->state != successor.state || node->child_index != successor.child_index) break;
1301
1302          // Use the subgraph to determine what alias and field will eventually be applied
1303          // to this child node.
1304          TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
1305          TSSymbol visible_symbol = alias
1306            ? alias
1307            : self->language->symbol_metadata[sym].visible
1308              ? self->language->public_symbol_map[sym]
1309              : 0;
1310          TSFieldId field_id = parent_field_id;
1311          if (!field_id) {
1312            const TSFieldMapEntry *field_map, *field_map_end;
1313            ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
1314            for (; field_map != field_map_end; field_map++) {
1315              if (!field_map->inherited && field_map->child_index == child_index) {
1316                field_id = field_map->field_id;
1317                break;
1318              }
1319            }
1320          }
1321
1322          // Create a new state that has advanced past this hypothetical subtree.
1323          AnalysisState next_state = *state;
1324          AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
1325          next_state_top->child_index = successor.child_index;
1326          next_state_top->parse_state = successor.state;
1327          if (node->done) next_state_top->done = true;
1328
1329          // Determine if this hypothetical child node would match the current step
1330          // of the query pattern.
1331          bool does_match = false;
1332          if (visible_symbol) {
1333            does_match = true;
1334            if (step->symbol == WILDCARD_SYMBOL) {
1335              if (
1336                step->is_named &&
1337                !self->language->symbol_metadata[visible_symbol].named
1338              ) does_match = false;
1339            } else if (step->symbol != visible_symbol) {
1340              does_match = false;
1341            }
1342            if (step->field && step->field != field_id) {
1343              does_match = false;
1344            }
1345            if (
1346              step->supertype_symbol &&
1347              !analysis_state__has_supertype(state, step->supertype_symbol)
1348            ) does_match = false;
1349          }
1350
1351          // If this child is hidden, then descend into it and walk through its children.
1352          // If the top entry of the stack is at the end of its rule, then that entry can
1353          // be replaced. Otherwise, push a new entry onto the stack.
1354          else if (sym >= self->language->token_count) {
1355            if (!next_state_top->done) {
1356              if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
1357                #ifdef DEBUG_ANALYZE_QUERY
1358                  printf("Exceeded depth limit for state %u\n", j);
1359                #endif
1360
1361                analysis->did_abort = true;
1362                continue;
1363              }
1364
1365              next_state.depth++;
1366              next_state_top = analysis_state__top(&next_state);
1367            }
1368
1369            *next_state_top = (AnalysisStateEntry) {
1370              .parse_state = parse_state,
1371              .parent_symbol = sym,
1372              .child_index = 0,
1373              .field_id = field_id,
1374              .done = false,
1375            };
1376
1377            if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
1378              analysis_state_set__insert_sorted(
1379                &analysis->deeper_states,
1380                &analysis->state_pool,
1381                &next_state
1382              );
1383              continue;
1384            }
1385          }
1386
1387          // Pop from the stack when this state reached the end of its current syntax node.
1388          while (next_state.depth > 0 && next_state_top->done) {
1389            next_state.depth--;
1390            next_state_top = analysis_state__top(&next_state);
1391          }
1392
1393          // If this hypothetical child did match the current step of the query pattern,
1394          // then advance to the next step at the current depth. This involves skipping
1395          // over any descendant steps of the current child.
1396          const QueryStep *next_step = step;
1397          if (does_match) {
1398            for (;;) {
1399              next_state.step_index++;
1400              next_step = &self->steps.contents[next_state.step_index];
1401              if (
1402                next_step->depth == PATTERN_DONE_MARKER ||
1403                next_step->depth <= step->depth
1404              ) break;
1405            }
1406          } else if (successor.state == parse_state) {
1407            continue;
1408          }
1409
1410          for (;;) {
1411            // Skip pass-through states. Although these states have alternatives, they are only
1412            // used to implement repetitions, and query analysis does not need to process
1413            // repetitions in order to determine whether steps are possible and definite.
1414            if (next_step->is_pass_through) {
1415              next_state.step_index++;
1416              next_step++;
1417              continue;
1418            }
1419
1420            // If the pattern is finished or hypothetical parent node is complete, then
1421            // record that matching can terminate at this step of the pattern. Otherwise,
1422            // add this state to the list of states to process on the next iteration.
1423            if (!next_step->is_dead_end) {
1424              bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth;
1425              if (did_finish_pattern) {
1426                array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol);
1427              } else if (next_state.depth == 0) {
1428                array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index);
1429              } else {
1430                analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state);
1431              }
1432            }
1433
1434            // If the state has advanced to a step with an alternative step, then add another state
1435            // at that alternative step. This process is simpler than the process of actually matching a
1436            // pattern during query execution, because for the purposes of query analysis, there is no
1437            // need to process repetitions.
1438            if (
1439              does_match &&
1440              next_step->alternative_index != NONE &&
1441              next_step->alternative_index > next_state.step_index
1442            ) {
1443              next_state.step_index = next_step->alternative_index;
1444              next_step = &self->steps.contents[next_state.step_index];
1445            } else {
1446              break;
1447            }
1448          }
1449        }
1450      }
1451    }
1452
1453    AnalysisStateSet _states = analysis->states;
1454    analysis->states = analysis->next_states;
1455    analysis->next_states = _states;
1456  }
1457}
1458
1459static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
1460  Array(uint16_t) non_rooted_pattern_start_steps = array_new();
1461  for (unsigned i = 0; i < self->pattern_map.size; i++) {
1462    PatternEntry *pattern = &self->pattern_map.contents[i];
1463    if (!pattern->is_rooted) {
1464      QueryStep *step = &self->steps.contents[pattern->step_index];
1465      if (step->symbol != WILDCARD_SYMBOL) {
1466        array_push(&non_rooted_pattern_start_steps, i);
1467      }
1468    }
1469  }
1470
1471  // Walk forward through all of the steps in the query, computing some
1472  // basic information about each step. Mark all of the steps that contain
1473  // captures, and record the indices of all of the steps that have child steps.
1474  Array(uint32_t) parent_step_indices = array_new();
1475  for (unsigned i = 0; i < self->steps.size; i++) {
1476    QueryStep *step = &self->steps.contents[i];
1477    if (step->depth == PATTERN_DONE_MARKER) {
1478      step->parent_pattern_guaranteed = true;
1479      step->root_pattern_guaranteed = true;
1480      continue;
1481    }
1482
1483    bool has_children = false;
1484    bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
1485    step->contains_captures = step->capture_ids[0] != NONE;
1486    for (unsigned j = i + 1; j < self->steps.size; j++) {
1487      QueryStep *next_step = &self->steps.contents[j];
1488      if (
1489        next_step->depth == PATTERN_DONE_MARKER ||
1490        next_step->depth <= step->depth
1491      ) break;
1492      if (next_step->capture_ids[0] != NONE) {
1493        step->contains_captures = true;
1494      }
1495      if (!is_wildcard) {
1496        next_step->root_pattern_guaranteed = true;
1497        next_step->parent_pattern_guaranteed = true;
1498      }
1499      has_children = true;
1500    }
1501
1502    if (has_children && !is_wildcard) {
1503      array_push(&parent_step_indices, i);
1504    }
1505  }
1506
1507  // For every parent symbol in the query, initialize an 'analysis subgraph'.
1508  // This subgraph lists all of the states in the parse table that are directly
1509  // involved in building subtrees for this symbol.
1510  //
1511  // In addition to the parent symbols in the query, construct subgraphs for all
1512  // of the hidden symbols in the grammar, because these might occur within
1513  // one of the parent nodes, such that their children appear to belong to the
1514  // parent.
1515  AnalysisSubgraphArray subgraphs = array_new();
1516  for (unsigned i = 0; i < parent_step_indices.size; i++) {
1517    uint32_t parent_step_index = parent_step_indices.contents[i];
1518    TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1519    AnalysisSubgraph subgraph = { .symbol = parent_symbol };
1520    array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1521  }
1522  for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) {
1523    if (!ts_language_symbol_metadata(self->language, sym).visible) {
1524      AnalysisSubgraph subgraph = { .symbol = sym };
1525      array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1526    }
1527  }
1528
1529  // Scan the parse table to find the data needed to populate these subgraphs.
1530  // Collect three things during this scan:
1531  //   1) All of the parse states where one of these symbols can start.
1532  //   2) All of the parse states where one of these symbols can end, along
1533  //      with information about the node that would be created.
1534  //   3) A list of predecessor states for each state.
1535  StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
1536  for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) {
1537    unsigned subgraph_index, exists;
1538    LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
1539    while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1540      if (lookahead_iterator.action_count) {
1541        for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
1542          const TSParseAction *action = &lookahead_iterator.actions[i];
1543          if (action->type == TSParseActionTypeReduce) {
1544            const TSSymbol *aliases, *aliases_end;
1545            ts_language_aliases_for_symbol(
1546              self->language,
1547              action->reduce.symbol,
1548              &aliases,
1549              &aliases_end
1550            );
1551            for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1552              array_search_sorted_by(
1553                &subgraphs,
1554                .symbol,
1555                *symbol,
1556                &subgraph_index,
1557                &exists
1558              );
1559              if (exists) {
1560                AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1561                if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
1562                  array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
1563                    .state = state,
1564                    .production_id = action->reduce.production_id,
1565                    .child_index = action->reduce.child_count,
1566                    .done = true,
1567                  }));
1568                }
1569              }
1570            }
1571          } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
1572            TSStateId next_state = action->shift.state;
1573            state_predecessor_map_add(&predecessor_map, next_state, state);
1574          }
1575        }
1576      } else if (lookahead_iterator.next_state != 0) {
1577        if (lookahead_iterator.next_state != state) {
1578          state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
1579        }
1580        if (ts_language_state_is_primary(self->language, state)) {
1581          const TSSymbol *aliases, *aliases_end;
1582          ts_language_aliases_for_symbol(
1583            self->language,
1584            lookahead_iterator.symbol,
1585            &aliases,
1586            &aliases_end
1587          );
1588          for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1589            array_search_sorted_by(
1590              &subgraphs,
1591              .symbol,
1592              *symbol,
1593              &subgraph_index,
1594              &exists
1595            );
1596            if (exists) {
1597              AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1598              if (
1599                subgraph->start_states.size == 0 ||
1600                *array_back(&subgraph->start_states) != state
1601              )
1602              array_push(&subgraph->start_states, state);
1603            }
1604          }
1605        }
1606      }
1607    }
1608  }
1609
1610  // For each subgraph, compute the preceding states by walking backward
1611  // from the end states using the predecessor map.
1612  Array(AnalysisSubgraphNode) next_nodes = array_new();
1613  for (unsigned i = 0; i < subgraphs.size; i++) {
1614    AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1615    if (subgraph->nodes.size == 0) {
1616      array_delete(&subgraph->start_states);
1617      array_erase(&subgraphs, i);
1618      i--;
1619      continue;
1620    }
1621    array_assign(&next_nodes, &subgraph->nodes);
1622    while (next_nodes.size > 0) {
1623      AnalysisSubgraphNode node = array_pop(&next_nodes);
1624      if (node.child_index > 1) {
1625        unsigned predecessor_count;
1626        const TSStateId *predecessors = state_predecessor_map_get(
1627          &predecessor_map,
1628          node.state,
1629          &predecessor_count
1630        );
1631        for (unsigned j = 0; j < predecessor_count; j++) {
1632          AnalysisSubgraphNode predecessor_node = {
1633            .state = predecessors[j],
1634            .child_index = node.child_index - 1,
1635            .production_id = node.production_id,
1636            .done = false,
1637          };
1638          unsigned index, exists;
1639          array_search_sorted_with(
1640            &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
1641            &index, &exists
1642          );
1643          if (!exists) {
1644            array_insert(&subgraph->nodes, index, predecessor_node);
1645            array_push(&next_nodes, predecessor_node);
1646          }
1647        }
1648      }
1649    }
1650  }
1651
1652  #ifdef DEBUG_ANALYZE_QUERY
1653    printf("\nSubgraphs:\n");
1654    for (unsigned i = 0; i < subgraphs.size; i++) {
1655      AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1656      printf("  %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
1657      for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1658        printf(
1659          "    {state: %u}\n",
1660          subgraph->start_states.contents[j]
1661        );
1662      }
1663      for (unsigned j = 0; j < subgraph->nodes.size; j++) {
1664        AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
1665        printf(
1666          "    {state: %u, child_index: %u, production_id: %u, done: %d}\n",
1667          node->state, node->child_index, node->production_id, node->done
1668        );
1669      }
1670      printf("\n");
1671    }
1672  #endif
1673
1674  // For each non-terminal pattern, determine if the pattern can successfully match,
1675  // and identify all of the possible children within the pattern where matching could fail.
1676  bool all_patterns_are_valid = true;
1677  QueryAnalysis analysis = query_analysis__new();
1678  for (unsigned i = 0; i < parent_step_indices.size; i++) {
1679    uint16_t parent_step_index = parent_step_indices.contents[i];
1680    uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
1681    TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1682    if (parent_symbol == ts_builtin_sym_error) continue;
1683
1684    // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
1685    // root symbol is a terminal, then return an error.
1686    unsigned subgraph_index, exists;
1687    array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1688    if (!exists) {
1689      unsigned first_child_step_index = parent_step_index + 1;
1690      uint32_t j, child_exists;
1691      array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists);
1692      assert(child_exists);
1693      *error_offset = self->step_offsets.contents[j].byte_offset;
1694      all_patterns_are_valid = false;
1695      break;
1696    }
1697
1698    // Initialize an analysis state at every parse state in the table where
1699    // this parent symbol can occur.
1700    AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1701    analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1702    analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1703    for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1704      TSStateId parse_state = subgraph->start_states.contents[j];
1705      analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1706        .step_index = parent_step_index + 1,
1707        .stack = {
1708          [0] = {
1709            .parse_state = parse_state,
1710            .parent_symbol = parent_symbol,
1711            .child_index = 0,
1712            .field_id = 0,
1713            .done = false,
1714          },
1715        },
1716        .depth = 1,
1717        .root_symbol = parent_symbol,
1718      }));
1719    }
1720
1721    #ifdef DEBUG_ANALYZE_QUERY
1722      printf(
1723        "\nWalk states for %s:\n",
1724        ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol)
1725      );
1726    #endif
1727
1728    analysis.did_abort = false;
1729    ts_query__perform_analysis(self, &subgraphs, &analysis);
1730
1731    // If this pattern could not be fully analyzed, then every step should
1732    // be considered fallible.
1733    if (analysis.did_abort) {
1734      for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
1735        QueryStep *step = &self->steps.contents[j];
1736        if (
1737          step->depth <= parent_depth ||
1738          step->depth == PATTERN_DONE_MARKER
1739        ) break;
1740        if (!step->is_dead_end) {
1741          step->parent_pattern_guaranteed = false;
1742          step->root_pattern_guaranteed = false;
1743        }
1744      }
1745      continue;
1746    }
1747
1748    // If this pattern cannot match, store the pattern index so that it can be
1749    // returned to the caller.
1750    if (analysis.finished_parent_symbols.size == 0) {
1751      assert(analysis.final_step_indices.size > 0);
1752      uint16_t impossible_step_index = *array_back(&analysis.final_step_indices);
1753      uint32_t j, impossible_exists;
1754      array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists);
1755      if (j >= self->step_offsets.size) j = self->step_offsets.size - 1;
1756      *error_offset = self->step_offsets.contents[j].byte_offset;
1757      all_patterns_are_valid = false;
1758      break;
1759    }
1760
1761    // Mark as fallible any step where a match terminated.
1762    // Later, this property will be propagated to all of the step's predecessors.
1763    for (unsigned j = 0; j < analysis.final_step_indices.size; j++) {
1764      uint32_t final_step_index = analysis.final_step_indices.contents[j];
1765      QueryStep *step = &self->steps.contents[final_step_index];
1766      if (
1767        step->depth != PATTERN_DONE_MARKER &&
1768        step->depth > parent_depth &&
1769        !step->is_dead_end
1770      ) {
1771        step->parent_pattern_guaranteed = false;
1772        step->root_pattern_guaranteed = false;
1773      }
1774    }
1775  }
1776
1777  // Mark as indefinite any step with captures that are used in predicates.
1778  Array(uint16_t) predicate_capture_ids = array_new();
1779  for (unsigned i = 0; i < self->patterns.size; i++) {
1780    QueryPattern *pattern = &self->patterns.contents[i];
1781
1782    // Gather all of the captures that are used in predicates for this pattern.
1783    array_clear(&predicate_capture_ids);
1784    for (
1785      unsigned start = pattern->predicate_steps.offset,
1786      end = start + pattern->predicate_steps.length,
1787      j = start; j < end; j++
1788    ) {
1789      TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
1790      if (step->type == TSQueryPredicateStepTypeCapture) {
1791        uint16_t value_id = step->value_id;
1792        array_insert_sorted_by(&predicate_capture_ids, , value_id);
1793      }
1794    }
1795
1796    // Find all of the steps that have these captures.
1797    for (
1798      unsigned start = pattern->steps.offset,
1799      end = start + pattern->steps.length,
1800      j = start; j < end; j++
1801    ) {
1802      QueryStep *step = &self->steps.contents[j];
1803      for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
1804        uint16_t capture_id = step->capture_ids[k];
1805        if (capture_id == NONE) break;
1806        unsigned index, exists;
1807        array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
1808        if (exists) {
1809          step->root_pattern_guaranteed = false;
1810          break;
1811        }
1812      }
1813    }
1814  }
1815
1816  // Propagate fallibility. If a pattern is fallible at a given step, then it is
1817  // fallible at all of its preceding steps.
1818  bool done = self->steps.size == 0;
1819  while (!done) {
1820    done = true;
1821    for (unsigned i = self->steps.size - 1; i > 0; i--) {
1822      QueryStep *step = &self->steps.contents[i];
1823      if (step->depth == PATTERN_DONE_MARKER) continue;
1824
1825      // Determine if this step is definite or has definite alternatives.
1826      bool parent_pattern_guaranteed = false;
1827      for (;;) {
1828        if (step->root_pattern_guaranteed) {
1829          parent_pattern_guaranteed = true;
1830          break;
1831        }
1832        if (step->alternative_index == NONE || step->alternative_index < i) {
1833          break;
1834        }
1835        step = &self->steps.contents[step->alternative_index];
1836      }
1837
1838      // If not, mark its predecessor as indefinite.
1839      if (!parent_pattern_guaranteed) {
1840        QueryStep *prev_step = &self->steps.contents[i - 1];
1841        if (
1842          !prev_step->is_dead_end &&
1843          prev_step->depth != PATTERN_DONE_MARKER &&
1844          prev_step->root_pattern_guaranteed
1845        ) {
1846          prev_step->root_pattern_guaranteed = false;
1847          done = false;
1848        }
1849      }
1850    }
1851  }
1852
1853  #ifdef DEBUG_ANALYZE_QUERY
1854    printf("Steps:\n");
1855    for (unsigned i = 0; i < self->steps.size; i++) {
1856      QueryStep *step = &self->steps.contents[i];
1857      if (step->depth == PATTERN_DONE_MARKER) {
1858        printf("  %u: DONE\n", i);
1859      } else {
1860        printf(
1861          "  %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
1862          i,
1863          (step->symbol == WILDCARD_SYMBOL)
1864            ? "ANY"
1865            : ts_language_symbol_name(self->language, step->symbol),
1866          (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
1867          step->depth,
1868          step->parent_pattern_guaranteed,
1869          step->root_pattern_guaranteed
1870        );
1871      }
1872    }
1873  #endif
1874
1875  // Determine which repetition symbols in this language have the possibility
1876  // of matching non-rooted patterns in this query. These repetition symbols
1877  // prevent certain optimizations with range restrictions.
1878  analysis.did_abort = false;
1879  for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) {
1880    uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i];
1881    PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index];
1882
1883    analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1884    analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1885    for (unsigned j = 0; j < subgraphs.size; j++) {
1886      AnalysisSubgraph *subgraph = &subgraphs.contents[j];
1887      TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol);
1888      if (metadata.visible || metadata.named) continue;
1889
1890      for (uint32_t k = 0; k < subgraph->start_states.size; k++) {
1891        TSStateId parse_state = subgraph->start_states.contents[k];
1892        analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1893          .step_index = pattern_entry->step_index,
1894          .stack = {
1895            [0] = {
1896              .parse_state = parse_state,
1897              .parent_symbol = subgraph->symbol,
1898              .child_index = 0,
1899              .field_id = 0,
1900              .done = false,
1901            },
1902          },
1903          .root_symbol = subgraph->symbol,
1904          .depth = 1,
1905        }));
1906      }
1907    }
1908
1909    #ifdef DEBUG_ANALYZE_QUERY
1910      printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index);
1911    #endif
1912
1913    ts_query__perform_analysis(
1914      self,
1915      &subgraphs,
1916      &analysis
1917    );
1918
1919    if (analysis.finished_parent_symbols.size > 0) {
1920      self->patterns.contents[pattern_entry->pattern_index].is_non_local = true;
1921    }
1922
1923    for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) {
1924      TSSymbol symbol = analysis.finished_parent_symbols.contents[k];
1925      array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol);
1926    }
1927  }
1928
1929  #ifdef DEBUG_ANALYZE_QUERY
1930    if (self->repeat_symbols_with_rootless_patterns.size > 0) {
1931      printf("\nRepetition symbols with rootless patterns:\n");
1932      printf("aborted analysis: %d\n", analysis.did_abort);
1933      for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
1934        TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i];
1935        printf("  %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
1936      }
1937      printf("\n");
1938    }
1939  #endif
1940
1941  // Cleanup
1942  for (unsigned i = 0; i < subgraphs.size; i++) {
1943    array_delete(&subgraphs.contents[i].start_states);
1944    array_delete(&subgraphs.contents[i].nodes);
1945  }
1946  array_delete(&subgraphs);
1947  query_analysis__delete(&analysis);
1948  array_delete(&next_nodes);
1949  array_delete(&non_rooted_pattern_start_steps);
1950  array_delete(&parent_step_indices);
1951  array_delete(&predicate_capture_ids);
1952  state_predecessor_map_delete(&predecessor_map);
1953
1954  return all_patterns_are_valid;
1955}
1956
1957static void ts_query__add_negated_fields(
1958  TSQuery *self,
1959  uint16_t step_index,
1960  TSFieldId *field_ids,
1961  uint16_t field_count
1962) {
1963  QueryStep *step = &self->steps.contents[step_index];
1964
1965  // The negated field array stores a list of field lists, separated by zeros.
1966  // Try to find the start index of an existing list that matches this new list.
1967  bool failed_match = false;
1968  unsigned match_count = 0;
1969  unsigned start_i = 0;
1970  for (unsigned i = 0; i < self->negated_fields.size; i++) {
1971    TSFieldId existing_field_id = self->negated_fields.contents[i];
1972
1973    // At each zero value, terminate the match attempt. If we've exactly
1974    // matched the new field list, then reuse this index. Otherwise,
1975    // start over the matching process.
1976    if (existing_field_id == 0) {
1977      if (match_count == field_count) {
1978        step->negated_field_list_id = start_i;
1979        return;
1980      } else {
1981        start_i = i + 1;
1982        match_count = 0;
1983        failed_match = false;
1984      }
1985    }
1986
1987    // If the existing list matches our new list so far, then advance
1988    // to the next element of the new list.
1989    else if (
1990      match_count < field_count &&
1991      existing_field_id == field_ids[match_count] &&
1992      !failed_match
1993    ) {
1994      match_count++;
1995    }
1996
1997    // Otherwise, this existing list has failed to match.
1998    else {
1999      match_count = 0;
2000      failed_match = true;
2001    }
2002  }
2003
2004  step->negated_field_list_id = self->negated_fields.size;
2005  array_extend(&self->negated_fields, field_count, field_ids);
2006  array_push(&self->negated_fields, 0);
2007}
2008
2009static TSQueryError ts_query__parse_string_literal(
2010  TSQuery *self,
2011  Stream *stream
2012) {
2013  const char *string_start = stream->input;
2014  if (stream->next != '"') return TSQueryErrorSyntax;
2015  stream_advance(stream);
2016  const char *prev_position = stream->input;
2017
2018  bool is_escaped = false;
2019  array_clear(&self->string_buffer);
2020  for (;;) {
2021    if (is_escaped) {
2022      is_escaped = false;
2023      switch (stream->next) {
2024        case 'n':
2025          array_push(&self->string_buffer, '\n');
2026          break;
2027        case 'r':
2028          array_push(&self->string_buffer, '\r');
2029          break;
2030        case 't':
2031          array_push(&self->string_buffer, '\t');
2032          break;
2033        case '0':
2034          array_push(&self->string_buffer, '\0');
2035          break;
2036        default:
2037          array_extend(&self->string_buffer, stream->next_size, stream->input);
2038          break;
2039      }
2040      prev_position = stream->input + stream->next_size;
2041    } else {
2042      if (stream->next == '\\') {
2043        array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2044        prev_position = stream->input + 1;
2045        is_escaped = true;
2046      } else if (stream->next == '"') {
2047        array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2048        stream_advance(stream);
2049        return TSQueryErrorNone;
2050      } else if (stream->next == '\n') {
2051        stream_reset(stream, string_start);
2052        return TSQueryErrorSyntax;
2053      }
2054    }
2055    if (!stream_advance(stream)) {
2056      stream_reset(stream, string_start);
2057      return TSQueryErrorSyntax;
2058    }
2059  }
2060}
2061
2062// Parse a single predicate associated with a pattern, adding it to the
2063// query's internal `predicate_steps` array. Predicates are arbitrary
2064// S-expressions associated with a pattern which are meant to be handled at
2065// a higher level of abstraction, such as the Rust/JavaScript bindings. They
2066// can contain '@'-prefixed capture names, double-quoted strings, and bare
2067// symbols, which also represent strings.
2068static TSQueryError ts_query__parse_predicate(
2069  TSQuery *self,
2070  Stream *stream
2071) {
2072  if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2073  const char *predicate_name = stream->input;
2074  stream_scan_identifier(stream);
2075  uint32_t length = (uint32_t)(stream->input - predicate_name);
2076  uint16_t id = symbol_table_insert_name(
2077    &self->predicate_values,
2078    predicate_name,
2079    length
2080  );
2081  array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2082    .type = TSQueryPredicateStepTypeString,
2083    .value_id = id,
2084  }));
2085  stream_skip_whitespace(stream);
2086
2087  for (;;) {
2088    if (stream->next == ')') {
2089      stream_advance(stream);
2090      stream_skip_whitespace(stream);
2091      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2092        .type = TSQueryPredicateStepTypeDone,
2093        .value_id = 0,
2094      }));
2095      break;
2096    }
2097
2098    // Parse an '@'-prefixed capture name
2099    else if (stream->next == '@') {
2100      stream_advance(stream);
2101
2102      // Parse the capture name
2103      if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2104      const char *capture_name = stream->input;
2105      stream_scan_identifier(stream);
2106      uint32_t capture_length = (uint32_t)(stream->input - capture_name);
2107
2108      // Add the capture id to the first step of the pattern
2109      int capture_id = symbol_table_id_for_name(
2110        &self->captures,
2111        capture_name,
2112        capture_length
2113      );
2114      if (capture_id == -1) {
2115        stream_reset(stream, capture_name);
2116        return TSQueryErrorCapture;
2117      }
2118
2119      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2120        .type = TSQueryPredicateStepTypeCapture,
2121        .value_id = capture_id,
2122      }));
2123    }
2124
2125    // Parse a string literal
2126    else if (stream->next == '"') {
2127      TSQueryError e = ts_query__parse_string_literal(self, stream);
2128      if (e) return e;
2129      uint16_t query_id = symbol_table_insert_name(
2130        &self->predicate_values,
2131        self->string_buffer.contents,
2132        self->string_buffer.size
2133      );
2134      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2135        .type = TSQueryPredicateStepTypeString,
2136        .value_id = query_id,
2137      }));
2138    }
2139
2140    // Parse a bare symbol
2141    else if (stream_is_ident_start(stream)) {
2142      const char *symbol_start = stream->input;
2143      stream_scan_identifier(stream);
2144      uint32_t symbol_length = (uint32_t)(stream->input - symbol_start);
2145      uint16_t query_id = symbol_table_insert_name(
2146        &self->predicate_values,
2147        symbol_start,
2148        symbol_length
2149      );
2150      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2151        .type = TSQueryPredicateStepTypeString,
2152        .value_id = query_id,
2153      }));
2154    }
2155
2156    else {
2157      return TSQueryErrorSyntax;
2158    }
2159
2160    stream_skip_whitespace(stream);
2161  }
2162
2163  return 0;
2164}
2165
2166// Read one S-expression pattern from the stream, and incorporate it into
2167// the query's internal state machine representation. For nested patterns,
2168// this function calls itself recursively.
2169//
2170// The caller is responsible for passing in a dedicated CaptureQuantifiers.
2171// These should not be shared between different calls to ts_query__parse_pattern!
2172static TSQueryError ts_query__parse_pattern(
2173  TSQuery *self,
2174  Stream *stream,
2175  uint32_t depth,
2176  bool is_immediate,
2177  CaptureQuantifiers *capture_quantifiers
2178) {
2179  if (stream->next == 0) return TSQueryErrorSyntax;
2180  if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
2181
2182  const uint32_t starting_step_index = self->steps.size;
2183
2184  // Store the byte offset of each step in the query.
2185  if (
2186    self->step_offsets.size == 0 ||
2187    array_back(&self->step_offsets)->step_index != starting_step_index
2188  ) {
2189    array_push(&self->step_offsets, ((StepOffset) {
2190      .step_index = starting_step_index,
2191      .byte_offset = stream_offset(stream),
2192    }));
2193  }
2194
2195  // An open bracket is the start of an alternation.
2196  if (stream->next == '[') {
2197    stream_advance(stream);
2198    stream_skip_whitespace(stream);
2199
2200    // Parse each branch, and add a placeholder step in between the branches.
2201    Array(uint32_t) branch_step_indices = array_new();
2202    CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
2203    for (;;) {
2204      uint32_t start_index = self->steps.size;
2205      TSQueryError e = ts_query__parse_pattern(
2206        self,
2207        stream,
2208        depth,
2209        is_immediate,
2210        &branch_capture_quantifiers
2211      );
2212
2213      if (e == PARENT_DONE) {
2214        if (stream->next == ']' && branch_step_indices.size > 0) {
2215          stream_advance(stream);
2216          break;
2217        }
2218        e = TSQueryErrorSyntax;
2219      }
2220      if (e) {
2221        capture_quantifiers_delete(&branch_capture_quantifiers);
2222        array_delete(&branch_step_indices);
2223        return e;
2224      }
2225
2226      if (start_index == starting_step_index) {
2227        capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
2228      } else {
2229        capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
2230      }
2231
2232      array_push(&branch_step_indices, start_index);
2233      array_push(&self->steps, query_step__new(0, depth, false));
2234      capture_quantifiers_clear(&branch_capture_quantifiers);
2235    }
2236    (void)array_pop(&self->steps);
2237
2238    // For all of the branches except for the last one, add the subsequent branch as an
2239    // alternative, and link the end of the branch to the current end of the steps.
2240    for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
2241      uint32_t step_index = branch_step_indices.contents[i];
2242      uint32_t next_step_index = branch_step_indices.contents[i + 1];
2243      QueryStep *start_step = &self->steps.contents[step_index];
2244      QueryStep *end_step = &self->steps.contents[next_step_index - 1];
2245      start_step->alternative_index = next_step_index;
2246      end_step->alternative_index = self->steps.size;
2247      end_step->is_dead_end = true;
2248    }
2249
2250    capture_quantifiers_delete(&branch_capture_quantifiers);
2251    array_delete(&branch_step_indices);
2252  }
2253
2254  // An open parenthesis can be the start of three possible constructs:
2255  // * A grouped sequence
2256  // * A predicate
2257  // * A named node
2258  else if (stream->next == '(') {
2259    stream_advance(stream);
2260    stream_skip_whitespace(stream);
2261
2262    // If this parenthesis is followed by a node, then it represents a grouped sequence.
2263    if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
2264      bool child_is_immediate = is_immediate;
2265      CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2266      for (;;) {
2267        if (stream->next == '.') {
2268          child_is_immediate = true;
2269          stream_advance(stream);
2270          stream_skip_whitespace(stream);
2271        }
2272        TSQueryError e = ts_query__parse_pattern(
2273          self,
2274          stream,
2275          depth,
2276          child_is_immediate,
2277          &child_capture_quantifiers
2278        );
2279        if (e == PARENT_DONE) {
2280          if (stream->next == ')') {
2281            stream_advance(stream);
2282            break;
2283          }
2284          e = TSQueryErrorSyntax;
2285        }
2286        if (e) {
2287          capture_quantifiers_delete(&child_capture_quantifiers);
2288          return e;
2289        }
2290
2291        capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2292        capture_quantifiers_clear(&child_capture_quantifiers);
2293        child_is_immediate = false;
2294      }
2295
2296      capture_quantifiers_delete(&child_capture_quantifiers);
2297    }
2298
2299    // A dot/pound character indicates the start of a predicate.
2300    else if (stream->next == '.' || stream->next == '#') {
2301      stream_advance(stream);
2302      return ts_query__parse_predicate(self, stream);
2303    }
2304
2305    // Otherwise, this parenthesis is the start of a named node.
2306    else {
2307      TSSymbol symbol;
2308
2309      // Parse a normal node name
2310      if (stream_is_ident_start(stream)) {
2311        const char *node_name = stream->input;
2312        stream_scan_identifier(stream);
2313        uint32_t length = (uint32_t)(stream->input - node_name);
2314
2315        // Parse the wildcard symbol
2316        if (length == 1 && node_name[0] == '_') {
2317          symbol = WILDCARD_SYMBOL;
2318        }
2319
2320        else {
2321          symbol = ts_language_symbol_for_name(
2322            self->language,
2323            node_name,
2324            length,
2325            true
2326          );
2327          if (!symbol) {
2328            stream_reset(stream, node_name);
2329            return TSQueryErrorNodeType;
2330          }
2331        }
2332      } else {
2333        return TSQueryErrorSyntax;
2334      }
2335
2336      // Add a step for the node.
2337      array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2338      QueryStep *step = array_back(&self->steps);
2339      if (ts_language_symbol_metadata(self->language, symbol).supertype) {
2340        step->supertype_symbol = step->symbol;
2341        step->symbol = WILDCARD_SYMBOL;
2342      }
2343      if (symbol == WILDCARD_SYMBOL) {
2344        step->is_named = true;
2345      }
2346
2347      stream_skip_whitespace(stream);
2348
2349      if (stream->next == '/') {
2350        stream_advance(stream);
2351        if (!stream_is_ident_start(stream)) {
2352          return TSQueryErrorSyntax;
2353        }
2354
2355        const char *node_name = stream->input;
2356        stream_scan_identifier(stream);
2357        uint32_t length = (uint32_t)(stream->input - node_name);
2358
2359        step->symbol = ts_language_symbol_for_name(
2360          self->language,
2361          node_name,
2362          length,
2363          true
2364        );
2365        if (!step->symbol) {
2366          stream_reset(stream, node_name);
2367          return TSQueryErrorNodeType;
2368        }
2369
2370        stream_skip_whitespace(stream);
2371      }
2372
2373      // Parse the child patterns
2374      bool child_is_immediate = false;
2375      uint16_t last_child_step_index = 0;
2376      uint16_t negated_field_count = 0;
2377      TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
2378      CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2379      for (;;) {
2380        // Parse a negated field assertion
2381        if (stream->next == '!') {
2382          stream_advance(stream);
2383          stream_skip_whitespace(stream);
2384          if (!stream_is_ident_start(stream)) {
2385            capture_quantifiers_delete(&child_capture_quantifiers);
2386            return TSQueryErrorSyntax;
2387          }
2388          const char *field_name = stream->input;
2389          stream_scan_identifier(stream);
2390          uint32_t length = (uint32_t)(stream->input - field_name);
2391          stream_skip_whitespace(stream);
2392
2393          TSFieldId field_id = ts_language_field_id_for_name(
2394            self->language,
2395            field_name,
2396            length
2397          );
2398          if (!field_id) {
2399            stream->input = field_name;
2400            capture_quantifiers_delete(&child_capture_quantifiers);
2401            return TSQueryErrorField;
2402          }
2403
2404          // Keep the field ids sorted.
2405          if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
2406            negated_field_ids[negated_field_count] = field_id;
2407            negated_field_count++;
2408          }
2409
2410          continue;
2411        }
2412
2413        // Parse a sibling anchor
2414        if (stream->next == '.') {
2415          child_is_immediate = true;
2416          stream_advance(stream);
2417          stream_skip_whitespace(stream);
2418        }
2419
2420        uint16_t step_index = self->steps.size;
2421        TSQueryError e = ts_query__parse_pattern(
2422          self,
2423          stream,
2424          depth + 1,
2425          child_is_immediate,
2426          &child_capture_quantifiers
2427        );
2428        if (e == PARENT_DONE) {
2429          if (stream->next == ')') {
2430            if (child_is_immediate) {
2431              if (last_child_step_index == 0) {
2432                capture_quantifiers_delete(&child_capture_quantifiers);
2433                return TSQueryErrorSyntax;
2434              }
2435              self->steps.contents[last_child_step_index].is_last_child = true;
2436            }
2437
2438            if (negated_field_count) {
2439              ts_query__add_negated_fields(
2440                self,
2441                starting_step_index,
2442                negated_field_ids,
2443                negated_field_count
2444              );
2445            }
2446
2447            stream_advance(stream);
2448            break;
2449          }
2450          e = TSQueryErrorSyntax;
2451        }
2452        if (e) {
2453          capture_quantifiers_delete(&child_capture_quantifiers);
2454          return e;
2455        }
2456
2457        capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2458
2459        last_child_step_index = step_index;
2460        child_is_immediate = false;
2461        capture_quantifiers_clear(&child_capture_quantifiers);
2462      }
2463      capture_quantifiers_delete(&child_capture_quantifiers);
2464    }
2465  }
2466
2467  // Parse a wildcard pattern
2468  else if (stream->next == '_') {
2469    stream_advance(stream);
2470    stream_skip_whitespace(stream);
2471
2472    // Add a step that matches any kind of node
2473    array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
2474  }
2475
2476  // Parse a double-quoted anonymous leaf node expression
2477  else if (stream->next == '"') {
2478    const char *string_start = stream->input;
2479    TSQueryError e = ts_query__parse_string_literal(self, stream);
2480    if (e) return e;
2481
2482    // Add a step for the node
2483    TSSymbol symbol = ts_language_symbol_for_name(
2484      self->language,
2485      self->string_buffer.contents,
2486      self->string_buffer.size,
2487      false
2488    );
2489    if (!symbol) {
2490      stream_reset(stream, string_start + 1);
2491      return TSQueryErrorNodeType;
2492    }
2493    array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2494  }
2495
2496  // Parse a field-prefixed pattern
2497  else if (stream_is_ident_start(stream)) {
2498    // Parse the field name
2499    const char *field_name = stream->input;
2500    stream_scan_identifier(stream);
2501    uint32_t length = (uint32_t)(stream->input - field_name);
2502    stream_skip_whitespace(stream);
2503
2504    if (stream->next != ':') {
2505      stream_reset(stream, field_name);
2506      return TSQueryErrorSyntax;
2507    }
2508    stream_advance(stream);
2509    stream_skip_whitespace(stream);
2510
2511    // Parse the pattern
2512    CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
2513    TSQueryError e = ts_query__parse_pattern(
2514      self,
2515      stream,
2516      depth,
2517      is_immediate,
2518      &field_capture_quantifiers
2519    );
2520    if (e) {
2521      capture_quantifiers_delete(&field_capture_quantifiers);
2522      if (e == PARENT_DONE) e = TSQueryErrorSyntax;
2523      return e;
2524    }
2525
2526    // Add the field name to the first step of the pattern
2527    TSFieldId field_id = ts_language_field_id_for_name(
2528      self->language,
2529      field_name,
2530      length
2531    );
2532    if (!field_id) {
2533      stream->input = field_name;
2534      return TSQueryErrorField;
2535    }
2536
2537    uint32_t step_index = starting_step_index;
2538    QueryStep *step = &self->steps.contents[step_index];
2539    for (;;) {
2540      step->field = field_id;
2541      if (
2542        step->alternative_index != NONE &&
2543        step->alternative_index > step_index &&
2544        step->alternative_index < self->steps.size
2545      ) {
2546        step_index = step->alternative_index;
2547        step = &self->steps.contents[step_index];
2548      } else {
2549        break;
2550      }
2551    }
2552
2553    capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
2554    capture_quantifiers_delete(&field_capture_quantifiers);
2555  }
2556
2557  else {
2558    return TSQueryErrorSyntax;
2559  }
2560
2561  stream_skip_whitespace(stream);
2562
2563  // Parse suffixes modifiers for this pattern
2564  TSQuantifier quantifier = TSQuantifierOne;
2565  for (;;) {
2566    // Parse the one-or-more operator.
2567    if (stream->next == '+') {
2568      quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);
2569
2570      stream_advance(stream);
2571      stream_skip_whitespace(stream);
2572
2573      QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2574      repeat_step.alternative_index = starting_step_index;
2575      repeat_step.is_pass_through = true;
2576      repeat_step.alternative_is_immediate = true;
2577      array_push(&self->steps, repeat_step);
2578    }
2579
2580    // Parse the zero-or-more repetition operator.
2581    else if (stream->next == '*') {
2582      quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);
2583
2584      stream_advance(stream);
2585      stream_skip_whitespace(stream);
2586
2587      QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2588      repeat_step.alternative_index = starting_step_index;
2589      repeat_step.is_pass_through = true;
2590      repeat_step.alternative_is_immediate = true;
2591      array_push(&self->steps, repeat_step);
2592
2593      // Stop when `step->alternative_index` is `NONE` or it points to
2594      // `repeat_step` or beyond. Note that having just been pushed,
2595      // `repeat_step` occupies slot `self->steps.size - 1`.
2596      QueryStep *step = &self->steps.contents[starting_step_index];
2597      while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) {
2598        step = &self->steps.contents[step->alternative_index];
2599      }
2600      step->alternative_index = self->steps.size;
2601    }
2602
2603    // Parse the optional operator.
2604    else if (stream->next == '?') {
2605      quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);
2606
2607      stream_advance(stream);
2608      stream_skip_whitespace(stream);
2609
2610      QueryStep *step = &self->steps.contents[starting_step_index];
2611      while (step->alternative_index != NONE && step->alternative_index < self->steps.size) {
2612        step = &self->steps.contents[step->alternative_index];
2613      }
2614      step->alternative_index = self->steps.size;
2615    }
2616
2617    // Parse an '@'-prefixed capture pattern
2618    else if (stream->next == '@') {
2619      stream_advance(stream);
2620      if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2621      const char *capture_name = stream->input;
2622      stream_scan_identifier(stream);
2623      uint32_t length = (uint32_t)(stream->input - capture_name);
2624      stream_skip_whitespace(stream);
2625
2626      // Add the capture id to the first step of the pattern
2627      uint16_t capture_id = symbol_table_insert_name(
2628        &self->captures,
2629        capture_name,
2630        length
2631      );
2632
2633      // Add the capture quantifier
2634      capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);
2635
2636      uint32_t step_index = starting_step_index;
2637      for (;;) {
2638        QueryStep *step = &self->steps.contents[step_index];
2639        query_step__add_capture(step, capture_id);
2640        if (
2641          step->alternative_index != NONE &&
2642          step->alternative_index > step_index &&
2643          step->alternative_index < self->steps.size
2644        ) {
2645          step_index = step->alternative_index;
2646        } else {
2647          break;
2648        }
2649      }
2650    }
2651
2652    // No more suffix modifiers
2653    else {
2654      break;
2655    }
2656  }
2657
2658  capture_quantifiers_mul(capture_quantifiers, quantifier);
2659
2660  return 0;
2661}
2662
2663TSQuery *ts_query_new(
2664  const TSLanguage *language,
2665  const char *source,
2666  uint32_t source_len,
2667  uint32_t *error_offset,
2668  TSQueryError *error_type
2669) {
2670  if (
2671    !language ||
2672    language->version > TREE_SITTER_LANGUAGE_VERSION ||
2673    language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
2674  ) {
2675    *error_type = TSQueryErrorLanguage;
2676    return NULL;
2677  }
2678
2679  TSQuery *self = ts_malloc(sizeof(TSQuery));
2680  *self = (TSQuery) {
2681    .steps = array_new(),
2682    .pattern_map = array_new(),
2683    .captures = symbol_table_new(),
2684    .capture_quantifiers = array_new(),
2685    .predicate_values = symbol_table_new(),
2686    .predicate_steps = array_new(),
2687    .patterns = array_new(),
2688    .step_offsets = array_new(),
2689    .string_buffer = array_new(),
2690    .negated_fields = array_new(),
2691    .repeat_symbols_with_rootless_patterns = array_new(),
2692    .wildcard_root_pattern_count = 0,
2693    .language = ts_language_copy(language),
2694  };
2695
2696  array_push(&self->negated_fields, 0);
2697
2698  // Parse all of the S-expressions in the given string.
2699  Stream stream = stream_new(source, source_len);
2700  stream_skip_whitespace(&stream);
2701  while (stream.input < stream.end) {
2702    uint32_t pattern_index = self->patterns.size;
2703    uint32_t start_step_index = self->steps.size;
2704    uint32_t start_predicate_step_index = self->predicate_steps.size;
2705    array_push(&self->patterns, ((QueryPattern) {
2706      .steps = (Slice) {.offset = start_step_index},
2707      .predicate_steps = (Slice) {.offset = start_predicate_step_index},
2708      .start_byte = stream_offset(&stream),
2709      .is_non_local = false,
2710    }));
2711    CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
2712    *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
2713    array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
2714
2715    QueryPattern *pattern = array_back(&self->patterns);
2716    pattern->steps.length = self->steps.size - start_step_index;
2717    pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;
2718
2719    // If any pattern could not be parsed, then report the error information
2720    // and terminate.
2721    if (*error_type) {
2722      if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
2723      *error_offset = stream_offset(&stream);
2724      capture_quantifiers_delete(&capture_quantifiers);
2725      ts_query_delete(self);
2726      return NULL;
2727    }
2728
2729    // Maintain a list of capture quantifiers for each pattern
2730    array_push(&self->capture_quantifiers, capture_quantifiers);
2731
2732    // Maintain a map that can look up patterns for a given root symbol.
2733    uint16_t wildcard_root_alternative_index = NONE;
2734    for (;;) {
2735      QueryStep *step = &self->steps.contents[start_step_index];
2736
2737      // If a pattern has a wildcard at its root, but it has a non-wildcard child,
2738      // then optimize the matching process by skipping matching the wildcard.
2739      // Later, during the matching process, the query cursor will check that
2740      // there is a parent node, and capture it if necessary.
2741      if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
2742        QueryStep *second_step = &self->steps.contents[start_step_index + 1];
2743        if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) {
2744          wildcard_root_alternative_index = step->alternative_index;
2745          start_step_index += 1;
2746          step = second_step;
2747        }
2748      }
2749
2750      // Determine whether the pattern has a single root node. This affects
2751      // decisions about whether or not to start matching the pattern when
2752      // a query cursor has a range restriction or when immediately within an
2753      // error node.
2754      uint32_t start_depth = step->depth;
2755      bool is_rooted = start_depth == 0;
2756      for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
2757        QueryStep *child_step = &self->steps.contents[step_index];
2758        if (child_step->is_dead_end) break;
2759        if (child_step->depth == start_depth) {
2760          is_rooted = false;
2761          break;
2762        }
2763      }
2764
2765      ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
2766        .step_index = start_step_index,
2767        .pattern_index = pattern_index,
2768        .is_rooted = is_rooted
2769      });
2770      if (step->symbol == WILDCARD_SYMBOL) {
2771        self->wildcard_root_pattern_count++;
2772      }
2773
2774      // If there are alternatives or options at the root of the pattern,
2775      // then add multiple entries to the pattern map.
2776      if (step->alternative_index != NONE) {
2777        start_step_index = step->alternative_index;
2778      } else if (wildcard_root_alternative_index != NONE) {
2779        start_step_index = wildcard_root_alternative_index;
2780        wildcard_root_alternative_index = NONE;
2781      } else {
2782        break;
2783      }
2784    }
2785  }
2786
2787  if (!ts_query__analyze_patterns(self, error_offset)) {
2788    *error_type = TSQueryErrorStructure;
2789    ts_query_delete(self);
2790    return NULL;
2791  }
2792
2793  array_delete(&self->string_buffer);
2794  return self;
2795}
2796
2797void ts_query_delete(TSQuery *self) {
2798  if (self) {
2799    array_delete(&self->steps);
2800    array_delete(&self->pattern_map);
2801    array_delete(&self->predicate_steps);
2802    array_delete(&self->patterns);
2803    array_delete(&self->step_offsets);
2804    array_delete(&self->string_buffer);
2805    array_delete(&self->negated_fields);
2806    array_delete(&self->repeat_symbols_with_rootless_patterns);
2807    ts_language_delete(self->language);
2808    symbol_table_delete(&self->captures);
2809    symbol_table_delete(&self->predicate_values);
2810    for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) {
2811      CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index);
2812      capture_quantifiers_delete(capture_quantifiers);
2813    }
2814    array_delete(&self->capture_quantifiers);
2815    ts_free(self);
2816  }
2817}
2818
2819uint32_t ts_query_pattern_count(const TSQuery *self) {
2820  return self->patterns.size;
2821}
2822
2823uint32_t ts_query_capture_count(const TSQuery *self) {
2824  return self->captures.slices.size;
2825}
2826
2827uint32_t ts_query_string_count(const TSQuery *self) {
2828  return self->predicate_values.slices.size;
2829}
2830
2831const char *ts_query_capture_name_for_id(
2832  const TSQuery *self,
2833  uint32_t index,
2834  uint32_t *length
2835) {
2836  return symbol_table_name_for_id(&self->captures, index, length);
2837}
2838
2839TSQuantifier ts_query_capture_quantifier_for_id(
2840  const TSQuery *self,
2841  uint32_t pattern_index,
2842  uint32_t capture_index
2843) {
2844  CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
2845  return capture_quantifier_for_id(capture_quantifiers, capture_index);
2846}
2847
2848const char *ts_query_string_value_for_id(
2849  const TSQuery *self,
2850  uint32_t index,
2851  uint32_t *length
2852) {
2853  return symbol_table_name_for_id(&self->predicate_values, index, length);
2854}
2855
2856const TSQueryPredicateStep *ts_query_predicates_for_pattern(
2857  const TSQuery *self,
2858  uint32_t pattern_index,
2859  uint32_t *step_count
2860) {
2861  Slice slice = self->patterns.contents[pattern_index].predicate_steps;
2862  *step_count = slice.length;
2863  if (self->predicate_steps.contents == NULL) {
2864    return NULL;
2865  }
2866  return &self->predicate_steps.contents[slice.offset];
2867}
2868
2869uint32_t ts_query_start_byte_for_pattern(
2870  const TSQuery *self,
2871  uint32_t pattern_index
2872) {
2873  return self->patterns.contents[pattern_index].start_byte;
2874}
2875
2876bool ts_query_is_pattern_rooted(
2877  const TSQuery *self,
2878  uint32_t pattern_index
2879) {
2880  for (unsigned i = 0; i < self->pattern_map.size; i++) {
2881    PatternEntry *entry = &self->pattern_map.contents[i];
2882    if (entry->pattern_index == pattern_index) {
2883      if (!entry->is_rooted) return false;
2884    }
2885  }
2886  return true;
2887}
2888
2889bool ts_query_is_pattern_non_local(
2890  const TSQuery *self,
2891  uint32_t pattern_index
2892) {
2893  if (pattern_index < self->patterns.size) {
2894    return self->patterns.contents[pattern_index].is_non_local;
2895  } else {
2896    return false;
2897  }
2898}
2899
2900bool ts_query_is_pattern_guaranteed_at_step(
2901  const TSQuery *self,
2902  uint32_t byte_offset
2903) {
2904  uint32_t step_index = UINT32_MAX;
2905  for (unsigned i = 0; i < self->step_offsets.size; i++) {
2906    StepOffset *step_offset = &self->step_offsets.contents[i];
2907    if (step_offset->byte_offset > byte_offset) break;
2908    step_index = step_offset->step_index;
2909  }
2910  if (step_index < self->steps.size) {
2911    return self->steps.contents[step_index].root_pattern_guaranteed;
2912  } else {
2913    return false;
2914  }
2915}
2916
2917bool ts_query__step_is_fallible(
2918  const TSQuery *self,
2919  uint16_t step_index
2920) {
2921  assert((uint32_t)step_index + 1 < self->steps.size);
2922  QueryStep *step = &self->steps.contents[step_index];
2923  QueryStep *next_step = &self->steps.contents[step_index + 1];
2924  return (
2925    next_step->depth != PATTERN_DONE_MARKER &&
2926    next_step->depth > step->depth &&
2927    !next_step->parent_pattern_guaranteed
2928  );
2929}
2930
2931void ts_query_disable_capture(
2932  TSQuery *self,
2933  const char *name,
2934  uint32_t length
2935) {
2936  // Remove capture information for any pattern step that previously
2937  // captured with the given name.
2938  int id = symbol_table_id_for_name(&self->captures, name, length);
2939  if (id != -1) {
2940    for (unsigned i = 0; i < self->steps.size; i++) {
2941      QueryStep *step = &self->steps.contents[i];
2942      query_step__remove_capture(step, id);
2943    }
2944  }
2945}
2946
2947void ts_query_disable_pattern(
2948  TSQuery *self,
2949  uint32_t pattern_index
2950) {
2951  // Remove the given pattern from the pattern map. Its steps will still
2952  // be in the `steps` array, but they will never be read.
2953  for (unsigned i = 0; i < self->pattern_map.size; i++) {
2954    PatternEntry *pattern = &self->pattern_map.contents[i];
2955    if (pattern->pattern_index == pattern_index) {
2956      array_erase(&self->pattern_map, i);
2957      i--;
2958    }
2959  }
2960}
2961
2962/***************
2963 * QueryCursor
2964 ***************/
2965
2966TSQueryCursor *ts_query_cursor_new(void) {
2967  TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
2968  *self = (TSQueryCursor) {
2969    .did_exceed_match_limit = false,
2970    .ascending = false,
2971    .halted = false,
2972    .states = array_new(),
2973    .finished_states = array_new(),
2974    .capture_list_pool = capture_list_pool_new(),
2975    .start_byte = 0,
2976    .end_byte = UINT32_MAX,
2977    .start_point = {0, 0},
2978    .end_point = POINT_MAX,
2979    .max_start_depth = UINT32_MAX,
2980  };
2981  array_reserve(&self->states, 8);
2982  array_reserve(&self->finished_states, 8);
2983  return self;
2984}
2985
2986void ts_query_cursor_delete(TSQueryCursor *self) {
2987  array_delete(&self->states);
2988  array_delete(&self->finished_states);
2989  ts_tree_cursor_delete(&self->cursor);
2990  capture_list_pool_delete(&self->capture_list_pool);
2991  ts_free(self);
2992}
2993
2994bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
2995  return self->did_exceed_match_limit;
2996}
2997
2998uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
2999  return self->capture_list_pool.max_capture_list_count;
3000}
3001
3002void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
3003  self->capture_list_pool.max_capture_list_count = limit;
3004}
3005
3006#ifdef DEBUG_EXECUTE_QUERY
3007#define LOG(...) fprintf(stderr, __VA_ARGS__)
3008#else
3009#define LOG(...)
3010#endif
3011
3012void ts_query_cursor_exec(
3013  TSQueryCursor *self,
3014  const TSQuery *query,
3015  TSNode node
3016) {
3017  if  (query) {
3018    LOG("query steps:\n");
3019    for (unsigned i = 0; i < query->steps.size; i++) {
3020      QueryStep *step = &query->steps.contents[i];
3021      LOG("  %u: {", i);
3022      if (step->depth == PATTERN_DONE_MARKER) {
3023        LOG("DONE");
3024      } else if (step->is_dead_end) {
3025        LOG("dead_end");
3026      } else if (step->is_pass_through) {
3027        LOG("pass_through");
3028      } else if (step->symbol != WILDCARD_SYMBOL) {
3029        LOG("symbol: %s", query->language->symbol_names[step->symbol]);
3030      } else {
3031        LOG("symbol: *");
3032      }
3033      if (step->field) {
3034        LOG(", field: %s", query->language->field_names[step->field]);
3035      }
3036      if (step->alternative_index != NONE) {
3037        LOG(", alternative: %u", step->alternative_index);
3038      }
3039      LOG("},\n");
3040    }
3041  }
3042
3043  array_clear(&self->states);
3044  array_clear(&self->finished_states);
3045  ts_tree_cursor_reset(&self->cursor, node);
3046  capture_list_pool_reset(&self->capture_list_pool);
3047  self->on_visible_node = true;
3048  self->next_state_id = 0;
3049  self->depth = 0;
3050  self->ascending = false;
3051  self->halted = false;
3052  self->query = query;
3053  self->did_exceed_match_limit = false;
3054}
3055
3056void ts_query_cursor_set_byte_range(
3057  TSQueryCursor *self,
3058  uint32_t start_byte,
3059  uint32_t end_byte
3060) {
3061  if (end_byte == 0) {
3062    end_byte = UINT32_MAX;
3063  }
3064  self->start_byte = start_byte;
3065  self->end_byte = end_byte;
3066}
3067
3068void ts_query_cursor_set_point_range(
3069  TSQueryCursor *self,
3070  TSPoint start_point,
3071  TSPoint end_point
3072) {
3073  if (end_point.row == 0 && end_point.column == 0) {
3074    end_point = POINT_MAX;
3075  }
3076  self->start_point = start_point;
3077  self->end_point = end_point;
3078}
3079
3080// Search through all of the in-progress states, and find the captured
3081// node that occurs earliest in the document.
3082static bool ts_query_cursor__first_in_progress_capture(
3083  TSQueryCursor *self,
3084  uint32_t *state_index,
3085  uint32_t *byte_offset,
3086  uint32_t *pattern_index,
3087  bool *root_pattern_guaranteed
3088) {
3089  bool result = false;
3090  *state_index = UINT32_MAX;
3091  *byte_offset = UINT32_MAX;
3092  *pattern_index = UINT32_MAX;
3093  for (unsigned i = 0; i < self->states.size; i++) {
3094    QueryState *state = &self->states.contents[i];
3095    if (state->dead) continue;
3096
3097    const CaptureList *captures = capture_list_pool_get(
3098      &self->capture_list_pool,
3099      state->capture_list_id
3100    );
3101    if (state->consumed_capture_count >= captures->size) {
3102      continue;
3103    }
3104
3105    TSNode node = captures->contents[state->consumed_capture_count].node;
3106    if (
3107      ts_node_end_byte(node) <= self->start_byte ||
3108      point_lte(ts_node_end_point(node), self->start_point)
3109    ) {
3110      state->consumed_capture_count++;
3111      i--;
3112      continue;
3113    }
3114
3115    uint32_t node_start_byte = ts_node_start_byte(node);
3116    if (
3117      !result ||
3118      node_start_byte < *byte_offset ||
3119      (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
3120    ) {
3121      QueryStep *step = &self->query->steps.contents[state->step_index];
3122      if (root_pattern_guaranteed) {
3123        *root_pattern_guaranteed = step->root_pattern_guaranteed;
3124      } else if (step->root_pattern_guaranteed) {
3125        continue;
3126      }
3127
3128      result = true;
3129      *state_index = i;
3130      *byte_offset = node_start_byte;
3131      *pattern_index = state->pattern_index;
3132    }
3133  }
3134  return result;
3135}
3136
3137// Determine which node is first in a depth-first traversal
3138int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
3139  if (left.id != right.id) {
3140    uint32_t left_start = ts_node_start_byte(left);
3141    uint32_t right_start = ts_node_start_byte(right);
3142    if (left_start < right_start) return -1;
3143    if (left_start > right_start) return 1;
3144    uint32_t left_node_count = ts_node_end_byte(left);
3145    uint32_t right_node_count = ts_node_end_byte(right);
3146    if (left_node_count > right_node_count) return -1;
3147    if (left_node_count < right_node_count) return 1;
3148  }
3149  return 0;
3150}
3151
3152// Determine if either state contains a superset of the other state's captures.
3153void ts_query_cursor__compare_captures(
3154  TSQueryCursor *self,
3155  QueryState *left_state,
3156  QueryState *right_state,
3157  bool *left_contains_right,
3158  bool *right_contains_left
3159) {
3160  const CaptureList *left_captures = capture_list_pool_get(
3161    &self->capture_list_pool,
3162    left_state->capture_list_id
3163  );
3164  const CaptureList *right_captures = capture_list_pool_get(
3165    &self->capture_list_pool,
3166    right_state->capture_list_id
3167  );
3168  *left_contains_right = true;
3169  *right_contains_left = true;
3170  unsigned i = 0, j = 0;
3171  for (;;) {
3172    if (i < left_captures->size) {
3173      if (j < right_captures->size) {
3174        TSQueryCapture *left = &left_captures->contents[i];
3175        TSQueryCapture *right = &right_captures->contents[j];
3176        if (left->node.id == right->node.id && left->index == right->index) {
3177          i++;
3178          j++;
3179        } else {
3180          switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
3181            case -1:
3182              *right_contains_left = false;
3183              i++;
3184              break;
3185            case 1:
3186              *left_contains_right = false;
3187              j++;
3188              break;
3189            default:
3190              *right_contains_left = false;
3191              *left_contains_right = false;
3192              i++;
3193              j++;
3194              break;
3195          }
3196        }
3197      } else {
3198        *right_contains_left = false;
3199        break;
3200      }
3201    } else {
3202      if (j < right_captures->size) {
3203        *left_contains_right = false;
3204      }
3205      break;
3206    }
3207  }
3208}
3209
3210static void ts_query_cursor__add_state(
3211  TSQueryCursor *self,
3212  const PatternEntry *pattern
3213) {
3214  QueryStep *step = &self->query->steps.contents[pattern->step_index];
3215  uint32_t start_depth = self->depth - step->depth;
3216
3217  // Keep the states array in ascending order of start_depth and pattern_index,
3218  // so that it can be processed more efficiently elsewhere. Usually, there is
3219  // no work to do here because of two facts:
3220  // * States with lower start_depth are naturally added first due to the
3221  //   order in which nodes are visited.
3222  // * Earlier patterns are naturally added first because of the ordering of the
3223  //   pattern_map data structure that's used to initiate matches.
3224  //
3225  // This loop is only needed in cases where two conditions hold:
3226  // * A pattern consists of more than one sibling node, so that its states
3227  //   remain in progress after exiting the node that started the match.
3228  // * The first node in the pattern matches against multiple nodes at the
3229  //   same depth.
3230  //
3231  // An example of this is the pattern '((comment)* (function))'. If multiple
3232  // `comment` nodes appear in a row, then we may initiate a new state for this
3233  // pattern while another state for the same pattern is already in progress.
3234  // If there are multiple patterns like this in a query, then this loop will
3235  // need to execute in order to keep the states ordered by pattern_index.
3236  uint32_t index = self->states.size;
3237  while (index > 0) {
3238    QueryState *prev_state = &self->states.contents[index - 1];
3239    if (prev_state->start_depth < start_depth) break;
3240    if (prev_state->start_depth == start_depth) {
3241      // Avoid inserting an unnecessary duplicate state, which would be
3242      // immediately pruned by the longest-match criteria.
3243      if (
3244        prev_state->pattern_index == pattern->pattern_index &&
3245        prev_state->step_index == pattern->step_index
3246      ) return;
3247      if (prev_state->pattern_index <= pattern->pattern_index) break;
3248    }
3249    index--;
3250  }
3251
3252  LOG(
3253    "  start state. pattern:%u, step:%u\n",
3254    pattern->pattern_index,
3255    pattern->step_index
3256  );
3257  array_insert(&self->states, index, ((QueryState) {
3258    .id = UINT32_MAX,
3259    .capture_list_id = NONE,
3260    .step_index = pattern->step_index,
3261    .pattern_index = pattern->pattern_index,
3262    .start_depth = start_depth,
3263    .consumed_capture_count = 0,
3264    .seeking_immediate_match = true,
3265    .has_in_progress_alternatives = false,
3266    .needs_parent = step->depth == 1,
3267    .dead = false,
3268  }));
3269}
3270
3271// Acquire a capture list for this state. If there are no capture lists left in the
3272// pool, this will steal the capture list from another existing state, and mark that
3273// other state as 'dead'.
3274static CaptureList *ts_query_cursor__prepare_to_capture(
3275  TSQueryCursor *self,
3276  QueryState *state,
3277  unsigned state_index_to_preserve
3278) {
3279  if (state->capture_list_id == NONE) {
3280    state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
3281
3282    // If there are no capture lists left in the pool, then terminate whichever
3283    // state has captured the earliest node in the document, and steal its
3284    // capture list.
3285    if (state->capture_list_id == NONE) {
3286      self->did_exceed_match_limit = true;
3287      uint32_t state_index, byte_offset, pattern_index;
3288      if (
3289        ts_query_cursor__first_in_progress_capture(
3290          self,
3291          &state_index,
3292          &byte_offset,
3293          &pattern_index,
3294          NULL
3295        ) &&
3296        state_index != state_index_to_preserve
3297      ) {
3298        LOG(
3299          "  abandon state. index:%u, pattern:%u, offset:%u.\n",
3300          state_index, pattern_index, byte_offset
3301        );
3302        QueryState *other_state = &self->states.contents[state_index];
3303        state->capture_list_id = other_state->capture_list_id;
3304        other_state->capture_list_id = NONE;
3305        other_state->dead = true;
3306        CaptureList *list = capture_list_pool_get_mut(
3307          &self->capture_list_pool,
3308          state->capture_list_id
3309        );
3310        array_clear(list);
3311        return list;
3312      } else {
3313        LOG("  ran out of capture lists");
3314        return NULL;
3315      }
3316    }
3317  }
3318  return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
3319}
3320
3321static void ts_query_cursor__capture(
3322  TSQueryCursor *self,
3323  QueryState *state,
3324  QueryStep *step,
3325  TSNode node
3326) {
3327  if (state->dead) return;
3328  CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
3329  if (!capture_list) {
3330    state->dead = true;
3331    return;
3332  }
3333
3334  for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
3335    uint16_t capture_id = step->capture_ids[j];
3336    if (step->capture_ids[j] == NONE) break;
3337    array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
3338    LOG(
3339      "  capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
3340      ts_node_type(node),
3341      state->pattern_index,
3342      capture_id,
3343      capture_list->size
3344    );
3345  }
3346}
3347
3348// Duplicate the given state and insert the newly-created state immediately after
3349// the given state in the `states` array. Ensures that the given state reference is
3350// still valid, even if the states array is reallocated.
3351static QueryState *ts_query_cursor__copy_state(
3352  TSQueryCursor *self,
3353  QueryState **state_ref
3354) {
3355  const QueryState *state = *state_ref;
3356  uint32_t state_index = (uint32_t)(state - self->states.contents);
3357  QueryState copy = *state;
3358  copy.capture_list_id = NONE;
3359
3360  // If the state has captures, copy its capture list.
3361  if (state->capture_list_id != NONE) {
3362    CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
3363    if (!new_captures) return NULL;
3364    const CaptureList *old_captures = capture_list_pool_get(
3365      &self->capture_list_pool,
3366      state->capture_list_id
3367    );
3368    array_push_all(new_captures, old_captures);
3369  }
3370
3371  array_insert(&self->states, state_index + 1, copy);
3372  *state_ref = &self->states.contents[state_index];
3373  return &self->states.contents[state_index + 1];
3374}
3375
3376static inline bool ts_query_cursor__should_descend(
3377  TSQueryCursor *self,
3378  bool node_intersects_range
3379) {
3380
3381  if (node_intersects_range && self->depth < self->max_start_depth) {
3382    return true;
3383  }
3384
3385  // If there are in-progress matches whose remaining steps occur
3386  // deeper in the tree, then descend.
3387  for (unsigned i = 0; i < self->states.size; i++) {
3388    QueryState *state = &self->states.contents[i];;
3389    QueryStep *next_step = &self->query->steps.contents[state->step_index];
3390    if (
3391      next_step->depth != PATTERN_DONE_MARKER &&
3392      state->start_depth + next_step->depth > self->depth
3393    ) {
3394      return true;
3395    }
3396  }
3397
3398  if (self->depth >= self->max_start_depth) {
3399    return false;
3400  }
3401
3402  // If the current node is hidden, then a non-rooted pattern might match
3403  // one if its roots inside of this node, and match another of its roots
3404  // as part of a sibling node, so we may need to descend.
3405  if (!self->on_visible_node) {
3406    // Descending into a repetition node outside of the range can be
3407    // expensive, because these nodes can have many visible children.
3408    // Avoid descending into repetition nodes unless we have already
3409    // determined that this query can match rootless patterns inside
3410    // of this type of repetition node.
3411    Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
3412    if (ts_subtree_is_repetition(subtree)) {
3413      bool exists;
3414      uint32_t index;
3415      array_search_sorted_by(
3416        &self->query->repeat_symbols_with_rootless_patterns,,
3417        ts_subtree_symbol(subtree),
3418        &index,
3419        &exists
3420      );
3421      return exists;
3422    }
3423
3424    return true;
3425  }
3426
3427  return false;
3428}
3429
3430// Walk the tree, processing patterns until at least one pattern finishes,
3431// If one or more patterns finish, return `true` and store their states in the
3432// `finished_states` array. Multiple patterns can finish on the same node. If
3433// there are no more matches, return `false`.
3434static inline bool ts_query_cursor__advance(
3435  TSQueryCursor *self,
3436  bool stop_on_definite_step
3437) {
3438  bool did_match = false;
3439  for (;;) {
3440    if (self->halted) {
3441      while (self->states.size > 0) {
3442        QueryState state = array_pop(&self->states);
3443        capture_list_pool_release(
3444          &self->capture_list_pool,
3445          state.capture_list_id
3446        );
3447      }
3448    }
3449
3450    if (did_match || self->halted) return did_match;
3451
3452    // Exit the current node.
3453    if (self->ascending) {
3454      if (self->on_visible_node) {
3455        LOG(
3456          "leave node. depth:%u, type:%s\n",
3457          self->depth,
3458          ts_node_type(ts_tree_cursor_current_node(&self->cursor))
3459        );
3460
3461        // After leaving a node, remove any states that cannot make further progress.
3462        uint32_t deleted_count = 0;
3463        for (unsigned i = 0, n = self->states.size; i < n; i++) {
3464          QueryState *state = &self->states.contents[i];
3465          QueryStep *step = &self->query->steps.contents[state->step_index];
3466
3467          // If a state completed its pattern inside of this node, but was deferred from finishing
3468          // in order to search for longer matches, mark it as finished.
3469          if (
3470            step->depth == PATTERN_DONE_MARKER &&
3471            (state->start_depth > self->depth || self->depth == 0)
3472          ) {
3473            LOG("  finish pattern %u\n", state->pattern_index);
3474            array_push(&self->finished_states, *state);
3475            did_match = true;
3476            deleted_count++;
3477          }
3478
3479          // If a state needed to match something within this node, then remove that state
3480          // as it has failed to match.
3481          else if (
3482            step->depth != PATTERN_DONE_MARKER &&
3483            (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth
3484          ) {
3485            LOG(
3486              "  failed to match. pattern:%u, step:%u\n",
3487              state->pattern_index,
3488              state->step_index
3489            );
3490            capture_list_pool_release(
3491              &self->capture_list_pool,
3492              state->capture_list_id
3493            );
3494            deleted_count++;
3495          }
3496
3497          else if (deleted_count > 0) {
3498            self->states.contents[i - deleted_count] = *state;
3499          }
3500        }
3501        self->states.size -= deleted_count;
3502      }
3503
3504      // Leave this node by stepping to its next sibling or to its parent.
3505      switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
3506        case TreeCursorStepVisible:
3507          if (!self->on_visible_node) {
3508            self->depth++;
3509            self->on_visible_node = true;
3510          }
3511          self->ascending = false;
3512          break;
3513        case TreeCursorStepHidden:
3514          if (self->on_visible_node) {
3515            self->depth--;
3516            self->on_visible_node = false;
3517          }
3518          self->ascending = false;
3519          break;
3520        default:
3521          if (ts_tree_cursor_goto_parent(&self->cursor)) {
3522            self->depth--;
3523          } else {
3524            LOG("halt at root\n");
3525            self->halted = true;
3526          }
3527      }
3528    }
3529
3530    // Enter a new node.
3531    else {
3532      // Get the properties of the current node.
3533      TSNode node = ts_tree_cursor_current_node(&self->cursor);
3534      TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
3535      bool parent_precedes_range = !ts_node_is_null(parent_node) && (
3536        ts_node_end_byte(parent_node) <= self->start_byte ||
3537        point_lte(ts_node_end_point(parent_node), self->start_point)
3538      );
3539      bool parent_follows_range = !ts_node_is_null(parent_node) && (
3540        ts_node_start_byte(parent_node) >= self->end_byte ||
3541        point_gte(ts_node_start_point(parent_node), self->end_point)
3542      );
3543      bool node_precedes_range = parent_precedes_range || (
3544        ts_node_end_byte(node) <= self->start_byte ||
3545        point_lte(ts_node_end_point(node), self->start_point)
3546      );
3547      bool node_follows_range = parent_follows_range || (
3548        ts_node_start_byte(node) >= self->end_byte ||
3549        point_gte(ts_node_start_point(node), self->end_point)
3550      );
3551      bool parent_intersects_range = !parent_precedes_range && !parent_follows_range;
3552      bool node_intersects_range = !node_precedes_range && !node_follows_range;
3553
3554      if (self->on_visible_node) {
3555        TSSymbol symbol = ts_node_symbol(node);
3556        bool is_named = ts_node_is_named(node);
3557        bool has_later_siblings;
3558        bool has_later_named_siblings;
3559        bool can_have_later_siblings_with_this_field;
3560        TSFieldId field_id = 0;
3561        TSSymbol supertypes[8] = {0};
3562        unsigned supertype_count = 8;
3563        ts_tree_cursor_current_status(
3564          &self->cursor,
3565          &field_id,
3566          &has_later_siblings,
3567          &has_later_named_siblings,
3568          &can_have_later_siblings_with_this_field,
3569          supertypes,
3570          &supertype_count
3571        );
3572        LOG(
3573          "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
3574          self->depth,
3575          ts_node_type(node),
3576          ts_language_field_name_for_id(self->query->language, field_id),
3577          ts_node_start_point(node).row,
3578          self->states.size,
3579          self->finished_states.size
3580        );
3581
3582        bool node_is_error = symbol == ts_builtin_sym_error;
3583        bool parent_is_error =
3584          !ts_node_is_null(parent_node) &&
3585          ts_node_symbol(parent_node) == ts_builtin_sym_error;
3586
3587        // Add new states for any patterns whose root node is a wildcard.
3588        if (!node_is_error) {
3589          for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
3590            PatternEntry *pattern = &self->query->pattern_map.contents[i];
3591
3592            // If this node matches the first step of the pattern, then add a new
3593            // state at the start of this pattern.
3594            QueryStep *step = &self->query->steps.contents[pattern->step_index];
3595            uint32_t start_depth = self->depth - step->depth;
3596            if (
3597              (pattern->is_rooted ?
3598                node_intersects_range :
3599                (parent_intersects_range && !parent_is_error)) &&
3600              (!step->field || field_id == step->field) &&
3601              (!step->supertype_symbol || supertype_count > 0) &&
3602              (start_depth <= self->max_start_depth)
3603            ) {
3604              ts_query_cursor__add_state(self, pattern);
3605            }
3606          }
3607        }
3608
3609        // Add new states for any patterns whose root node matches this node.
3610        unsigned i;
3611        if (ts_query__pattern_map_search(self->query, symbol, &i)) {
3612          PatternEntry *pattern = &self->query->pattern_map.contents[i];
3613
3614          QueryStep *step = &self->query->steps.contents[pattern->step_index];
3615          uint32_t start_depth = self->depth - step->depth;
3616          do {
3617            // If this node matches the first step of the pattern, then add a new
3618            // state at the start of this pattern.
3619            if (
3620              (pattern->is_rooted ?
3621                node_intersects_range :
3622                (parent_intersects_range && !parent_is_error)) &&
3623              (!step->field || field_id == step->field) &&
3624              (start_depth <= self->max_start_depth)
3625            ) {
3626              ts_query_cursor__add_state(self, pattern);
3627            }
3628
3629            // Advance to the next pattern whose root node matches this node.
3630            i++;
3631            if (i == self->query->pattern_map.size) break;
3632            pattern = &self->query->pattern_map.contents[i];
3633            step = &self->query->steps.contents[pattern->step_index];
3634          } while (step->symbol == symbol);
3635        }
3636
3637        // Update all of the in-progress states with current node.
3638        for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) {
3639          QueryState *state = &self->states.contents[j];
3640          QueryStep *step = &self->query->steps.contents[state->step_index];
3641          state->has_in_progress_alternatives = false;
3642          copy_count = 0;
3643
3644          // Check that the node matches all of the criteria for the next
3645          // step of the pattern.
3646          if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
3647
3648          // Determine if this node matches this step of the pattern, and also
3649          // if this node can have later siblings that match this step of the
3650          // pattern.
3651          bool node_does_match = false;
3652          if (step->symbol == WILDCARD_SYMBOL) {
3653            node_does_match = !node_is_error && (is_named || !step->is_named);
3654          } else {
3655            node_does_match = symbol == step->symbol;
3656          }
3657          bool later_sibling_can_match = has_later_siblings;
3658          if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
3659            later_sibling_can_match = false;
3660          }
3661          if (step->is_last_child && has_later_named_siblings) {
3662            node_does_match = false;
3663          }
3664          if (step->supertype_symbol) {
3665            bool has_supertype = false;
3666            for (unsigned k = 0; k < supertype_count; k++) {
3667              if (supertypes[k] == step->supertype_symbol) {
3668                has_supertype = true;
3669                break;
3670              }
3671            }
3672            if (!has_supertype) node_does_match = false;
3673          }
3674          if (step->field) {
3675            if (step->field == field_id) {
3676              if (!can_have_later_siblings_with_this_field) {
3677                later_sibling_can_match = false;
3678              }
3679            } else {
3680              node_does_match = false;
3681            }
3682          }
3683
3684          if (step->negated_field_list_id) {
3685            TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
3686            for (;;) {
3687              TSFieldId negated_field_id = *negated_field_ids;
3688              if (negated_field_id) {
3689                negated_field_ids++;
3690                if (ts_node_child_by_field_id(node, negated_field_id).id) {
3691                  node_does_match = false;
3692                  break;
3693                }
3694              } else {
3695                break;
3696              }
3697            }
3698          }
3699
3700          // Remove states immediately if it is ever clear that they cannot match.
3701          if (!node_does_match) {
3702            if (!later_sibling_can_match) {
3703              LOG(
3704                "  discard state. pattern:%u, step:%u\n",
3705                state->pattern_index,
3706                state->step_index
3707              );
3708              capture_list_pool_release(
3709                &self->capture_list_pool,
3710                state->capture_list_id
3711              );
3712              array_erase(&self->states, j);
3713              j--;
3714            }
3715            continue;
3716          }
3717
3718          // Some patterns can match their root node in multiple ways, capturing different
3719          // children. If this pattern step could match later children within the same
3720          // parent, then this query state cannot simply be updated in place. It must be
3721          // split into two states: one that matches this node, and one which skips over
3722          // this node, to preserve the possibility of matching later siblings.
3723          if (later_sibling_can_match && (
3724            step->contains_captures ||
3725            ts_query__step_is_fallible(self->query, state->step_index)
3726          )) {
3727            if (ts_query_cursor__copy_state(self, &state)) {
3728              LOG(
3729                "  split state for capture. pattern:%u, step:%u\n",
3730                state->pattern_index,
3731                state->step_index
3732              );
3733              copy_count++;
3734            }
3735          }
3736
3737          // If this pattern started with a wildcard, such that the pattern map
3738          // actually points to the *second* step of the pattern, then check
3739          // that the node has a parent, and capture the parent node if necessary.
3740          if (state->needs_parent) {
3741            TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
3742            if (ts_node_is_null(parent)) {
3743              LOG("  missing parent node\n");
3744              state->dead = true;
3745            } else {
3746              state->needs_parent = false;
3747              QueryStep *skipped_wildcard_step = step;
3748              do {
3749                skipped_wildcard_step--;
3750              } while (
3751                skipped_wildcard_step->is_dead_end ||
3752                skipped_wildcard_step->is_pass_through ||
3753                skipped_wildcard_step->depth > 0
3754              );
3755              if (skipped_wildcard_step->capture_ids[0] != NONE) {
3756                LOG("  capture wildcard parent\n");
3757                ts_query_cursor__capture(
3758                  self,
3759                  state,
3760                  skipped_wildcard_step,
3761                  parent
3762                );
3763              }
3764            }
3765          }
3766
3767          // If the current node is captured in this pattern, add it to the capture list.
3768          if (step->capture_ids[0] != NONE) {
3769            ts_query_cursor__capture(self, state, step, node);
3770          }
3771
3772          if (state->dead) {
3773            array_erase(&self->states, j);
3774            j--;
3775            continue;
3776          }
3777
3778          // Advance this state to the next step of its pattern.
3779          state->step_index++;
3780          state->seeking_immediate_match = false;
3781          LOG(
3782            "  advance state. pattern:%u, step:%u\n",
3783            state->pattern_index,
3784            state->step_index
3785          );
3786
3787          QueryStep *next_step = &self->query->steps.contents[state->step_index];
3788          if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
3789
3790          // If this state's next step has an alternative step, then copy the state in order
3791          // to pursue both alternatives. The alternative step itself may have an alternative,
3792          // so this is an interactive process.
3793          unsigned end_index = j + 1;
3794          for (unsigned k = j; k < end_index; k++) {
3795            QueryState *child_state = &self->states.contents[k];
3796            QueryStep *child_step = &self->query->steps.contents[child_state->step_index];
3797            if (child_step->alternative_index != NONE) {
3798              // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
3799              // via its alternative index. When a state reaches a dead-end step, it jumps straight
3800              // to the step's alternative.
3801              if (child_step->is_dead_end) {
3802                child_state->step_index = child_step->alternative_index;
3803                k--;
3804                continue;
3805              }
3806
3807              // A "pass-through" step exists only to add a branch into the step sequence,
3808              // via its alternative_index. When a state reaches a pass-through step, it splits
3809              // in order to process the alternative step, and then it advances to the next step.
3810              if (child_step->is_pass_through) {
3811                child_state->step_index++;
3812                k--;
3813              }
3814
3815              QueryState *copy = ts_query_cursor__copy_state(self, &child_state);
3816              if (copy) {
3817                LOG(
3818                  "  split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
3819                  copy->pattern_index,
3820                  copy->step_index,
3821                  next_step->alternative_index,
3822                  next_step->alternative_is_immediate,
3823                  capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
3824                );
3825                end_index++;
3826                copy_count++;
3827                copy->step_index = child_step->alternative_index;
3828                if (child_step->alternative_is_immediate) {
3829                  copy->seeking_immediate_match = true;
3830                }
3831              }
3832            }
3833          }
3834        }
3835
3836        for (unsigned j = 0; j < self->states.size; j++) {
3837          QueryState *state = &self->states.contents[j];
3838          if (state->dead) {
3839            array_erase(&self->states, j);
3840            j--;
3841            continue;
3842          }
3843
3844          // Enforce the longest-match criteria. When a query pattern contains optional or
3845          // repeated nodes, this is necessary to avoid multiple redundant states, where
3846          // one state has a strict subset of another state's captures.
3847          bool did_remove = false;
3848          for (unsigned k = j + 1; k < self->states.size; k++) {
3849            QueryState *other_state = &self->states.contents[k];
3850
3851            // Query states are kept in ascending order of start_depth and pattern_index.
3852            // Since the longest-match criteria is only used for deduping matches of the same
3853            // pattern and root node, we only need to perform pairwise comparisons within a
3854            // small slice of the states array.
3855            if (
3856              other_state->start_depth != state->start_depth ||
3857              other_state->pattern_index != state->pattern_index
3858            ) break;
3859
3860            bool left_contains_right, right_contains_left;
3861            ts_query_cursor__compare_captures(
3862              self,
3863              state,
3864              other_state,
3865              &left_contains_right,
3866              &right_contains_left
3867            );
3868            if (left_contains_right) {
3869              if (state->step_index == other_state->step_index) {
3870                LOG(
3871                  "  drop shorter state. pattern: %u, step_index: %u\n",
3872                  state->pattern_index,
3873                  state->step_index
3874                );
3875                capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
3876                array_erase(&self->states, k);
3877                k--;
3878                continue;
3879              }
3880              other_state->has_in_progress_alternatives = true;
3881            }
3882            if (right_contains_left) {
3883              if (state->step_index == other_state->step_index) {
3884                LOG(
3885                  "  drop shorter state. pattern: %u, step_index: %u\n",
3886                  state->pattern_index,
3887                  state->step_index
3888                );
3889                capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3890                array_erase(&self->states, j);
3891                j--;
3892                did_remove = true;
3893                break;
3894              }
3895              state->has_in_progress_alternatives = true;
3896            }
3897          }
3898
3899          // If the state is at the end of its pattern, remove it from the list
3900          // of in-progress states and add it to the list of finished states.
3901          if (!did_remove) {
3902            LOG(
3903              "  keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
3904              state->pattern_index,
3905              state->start_depth,
3906              state->step_index,
3907              capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
3908            );
3909            QueryStep *next_step = &self->query->steps.contents[state->step_index];
3910            if (next_step->depth == PATTERN_DONE_MARKER) {
3911              if (state->has_in_progress_alternatives) {
3912                LOG("  defer finishing pattern %u\n", state->pattern_index);
3913              } else {
3914                LOG("  finish pattern %u\n", state->pattern_index);
3915                array_push(&self->finished_states, *state);
3916                array_erase(&self->states, (uint32_t)(state - self->states.contents));
3917                did_match = true;
3918                j--;
3919              }
3920            }
3921          }
3922        }
3923      }
3924
3925      if (ts_query_cursor__should_descend(self, node_intersects_range)) {
3926        switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) {
3927          case TreeCursorStepVisible:
3928            self->depth++;
3929            self->on_visible_node = true;
3930            continue;
3931          case TreeCursorStepHidden:
3932            self->on_visible_node = false;
3933            continue;
3934          default:
3935            break;
3936        }
3937      }
3938
3939      self->ascending = true;
3940    }
3941  }
3942}
3943
3944bool ts_query_cursor_next_match(
3945  TSQueryCursor *self,
3946  TSQueryMatch *match
3947) {
3948  if (self->finished_states.size == 0) {
3949    if (!ts_query_cursor__advance(self, false)) {
3950      return false;
3951    }
3952  }
3953
3954  QueryState *state = &self->finished_states.contents[0];
3955  if (state->id == UINT32_MAX) state->id = self->next_state_id++;
3956  match->id = state->id;
3957  match->pattern_index = state->pattern_index;
3958  const CaptureList *captures = capture_list_pool_get(
3959    &self->capture_list_pool,
3960    state->capture_list_id
3961  );
3962  match->captures = captures->contents;
3963  match->capture_count = captures->size;
3964  capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3965  array_erase(&self->finished_states, 0);
3966  return true;
3967}
3968
3969void ts_query_cursor_remove_match(
3970  TSQueryCursor *self,
3971  uint32_t match_id
3972) {
3973  for (unsigned i = 0; i < self->finished_states.size; i++) {
3974    const QueryState *state = &self->finished_states.contents[i];
3975    if (state->id == match_id) {
3976      capture_list_pool_release(
3977        &self->capture_list_pool,
3978        state->capture_list_id
3979      );
3980      array_erase(&self->finished_states, i);
3981      return;
3982    }
3983  }
3984
3985  // Remove unfinished query states as well to prevent future
3986  // captures for a match being removed.
3987  for (unsigned i = 0; i < self->states.size; i++) {
3988    const QueryState *state = &self->states.contents[i];
3989    if (state->id == match_id) {
3990      capture_list_pool_release(
3991        &self->capture_list_pool,
3992        state->capture_list_id
3993      );
3994      array_erase(&self->states, i);
3995      return;
3996    }
3997  }
3998}
3999
4000bool ts_query_cursor_next_capture(
4001  TSQueryCursor *self,
4002  TSQueryMatch *match,
4003  uint32_t *capture_index
4004) {
4005  // The goal here is to return captures in order, even though they may not
4006  // be discovered in order, because patterns can overlap. Search for matches
4007  // until there is a finished capture that is before any unfinished capture.
4008  for (;;) {
4009    // First, find the earliest capture in an unfinished match.
4010    uint32_t first_unfinished_capture_byte;
4011    uint32_t first_unfinished_pattern_index;
4012    uint32_t first_unfinished_state_index;
4013    bool first_unfinished_state_is_definite = false;
4014    ts_query_cursor__first_in_progress_capture(
4015      self,
4016      &first_unfinished_state_index,
4017      &first_unfinished_capture_byte,
4018      &first_unfinished_pattern_index,
4019      &first_unfinished_state_is_definite
4020    );
4021
4022    // Then find the earliest capture in a finished match. It must occur
4023    // before the first capture in an *unfinished* match.
4024    QueryState *first_finished_state = NULL;
4025    uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
4026    uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
4027    for (unsigned i = 0; i < self->finished_states.size;) {
4028      QueryState *state = &self->finished_states.contents[i];
4029      const CaptureList *captures = capture_list_pool_get(
4030        &self->capture_list_pool,
4031        state->capture_list_id
4032      );
4033
4034      // Remove states whose captures are all consumed.
4035      if (state->consumed_capture_count >= captures->size) {
4036        capture_list_pool_release(
4037          &self->capture_list_pool,
4038          state->capture_list_id
4039        );
4040        array_erase(&self->finished_states, i);
4041        continue;
4042      }
4043
4044      TSNode node = captures->contents[state->consumed_capture_count].node;
4045
4046      bool node_precedes_range = (
4047        ts_node_end_byte(node) <= self->start_byte ||
4048        point_lte(ts_node_end_point(node), self->start_point)
4049      );
4050      bool node_follows_range = (
4051        ts_node_start_byte(node) >= self->end_byte ||
4052        point_gte(ts_node_start_point(node), self->end_point)
4053      );
4054      bool node_outside_of_range = node_precedes_range || node_follows_range;
4055
4056      // Skip captures that are outside of the cursor's range.
4057      if (node_outside_of_range) {
4058        state->consumed_capture_count++;
4059        continue;
4060      }
4061
4062      uint32_t node_start_byte = ts_node_start_byte(node);
4063      if (
4064        node_start_byte < first_finished_capture_byte ||
4065        (
4066          node_start_byte == first_finished_capture_byte &&
4067          state->pattern_index < first_finished_pattern_index
4068        )
4069      ) {
4070        first_finished_state = state;
4071        first_finished_capture_byte = node_start_byte;
4072        first_finished_pattern_index = state->pattern_index;
4073      }
4074      i++;
4075    }
4076
4077    // If there is finished capture that is clearly before any unfinished
4078    // capture, then return its match, and its capture index. Internally
4079    // record the fact that the capture has been 'consumed'.
4080    QueryState *state;
4081    if (first_finished_state) {
4082      state = first_finished_state;
4083    } else if (first_unfinished_state_is_definite) {
4084      state = &self->states.contents[first_unfinished_state_index];
4085    } else {
4086      state = NULL;
4087    }
4088
4089    if (state) {
4090      if (state->id == UINT32_MAX) state->id = self->next_state_id++;
4091      match->id = state->id;
4092      match->pattern_index = state->pattern_index;
4093      const CaptureList *captures = capture_list_pool_get(
4094        &self->capture_list_pool,
4095        state->capture_list_id
4096      );
4097      match->captures = captures->contents;
4098      match->capture_count = captures->size;
4099      *capture_index = state->consumed_capture_count;
4100      state->consumed_capture_count++;
4101      return true;
4102    }
4103
4104    if (capture_list_pool_is_empty(&self->capture_list_pool)) {
4105      LOG(
4106        "  abandon state. index:%u, pattern:%u, offset:%u.\n",
4107        first_unfinished_state_index,
4108        first_unfinished_pattern_index,
4109        first_unfinished_capture_byte
4110      );
4111      capture_list_pool_release(
4112        &self->capture_list_pool,
4113        self->states.contents[first_unfinished_state_index].capture_list_id
4114      );
4115      array_erase(&self->states, first_unfinished_state_index);
4116    }
4117
4118    // If there are no finished matches that are ready to be returned, then
4119    // continue finding more matches.
4120    if (
4121      !ts_query_cursor__advance(self, true) &&
4122      self->finished_states.size == 0
4123    ) return false;
4124  }
4125}
4126
4127void ts_query_cursor_set_max_start_depth(
4128  TSQueryCursor *self,
4129  uint32_t max_start_depth
4130) {
4131  self->max_start_depth = max_start_depth;
4132}
4133
4134#undef LOG