1#define _POSIX_C_SOURCE 200112L
   2
   3#include <time.h>
   4#include <assert.h>
   5#include <stdio.h>
   6#include <limits.h>
   7#include <stdbool.h>
   8#include <inttypes.h>
   9#include "api.h"
  10#include "./alloc.h"
  11#include "./array.h"
  12#include "./atomic.h"
  13#include "./clock.h"
  14#include "./error_costs.h"
  15#include "./get_changed_ranges.h"
  16#include "./language.h"
  17#include "./length.h"
  18#include "./lexer.h"
  19#include "./reduce_action.h"
  20#include "./reusable_node.h"
  21#include "./stack.h"
  22#include "./subtree.h"
  23#include "./tree.h"
  24#include "./wasm_store.h"
  25
  26#define LOG(...)                                                                            \
  27  if (self->lexer.logger.log || self->dot_graph_file) {                                     \
  28    snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
  29    ts_parser__log(self);                                                                   \
  30  }
  31
  32#define LOG_LOOKAHEAD(symbol_name, size)                      \
  33  if (self->lexer.logger.log || self->dot_graph_file) {       \
  34    char *buf = self->lexer.debug_buffer;                     \
  35    const char *symbol = symbol_name;                         \
  36    int off = sprintf(buf, "lexed_lookahead sym:");           \
  37    for (                                                     \
  38      int i = 0;                                              \
  39      symbol[i] != '\0'                                       \
  40      && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;         \
  41      i++                                                     \
  42    ) {                                                       \
  43      switch (symbol[i]) {                                    \
  44      case '\t': buf[off++] = '\\'; buf[off++] = 't'; break;  \
  45      case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break;  \
  46      case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break;  \
  47      case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break;  \
  48      case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break;  \
  49      case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \
  50      default:   buf[off++] = symbol[i]; break;               \
  51      }                                                       \
  52    }                                                         \
  53    snprintf(                                                 \
  54      buf + off,                                              \
  55      TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off,            \
  56      ", size:%u",                                            \
  57      size                                                    \
  58    );                                                        \
  59    ts_parser__log(self);                                     \
  60  }
  61
  62#define LOG_STACK()                                                              \
  63  if (self->dot_graph_file) {                                                    \
  64    ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
  65    fputs("\n\n", self->dot_graph_file);                                         \
  66  }
  67
  68#define LOG_TREE(tree)                                                      \
  69  if (self->dot_graph_file) {                                               \
  70    ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
  71    fputs("\n", self->dot_graph_file);                                      \
  72  }
  73
  74#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
  75
  76#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
  77
  78static const unsigned MAX_VERSION_COUNT = 6;
  79static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
  80static const unsigned MAX_SUMMARY_DEPTH = 16;
  81static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
  82static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
  83
  84typedef struct {
  85  Subtree token;
  86  Subtree last_external_token;
  87  uint32_t byte_index;
  88} TokenCache;
  89
  90struct TSParser {
  91  Lexer lexer;
  92  Stack *stack;
  93  SubtreePool tree_pool;
  94  const TSLanguage *language;
  95  TSWasmStore *wasm_store;
  96  ReduceActionSet reduce_actions;
  97  Subtree finished_tree;
  98  SubtreeArray trailing_extras;
  99  SubtreeArray trailing_extras2;
 100  SubtreeArray scratch_trees;
 101  TokenCache token_cache;
 102  ReusableNode reusable_node;
 103  void *external_scanner_payload;
 104  FILE *dot_graph_file;
 105  TSClock end_clock;
 106  TSDuration timeout_duration;
 107  unsigned accept_count;
 108  unsigned operation_count;
 109  const volatile size_t *cancellation_flag;
 110  Subtree old_tree;
 111  TSRangeArray included_range_differences;
 112  unsigned included_range_difference_index;
 113  bool has_scanner_error;
 114};
 115
 116typedef struct {
 117  unsigned cost;
 118  unsigned node_count;
 119  int dynamic_precedence;
 120  bool is_in_error;
 121} ErrorStatus;
 122
 123typedef enum {
 124  ErrorComparisonTakeLeft,
 125  ErrorComparisonPreferLeft,
 126  ErrorComparisonNone,
 127  ErrorComparisonPreferRight,
 128  ErrorComparisonTakeRight,
 129} ErrorComparison;
 130
 131typedef struct {
 132  const char *string;
 133  uint32_t length;
 134} TSStringInput;
 135
 136// StringInput
 137
 138static const char *ts_string_input_read(
 139  void *_self,
 140  uint32_t byte,
 141  TSPoint point,
 142  uint32_t *length
 143) {
 144  (void)point;
 145  TSStringInput *self = (TSStringInput *)_self;
 146  if (byte >= self->length) {
 147    *length = 0;
 148    return "";
 149  } else {
 150    *length = self->length - byte;
 151    return self->string + byte;
 152  }
 153}
 154
 155// Parser - Private
 156
 157static void ts_parser__log(TSParser *self) {
 158  if (self->lexer.logger.log) {
 159    self->lexer.logger.log(
 160      self->lexer.logger.payload,
 161      TSLogTypeParse,
 162      self->lexer.debug_buffer
 163    );
 164  }
 165
 166  if (self->dot_graph_file) {
 167    fprintf(self->dot_graph_file, "graph {\nlabel=\"");
 168    for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) {
 169      if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file);
 170      fputc(*chr, self->dot_graph_file);
 171    }
 172    fprintf(self->dot_graph_file, "\"\n}\n\n");
 173  }
 174}
 175
 176static bool ts_parser__breakdown_top_of_stack(
 177  TSParser *self,
 178  StackVersion version
 179) {
 180  bool did_break_down = false;
 181  bool pending = false;
 182
 183  do {
 184    StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
 185    if (!pop.size) break;
 186
 187    did_break_down = true;
 188    pending = false;
 189    for (uint32_t i = 0; i < pop.size; i++) {
 190      StackSlice slice = pop.contents[i];
 191      TSStateId state = ts_stack_state(self->stack, slice.version);
 192      Subtree parent = *array_front(&slice.subtrees);
 193
 194      for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
 195        Subtree child = ts_subtree_children(parent)[j];
 196        pending = ts_subtree_child_count(child) > 0;
 197
 198        if (ts_subtree_is_error(child)) {
 199          state = ERROR_STATE;
 200        } else if (!ts_subtree_extra(child)) {
 201          state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
 202        }
 203
 204        ts_subtree_retain(child);
 205        ts_stack_push(self->stack, slice.version, child, pending, state);
 206      }
 207
 208      for (uint32_t j = 1; j < slice.subtrees.size; j++) {
 209        Subtree tree = slice.subtrees.contents[j];
 210        ts_stack_push(self->stack, slice.version, tree, false, state);
 211      }
 212
 213      ts_subtree_release(&self->tree_pool, parent);
 214      array_delete(&slice.subtrees);
 215
 216      LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
 217      LOG_STACK();
 218    }
 219  } while (pending);
 220
 221  return did_break_down;
 222}
 223
 224static void ts_parser__breakdown_lookahead(
 225  TSParser *self,
 226  Subtree *lookahead,
 227  TSStateId state,
 228  ReusableNode *reusable_node
 229) {
 230  bool did_descend = false;
 231  Subtree tree = reusable_node_tree(reusable_node);
 232  while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
 233    LOG("state_mismatch sym:%s", TREE_NAME(tree));
 234    reusable_node_descend(reusable_node);
 235    tree = reusable_node_tree(reusable_node);
 236    did_descend = true;
 237  }
 238
 239  if (did_descend) {
 240    ts_subtree_release(&self->tree_pool, *lookahead);
 241    *lookahead = tree;
 242    ts_subtree_retain(*lookahead);
 243  }
 244}
 245
 246static ErrorComparison ts_parser__compare_versions(
 247  TSParser *self,
 248  ErrorStatus a,
 249  ErrorStatus b
 250) {
 251  (void)self;
 252  if (!a.is_in_error && b.is_in_error) {
 253    if (a.cost < b.cost) {
 254      return ErrorComparisonTakeLeft;
 255    } else {
 256      return ErrorComparisonPreferLeft;
 257    }
 258  }
 259
 260  if (a.is_in_error && !b.is_in_error) {
 261    if (b.cost < a.cost) {
 262      return ErrorComparisonTakeRight;
 263    } else {
 264      return ErrorComparisonPreferRight;
 265    }
 266  }
 267
 268  if (a.cost < b.cost) {
 269    if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
 270      return ErrorComparisonTakeLeft;
 271    } else {
 272      return ErrorComparisonPreferLeft;
 273    }
 274  }
 275
 276  if (b.cost < a.cost) {
 277    if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
 278      return ErrorComparisonTakeRight;
 279    } else {
 280      return ErrorComparisonPreferRight;
 281    }
 282  }
 283
 284  if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
 285  if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
 286  return ErrorComparisonNone;
 287}
 288
 289static ErrorStatus ts_parser__version_status(
 290  TSParser *self,
 291  StackVersion version
 292) {
 293  unsigned cost = ts_stack_error_cost(self->stack, version);
 294  bool is_paused = ts_stack_is_paused(self->stack, version);
 295  if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
 296  return (ErrorStatus) {
 297    .cost = cost,
 298    .node_count = ts_stack_node_count_since_error(self->stack, version),
 299    .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
 300    .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
 301  };
 302}
 303
 304static bool ts_parser__better_version_exists(
 305  TSParser *self,
 306  StackVersion version,
 307  bool is_in_error,
 308  unsigned cost
 309) {
 310  if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
 311    return true;
 312  }
 313
 314  Length position = ts_stack_position(self->stack, version);
 315  ErrorStatus status = {
 316    .cost = cost,
 317    .is_in_error = is_in_error,
 318    .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
 319    .node_count = ts_stack_node_count_since_error(self->stack, version),
 320  };
 321
 322  for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
 323    if (i == version ||
 324        !ts_stack_is_active(self->stack, i) ||
 325        ts_stack_position(self->stack, i).bytes < position.bytes) continue;
 326    ErrorStatus status_i = ts_parser__version_status(self, i);
 327    switch (ts_parser__compare_versions(self, status, status_i)) {
 328      case ErrorComparisonTakeRight:
 329        return true;
 330      case ErrorComparisonPreferRight:
 331        if (ts_stack_can_merge(self->stack, i, version)) return true;
 332        break;
 333      default:
 334        break;
 335    }
 336  }
 337
 338  return false;
 339}
 340
 341static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) {
 342  if (ts_language_is_wasm(self->language)) {
 343    return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
 344  } else {
 345    return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
 346  }
 347}
 348
 349static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) {
 350  if (ts_language_is_wasm(self->language)) {
 351    return ts_wasm_store_call_lex_keyword(self->wasm_store, 0);
 352  } else {
 353    return self->language->keyword_lex_fn(&self->lexer.data, 0);
 354  }
 355}
 356
 357static void ts_parser__external_scanner_create(
 358  TSParser *self
 359) {
 360  if (self->language && self->language->external_scanner.states) {
 361    if (ts_language_is_wasm(self->language)) {
 362      self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create(
 363        self->wasm_store
 364      );
 365      if (ts_wasm_store_has_error(self->wasm_store)) {
 366        self->has_scanner_error = true;
 367      }
 368    } else if (self->language->external_scanner.create) {
 369      self->external_scanner_payload = self->language->external_scanner.create();
 370    }
 371  }
 372}
 373
 374static void ts_parser__external_scanner_destroy(
 375  TSParser *self
 376) {
 377  if (
 378    self->language &&
 379    self->external_scanner_payload &&
 380    self->language->external_scanner.destroy &&
 381    !ts_language_is_wasm(self->language)
 382  ) {
 383    self->language->external_scanner.destroy(
 384      self->external_scanner_payload
 385    );
 386  }
 387  self->external_scanner_payload = NULL;
 388}
 389
 390static unsigned ts_parser__external_scanner_serialize(
 391  TSParser *self
 392) {
 393  if (ts_language_is_wasm(self->language)) {
 394    return ts_wasm_store_call_scanner_serialize(
 395      self->wasm_store,
 396      (uintptr_t)self->external_scanner_payload,
 397      self->lexer.debug_buffer
 398    );
 399  } else {
 400    return self->language->external_scanner.serialize(
 401      self->external_scanner_payload,
 402      self->lexer.debug_buffer
 403    );
 404  }
 405}
 406
 407static void ts_parser__external_scanner_deserialize(
 408  TSParser *self,
 409  Subtree external_token
 410) {
 411  const char *data = NULL;
 412  uint32_t length = 0;
 413  if (external_token.ptr) {
 414    data = ts_external_scanner_state_data(&external_token.ptr->external_scanner_state);
 415    length = external_token.ptr->external_scanner_state.length;
 416  }
 417
 418  if (ts_language_is_wasm(self->language)) {
 419    ts_wasm_store_call_scanner_deserialize(
 420      self->wasm_store,
 421      (uintptr_t)self->external_scanner_payload,
 422      data,
 423      length
 424    );
 425    if (ts_wasm_store_has_error(self->wasm_store)) {
 426      self->has_scanner_error = true;
 427    }
 428  } else {
 429    self->language->external_scanner.deserialize(
 430      self->external_scanner_payload,
 431      data,
 432      length
 433    );
 434  }
 435}
 436
 437static bool ts_parser__external_scanner_scan(
 438  TSParser *self,
 439  TSStateId external_lex_state
 440) {
 441  if (ts_language_is_wasm(self->language)) {
 442    bool result = ts_wasm_store_call_scanner_scan(
 443      self->wasm_store,
 444      (uintptr_t)self->external_scanner_payload,
 445      external_lex_state * self->language->external_token_count
 446    );
 447    if (ts_wasm_store_has_error(self->wasm_store)) {
 448      self->has_scanner_error = true;
 449    }
 450    return result;
 451  } else {
 452    const bool *valid_external_tokens = ts_language_enabled_external_tokens(
 453      self->language,
 454      external_lex_state
 455    );
 456    return self->language->external_scanner.scan(
 457      self->external_scanner_payload,
 458      &self->lexer.data,
 459      valid_external_tokens
 460    );
 461  }
 462}
 463
 464static bool ts_parser__can_reuse_first_leaf(
 465  TSParser *self,
 466  TSStateId state,
 467  Subtree tree,
 468  TableEntry *table_entry
 469) {
 470  TSLexMode current_lex_mode = self->language->lex_modes[state];
 471  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
 472  TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
 473  TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
 474
 475  // At the end of a non-terminal extra node, the lexer normally returns
 476  // NULL, which indicates that the parser should look for a reduce action
 477  // at symbol `0`. Avoid reusing tokens in this situation to ensure that
 478  // the same thing happens when incrementally reparsing.
 479  if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
 480
 481  // If the token was created in a state with the same set of lookaheads, it is reusable.
 482  if (
 483    table_entry->action_count > 0 &&
 484    memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
 485    (
 486      leaf_symbol != self->language->keyword_capture_token ||
 487      (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state)
 488    )
 489  ) return true;
 490
 491  // Empty tokens are not reusable in states with different lookaheads.
 492  if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
 493
 494  // If the current state allows external tokens or other tokens that conflict with this
 495  // token, this token is not reusable.
 496  return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
 497}
 498
 499static Subtree ts_parser__lex(
 500  TSParser *self,
 501  StackVersion version,
 502  TSStateId parse_state
 503) {
 504  TSLexMode lex_mode = self->language->lex_modes[parse_state];
 505  if (lex_mode.lex_state == (uint16_t)-1) {
 506    LOG("no_lookahead_after_non_terminal_extra");
 507    return NULL_SUBTREE;
 508  }
 509
 510  const Length start_position = ts_stack_position(self->stack, version);
 511  const Subtree external_token = ts_stack_last_external_token(self->stack, version);
 512
 513  bool found_external_token = false;
 514  bool error_mode = parse_state == ERROR_STATE;
 515  bool skipped_error = false;
 516  bool called_get_column = false;
 517  int32_t first_error_character = 0;
 518  Length error_start_position = length_zero();
 519  Length error_end_position = length_zero();
 520  uint32_t lookahead_end_byte = 0;
 521  uint32_t external_scanner_state_len = 0;
 522  bool external_scanner_state_changed = false;
 523  ts_lexer_reset(&self->lexer, start_position);
 524
 525  for (;;) {
 526    bool found_token = false;
 527    Length current_position = self->lexer.current_position;
 528
 529    if (lex_mode.external_lex_state != 0) {
 530      LOG(
 531        "lex_external state:%d, row:%u, column:%u",
 532        lex_mode.external_lex_state,
 533        current_position.extent.row,
 534        current_position.extent.column
 535      );
 536      ts_lexer_start(&self->lexer);
 537      ts_parser__external_scanner_deserialize(self, external_token);
 538      found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state);
 539      if (self->has_scanner_error) return NULL_SUBTREE;
 540      ts_lexer_finish(&self->lexer, &lookahead_end_byte);
 541
 542      if (found_token) {
 543        external_scanner_state_len = ts_parser__external_scanner_serialize(self);
 544        external_scanner_state_changed = !ts_external_scanner_state_eq(
 545          ts_subtree_external_scanner_state(external_token),
 546          self->lexer.debug_buffer,
 547          external_scanner_state_len
 548        );
 549
 550        // When recovering from an error, ignore any zero-length external tokens
 551        // unless they have changed the external scanner's state. This helps to
 552        // avoid infinite loops which could otherwise occur, because the lexer is
 553        // looking for any possible token, instead of looking for the specific set of
 554        // tokens that are valid in some parse state.
 555        //
 556        // Note that it's possible that the token end position may be *before* the
 557        // original position of the lexer because of the way that tokens are positioned
 558        // at included range boundaries: when a token is terminated at the start of
 559        // an included range, it is marked as ending at the *end* of the preceding
 560        // included range.
 561        if (
 562          self->lexer.token_end_position.bytes <= current_position.bytes &&
 563          (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
 564          !external_scanner_state_changed
 565        ) {
 566          LOG(
 567            "ignore_empty_external_token symbol:%s",
 568            SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol])
 569          )
 570          found_token = false;
 571        }
 572      }
 573
 574      if (found_token) {
 575        found_external_token = true;
 576        called_get_column = self->lexer.did_get_column;
 577        break;
 578      }
 579
 580      ts_lexer_reset(&self->lexer, current_position);
 581    }
 582
 583    LOG(
 584      "lex_internal state:%d, row:%u, column:%u",
 585      lex_mode.lex_state,
 586      current_position.extent.row,
 587      current_position.extent.column
 588    );
 589    ts_lexer_start(&self->lexer);
 590    found_token = ts_parser__call_main_lex_fn(self, lex_mode);
 591    ts_lexer_finish(&self->lexer, &lookahead_end_byte);
 592    if (found_token) break;
 593
 594    if (!error_mode) {
 595      error_mode = true;
 596      lex_mode = self->language->lex_modes[ERROR_STATE];
 597      ts_lexer_reset(&self->lexer, start_position);
 598      continue;
 599    }
 600
 601    if (!skipped_error) {
 602      LOG("skip_unrecognized_character");
 603      skipped_error = true;
 604      error_start_position = self->lexer.token_start_position;
 605      error_end_position = self->lexer.token_start_position;
 606      first_error_character = self->lexer.data.lookahead;
 607    }
 608
 609    if (self->lexer.current_position.bytes == error_end_position.bytes) {
 610      if (self->lexer.data.eof(&self->lexer.data)) {
 611        self->lexer.data.result_symbol = ts_builtin_sym_error;
 612        break;
 613      }
 614      self->lexer.data.advance(&self->lexer.data, false);
 615    }
 616
 617    error_end_position = self->lexer.current_position;
 618  }
 619
 620  Subtree result;
 621  if (skipped_error) {
 622    Length padding = length_sub(error_start_position, start_position);
 623    Length size = length_sub(error_end_position, error_start_position);
 624    uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
 625    result = ts_subtree_new_error(
 626      &self->tree_pool,
 627      first_error_character,
 628      padding,
 629      size,
 630      lookahead_bytes,
 631      parse_state,
 632      self->language
 633    );
 634  } else {
 635    bool is_keyword = false;
 636    TSSymbol symbol = self->lexer.data.result_symbol;
 637    Length padding = length_sub(self->lexer.token_start_position, start_position);
 638    Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
 639    uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
 640
 641    if (found_external_token) {
 642      symbol = self->language->external_scanner.symbol_map[symbol];
 643    } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
 644      uint32_t end_byte = self->lexer.token_end_position.bytes;
 645      ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
 646      ts_lexer_start(&self->lexer);
 647
 648      is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode);
 649
 650      if (
 651        is_keyword &&
 652        self->lexer.token_end_position.bytes == end_byte &&
 653        ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
 654      ) {
 655        symbol = self->lexer.data.result_symbol;
 656      }
 657    }
 658
 659    result = ts_subtree_new_leaf(
 660      &self->tree_pool,
 661      symbol,
 662      padding,
 663      size,
 664      lookahead_bytes,
 665      parse_state,
 666      found_external_token,
 667      called_get_column,
 668      is_keyword,
 669      self->language
 670    );
 671
 672    if (found_external_token) {
 673      MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result);
 674      ts_external_scanner_state_init(
 675        &mut_result.ptr->external_scanner_state,
 676        self->lexer.debug_buffer,
 677        external_scanner_state_len
 678      );
 679      mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed;
 680    }
 681  }
 682
 683  LOG_LOOKAHEAD(
 684    SYM_NAME(ts_subtree_symbol(result)),
 685    ts_subtree_total_size(result).bytes
 686  );
 687  return result;
 688}
 689
 690static Subtree ts_parser__get_cached_token(
 691  TSParser *self,
 692  TSStateId state,
 693  size_t position,
 694  Subtree last_external_token,
 695  TableEntry *table_entry
 696) {
 697  TokenCache *cache = &self->token_cache;
 698  if (
 699    cache->token.ptr && cache->byte_index == position &&
 700    ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
 701  ) {
 702    ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
 703    if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
 704      ts_subtree_retain(cache->token);
 705      return cache->token;
 706    }
 707  }
 708  return NULL_SUBTREE;
 709}
 710
 711static void ts_parser__set_cached_token(
 712  TSParser *self,
 713  uint32_t byte_index,
 714  Subtree last_external_token,
 715  Subtree token
 716) {
 717  TokenCache *cache = &self->token_cache;
 718  if (token.ptr) ts_subtree_retain(token);
 719  if (last_external_token.ptr) ts_subtree_retain(last_external_token);
 720  if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
 721  if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token);
 722  cache->token = token;
 723  cache->byte_index = byte_index;
 724  cache->last_external_token = last_external_token;
 725}
 726
 727static bool ts_parser__has_included_range_difference(
 728  const TSParser *self,
 729  uint32_t start_position,
 730  uint32_t end_position
 731) {
 732  return ts_range_array_intersects(
 733    &self->included_range_differences,
 734    self->included_range_difference_index,
 735    start_position,
 736    end_position
 737  );
 738}
 739
 740static Subtree ts_parser__reuse_node(
 741  TSParser *self,
 742  StackVersion version,
 743  TSStateId *state,
 744  uint32_t position,
 745  Subtree last_external_token,
 746  TableEntry *table_entry
 747) {
 748  Subtree result;
 749  while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
 750    uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
 751    uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
 752
 753    // Do not reuse an EOF node if the included ranges array has changes
 754    // later on in the file.
 755    if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
 756
 757    if (byte_offset > position) {
 758      LOG("before_reusable_node symbol:%s", TREE_NAME(result));
 759      break;
 760    }
 761
 762    if (byte_offset < position) {
 763      LOG("past_reusable_node symbol:%s", TREE_NAME(result));
 764      if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
 765        reusable_node_advance(&self->reusable_node);
 766      }
 767      continue;
 768    }
 769
 770    if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
 771      LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
 772      reusable_node_advance(&self->reusable_node);
 773      continue;
 774    }
 775
 776    const char *reason = NULL;
 777    if (ts_subtree_has_changes(result)) {
 778      reason = "has_changes";
 779    } else if (ts_subtree_is_error(result)) {
 780      reason = "is_error";
 781    } else if (ts_subtree_missing(result)) {
 782      reason = "is_missing";
 783    } else if (ts_subtree_is_fragile(result)) {
 784      reason = "is_fragile";
 785    } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
 786      reason = "contains_different_included_range";
 787    }
 788
 789    if (reason) {
 790      LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
 791      if (!reusable_node_descend(&self->reusable_node)) {
 792        reusable_node_advance(&self->reusable_node);
 793        ts_parser__breakdown_top_of_stack(self, version);
 794        *state = ts_stack_state(self->stack, version);
 795      }
 796      continue;
 797    }
 798
 799    TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
 800    ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
 801    if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
 802      LOG(
 803        "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
 804        TREE_NAME(result),
 805        SYM_NAME(leaf_symbol)
 806      );
 807      reusable_node_advance_past_leaf(&self->reusable_node);
 808      break;
 809    }
 810
 811    LOG("reuse_node symbol:%s", TREE_NAME(result));
 812    ts_subtree_retain(result);
 813    return result;
 814  }
 815
 816  return NULL_SUBTREE;
 817}
 818
 819// Determine if a given tree should be replaced by an alternative tree.
 820//
 821// The decision is based on the trees' error costs (if any), their dynamic precedence,
 822// and finally, as a default, by a recursive comparison of the trees' symbols.
 823static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
 824  if (!left.ptr) return true;
 825  if (!right.ptr) return false;
 826
 827  if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
 828    LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
 829    return true;
 830  }
 831
 832  if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
 833    LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
 834    return false;
 835  }
 836
 837  if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) {
 838    LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32,
 839        TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
 840        ts_subtree_dynamic_precedence(left));
 841    return true;
 842  }
 843
 844  if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) {
 845    LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32,
 846        TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right),
 847        ts_subtree_dynamic_precedence(right));
 848    return false;
 849  }
 850
 851  if (ts_subtree_error_cost(left) > 0) return true;
 852
 853  int comparison = ts_subtree_compare(left, right, &self->tree_pool);
 854  switch (comparison) {
 855    case -1:
 856      LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
 857      return false;
 858      break;
 859    case 1:
 860      LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
 861      return true;
 862    default:
 863      LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
 864      return false;
 865  }
 866}
 867
 868// Determine if a given tree's children should be replaced by an alternative
 869// array of children.
 870static bool ts_parser__select_children(
 871  TSParser *self,
 872  Subtree left,
 873  const SubtreeArray *children
 874) {
 875  array_assign(&self->scratch_trees, children);
 876
 877  // Create a temporary subtree using the scratch trees array. This node does
 878  // not perform any allocation except for possibly growing the array to make
 879  // room for its own heap data. The scratch tree is never explicitly released,
 880  // so the same 'scratch trees' array can be reused again later.
 881  MutableSubtree scratch_tree = ts_subtree_new_node(
 882    ts_subtree_symbol(left),
 883    &self->scratch_trees,
 884    0,
 885    self->language
 886  );
 887
 888  return ts_parser__select_tree(
 889    self,
 890    left,
 891    ts_subtree_from_mut(scratch_tree)
 892  );
 893}
 894
 895static void ts_parser__shift(
 896  TSParser *self,
 897  StackVersion version,
 898  TSStateId state,
 899  Subtree lookahead,
 900  bool extra
 901) {
 902  bool is_leaf = ts_subtree_child_count(lookahead) == 0;
 903  Subtree subtree_to_push = lookahead;
 904  if (extra != ts_subtree_extra(lookahead) && is_leaf) {
 905    MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
 906    ts_subtree_set_extra(&result, extra);
 907    subtree_to_push = ts_subtree_from_mut(result);
 908  }
 909
 910  ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
 911  if (ts_subtree_has_external_tokens(subtree_to_push)) {
 912    ts_stack_set_last_external_token(
 913      self->stack, version, ts_subtree_last_external_token(subtree_to_push)
 914    );
 915  }
 916}
 917
 918static StackVersion ts_parser__reduce(
 919  TSParser *self,
 920  StackVersion version,
 921  TSSymbol symbol,
 922  uint32_t count,
 923  int dynamic_precedence,
 924  uint16_t production_id,
 925  bool is_fragile,
 926  bool end_of_non_terminal_extra
 927) {
 928  uint32_t initial_version_count = ts_stack_version_count(self->stack);
 929
 930  // Pop the given number of nodes from the given version of the parse stack.
 931  // If stack versions have previously merged, then there may be more than one
 932  // path back through the stack. For each path, create a new parent node to
 933  // contain the popped children, and push it onto the stack in place of the
 934  // children.
 935  StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
 936  uint32_t removed_version_count = 0;
 937  for (uint32_t i = 0; i < pop.size; i++) {
 938    StackSlice slice = pop.contents[i];
 939    StackVersion slice_version = slice.version - removed_version_count;
 940
 941    // This is where new versions are added to the parse stack. The versions
 942    // will all be sorted and truncated at the end of the outer parsing loop.
 943    // Allow the maximum version count to be temporarily exceeded, but only
 944    // by a limited threshold.
 945    if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
 946      ts_stack_remove_version(self->stack, slice_version);
 947      ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
 948      removed_version_count++;
 949      while (i + 1 < pop.size) {
 950        StackSlice next_slice = pop.contents[i + 1];
 951        if (next_slice.version != slice.version) break;
 952        ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
 953        i++;
 954      }
 955      continue;
 956    }
 957
 958    // Extra tokens on top of the stack should not be included in this new parent
 959    // node. They will be re-pushed onto the stack after the parent node is
 960    // created and pushed.
 961    SubtreeArray children = slice.subtrees;
 962    ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
 963
 964    MutableSubtree parent = ts_subtree_new_node(
 965      symbol, &children, production_id, self->language
 966    );
 967
 968    // This pop operation may have caused multiple stack versions to collapse
 969    // into one, because they all diverged from a common state. In that case,
 970    // choose one of the arrays of trees to be the parent node's children, and
 971    // delete the rest of the tree arrays.
 972    while (i + 1 < pop.size) {
 973      StackSlice next_slice = pop.contents[i + 1];
 974      if (next_slice.version != slice.version) break;
 975      i++;
 976
 977      SubtreeArray next_slice_children = next_slice.subtrees;
 978      ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2);
 979
 980      if (ts_parser__select_children(
 981        self,
 982        ts_subtree_from_mut(parent),
 983        &next_slice_children
 984      )) {
 985        ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
 986        ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
 987        array_swap(&self->trailing_extras, &self->trailing_extras2);
 988        parent = ts_subtree_new_node(
 989          symbol, &next_slice_children, production_id, self->language
 990        );
 991      } else {
 992        array_clear(&self->trailing_extras2);
 993        ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
 994      }
 995    }
 996
 997    TSStateId state = ts_stack_state(self->stack, slice_version);
 998    TSStateId next_state = ts_language_next_state(self->language, state, symbol);
 999    if (end_of_non_terminal_extra && next_state == state) {
1000      parent.ptr->extra = true;
1001    }
1002    if (is_fragile || pop.size > 1 || initial_version_count > 1) {
1003      parent.ptr->fragile_left = true;
1004      parent.ptr->fragile_right = true;
1005      parent.ptr->parse_state = TS_TREE_STATE_NONE;
1006    } else {
1007      parent.ptr->parse_state = state;
1008    }
1009    parent.ptr->dynamic_precedence += dynamic_precedence;
1010
1011    // Push the parent node onto the stack, along with any extra tokens that
1012    // were previously on top of the stack.
1013    ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
1014    for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
1015      ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
1016    }
1017
1018    for (StackVersion j = 0; j < slice_version; j++) {
1019      if (j == version) continue;
1020      if (ts_stack_merge(self->stack, j, slice_version)) {
1021        removed_version_count++;
1022        break;
1023      }
1024    }
1025  }
1026
1027  // Return the first new stack version that was created.
1028  return ts_stack_version_count(self->stack) > initial_version_count
1029    ? initial_version_count
1030    : STACK_VERSION_NONE;
1031}
1032
1033static void ts_parser__accept(
1034  TSParser *self,
1035  StackVersion version,
1036  Subtree lookahead
1037) {
1038  assert(ts_subtree_is_eof(lookahead));
1039  ts_stack_push(self->stack, version, lookahead, false, 1);
1040
1041  StackSliceArray pop = ts_stack_pop_all(self->stack, version);
1042  for (uint32_t i = 0; i < pop.size; i++) {
1043    SubtreeArray trees = pop.contents[i].subtrees;
1044
1045    Subtree root = NULL_SUBTREE;
1046    for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
1047      Subtree tree = trees.contents[j];
1048      if (!ts_subtree_extra(tree)) {
1049        assert(!tree.data.is_inline);
1050        uint32_t child_count = ts_subtree_child_count(tree);
1051        const Subtree *children = ts_subtree_children(tree);
1052        for (uint32_t k = 0; k < child_count; k++) {
1053          ts_subtree_retain(children[k]);
1054        }
1055        array_splice(&trees, j, 1, child_count, children);
1056        root = ts_subtree_from_mut(ts_subtree_new_node(
1057          ts_subtree_symbol(tree),
1058          &trees,
1059          tree.ptr->production_id,
1060          self->language
1061        ));
1062        ts_subtree_release(&self->tree_pool, tree);
1063        break;
1064      }
1065    }
1066
1067    assert(root.ptr);
1068    self->accept_count++;
1069
1070    if (self->finished_tree.ptr) {
1071      if (ts_parser__select_tree(self, self->finished_tree, root)) {
1072        ts_subtree_release(&self->tree_pool, self->finished_tree);
1073        self->finished_tree = root;
1074      } else {
1075        ts_subtree_release(&self->tree_pool, root);
1076      }
1077    } else {
1078      self->finished_tree = root;
1079    }
1080  }
1081
1082  ts_stack_remove_version(self->stack, pop.contents[0].version);
1083  ts_stack_halt(self->stack, version);
1084}
1085
1086static bool ts_parser__do_all_potential_reductions(
1087  TSParser *self,
1088  StackVersion starting_version,
1089  TSSymbol lookahead_symbol
1090) {
1091  uint32_t initial_version_count = ts_stack_version_count(self->stack);
1092
1093  bool can_shift_lookahead_symbol = false;
1094  StackVersion version = starting_version;
1095  for (unsigned i = 0; true; i++) {
1096    uint32_t version_count = ts_stack_version_count(self->stack);
1097    if (version >= version_count) break;
1098
1099    bool merged = false;
1100    for (StackVersion j = initial_version_count; j < version; j++) {
1101      if (ts_stack_merge(self->stack, j, version)) {
1102        merged = true;
1103        break;
1104      }
1105    }
1106    if (merged) continue;
1107
1108    TSStateId state = ts_stack_state(self->stack, version);
1109    bool has_shift_action = false;
1110    array_clear(&self->reduce_actions);
1111
1112    TSSymbol first_symbol, end_symbol;
1113    if (lookahead_symbol != 0) {
1114      first_symbol = lookahead_symbol;
1115      end_symbol = lookahead_symbol + 1;
1116    } else {
1117      first_symbol = 1;
1118      end_symbol = self->language->token_count;
1119    }
1120
1121    for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
1122      TableEntry entry;
1123      ts_language_table_entry(self->language, state, symbol, &entry);
1124      for (uint32_t j = 0; j < entry.action_count; j++) {
1125        TSParseAction action = entry.actions[j];
1126        switch (action.type) {
1127          case TSParseActionTypeShift:
1128          case TSParseActionTypeRecover:
1129            if (!action.shift.extra && !action.shift.repetition) has_shift_action = true;
1130            break;
1131          case TSParseActionTypeReduce:
1132            if (action.reduce.child_count > 0)
1133              ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) {
1134                .symbol = action.reduce.symbol,
1135                .count = action.reduce.child_count,
1136                .dynamic_precedence = action.reduce.dynamic_precedence,
1137                .production_id = action.reduce.production_id,
1138              });
1139            break;
1140          default:
1141            break;
1142        }
1143      }
1144    }
1145
1146    StackVersion reduction_version = STACK_VERSION_NONE;
1147    for (uint32_t j = 0; j < self->reduce_actions.size; j++) {
1148      ReduceAction action = self->reduce_actions.contents[j];
1149
1150      reduction_version = ts_parser__reduce(
1151        self, version, action.symbol, action.count,
1152        action.dynamic_precedence, action.production_id,
1153        true, false
1154      );
1155    }
1156
1157    if (has_shift_action) {
1158      can_shift_lookahead_symbol = true;
1159    } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
1160      ts_stack_renumber_version(self->stack, reduction_version, version);
1161      continue;
1162    } else if (lookahead_symbol != 0) {
1163      ts_stack_remove_version(self->stack, version);
1164    }
1165
1166    if (version == starting_version) {
1167      version = version_count;
1168    } else {
1169      version++;
1170    }
1171  }
1172
1173  return can_shift_lookahead_symbol;
1174}
1175
1176static bool ts_parser__recover_to_state(
1177  TSParser *self,
1178  StackVersion version,
1179  unsigned depth,
1180  TSStateId goal_state
1181) {
1182  StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
1183  StackVersion previous_version = STACK_VERSION_NONE;
1184
1185  for (unsigned i = 0; i < pop.size; i++) {
1186    StackSlice slice = pop.contents[i];
1187
1188    if (slice.version == previous_version) {
1189      ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1190      array_erase(&pop, i--);
1191      continue;
1192    }
1193
1194    if (ts_stack_state(self->stack, slice.version) != goal_state) {
1195      ts_stack_halt(self->stack, slice.version);
1196      ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1197      array_erase(&pop, i--);
1198      continue;
1199    }
1200
1201    SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
1202    if (error_trees.size > 0) {
1203      assert(error_trees.size == 1);
1204      Subtree error_tree = error_trees.contents[0];
1205      uint32_t error_child_count = ts_subtree_child_count(error_tree);
1206      if (error_child_count > 0) {
1207        array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
1208        for (unsigned j = 0; j < error_child_count; j++) {
1209          ts_subtree_retain(slice.subtrees.contents[j]);
1210        }
1211      }
1212      ts_subtree_array_delete(&self->tree_pool, &error_trees);
1213    }
1214
1215    ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
1216
1217    if (slice.subtrees.size > 0) {
1218      Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
1219      ts_stack_push(self->stack, slice.version, error, false, goal_state);
1220    } else {
1221      array_delete(&slice.subtrees);
1222    }
1223
1224    for (unsigned j = 0; j < self->trailing_extras.size; j++) {
1225      Subtree tree = self->trailing_extras.contents[j];
1226      ts_stack_push(self->stack, slice.version, tree, false, goal_state);
1227    }
1228
1229    previous_version = slice.version;
1230  }
1231
1232  return previous_version != STACK_VERSION_NONE;
1233}
1234
1235static void ts_parser__recover(
1236  TSParser *self,
1237  StackVersion version,
1238  Subtree lookahead
1239) {
1240  bool did_recover = false;
1241  unsigned previous_version_count = ts_stack_version_count(self->stack);
1242  Length position = ts_stack_position(self->stack, version);
1243  StackSummary *summary = ts_stack_get_summary(self->stack, version);
1244  unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
1245  unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
1246
1247  // When the parser is in the error state, there are two strategies for recovering with a
1248  // given lookahead token:
1249  // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
1250  //    create a new stack version that is in that state again. This entails popping all of the
1251  //    subtrees that have been pushed onto the stack since that previous state, and wrapping
1252  //    them in an ERROR node.
1253  // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
1254  //    move on to the next lookahead token, remaining in the error state.
1255  //
1256  // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
1257  // of the previous parse states and their depths. Look at each state in the summary, to see
1258  // if the current lookahead token would be valid in that state.
1259  if (summary && !ts_subtree_is_error(lookahead)) {
1260    for (unsigned i = 0; i < summary->size; i++) {
1261      StackSummaryEntry entry = summary->contents[i];
1262
1263      if (entry.state == ERROR_STATE) continue;
1264      if (entry.position.bytes == position.bytes) continue;
1265      unsigned depth = entry.depth;
1266      if (node_count_since_error > 0) depth++;
1267
1268      // Do not recover in ways that create redundant stack versions.
1269      bool would_merge = false;
1270      for (unsigned j = 0; j < previous_version_count; j++) {
1271        if (
1272          ts_stack_state(self->stack, j) == entry.state &&
1273          ts_stack_position(self->stack, j).bytes == position.bytes
1274        ) {
1275          would_merge = true;
1276          break;
1277        }
1278      }
1279      if (would_merge) continue;
1280
1281      // Do not recover if the result would clearly be worse than some existing stack version.
1282      unsigned new_cost =
1283        current_error_cost +
1284        entry.depth * ERROR_COST_PER_SKIPPED_TREE +
1285        (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
1286        (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
1287      if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
1288
1289      // If the current lookahead token is valid in some previous state, recover to that state.
1290      // Then stop looking for further recoveries.
1291      if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
1292        if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
1293          did_recover = true;
1294          LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
1295          LOG_STACK();
1296          break;
1297        }
1298      }
1299    }
1300  }
1301
1302  // In the process of attempting to recover, some stack versions may have been created
1303  // and subsequently halted. Remove those versions.
1304  for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
1305    if (!ts_stack_is_active(self->stack, i)) {
1306      ts_stack_remove_version(self->stack, i--);
1307    }
1308  }
1309
1310  // If strategy 1 succeeded, a new stack version will have been created which is able to handle
1311  // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
1312  // current lookahead token by wrapping it in an ERROR node.
1313
1314  // Don't pursue this additional strategy if there are already too many stack versions.
1315  if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1316    ts_stack_halt(self->stack, version);
1317    ts_subtree_release(&self->tree_pool, lookahead);
1318    return;
1319  }
1320
1321  if (
1322    did_recover &&
1323    ts_subtree_has_external_scanner_state_change(lookahead)
1324  ) {
1325    ts_stack_halt(self->stack, version);
1326    ts_subtree_release(&self->tree_pool, lookahead);
1327    return;
1328  }
1329
1330  // If the parser is still in the error state at the end of the file, just wrap everything
1331  // in an ERROR node and terminate.
1332  if (ts_subtree_is_eof(lookahead)) {
1333    LOG("recover_eof");
1334    SubtreeArray children = array_new();
1335    Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
1336    ts_stack_push(self->stack, version, parent, false, 1);
1337    ts_parser__accept(self, version, lookahead);
1338    return;
1339  }
1340
1341  // Do not recover if the result would clearly be worse than some existing stack version.
1342  unsigned new_cost =
1343    current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
1344    ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
1345    ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
1346  if (ts_parser__better_version_exists(self, version, false, new_cost)) {
1347    ts_stack_halt(self->stack, version);
1348    ts_subtree_release(&self->tree_pool, lookahead);
1349    return;
1350  }
1351
1352  // If the current lookahead token is an extra token, mark it as extra. This means it won't
1353  // be counted in error cost calculations.
1354  unsigned n;
1355  const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
1356  if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) {
1357    MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1358    ts_subtree_set_extra(&mutable_lookahead, true);
1359    lookahead = ts_subtree_from_mut(mutable_lookahead);
1360  }
1361
1362  // Wrap the lookahead token in an ERROR.
1363  LOG("skip_token symbol:%s", TREE_NAME(lookahead));
1364  SubtreeArray children = array_new();
1365  array_reserve(&children, 1);
1366  array_push(&children, lookahead);
1367  MutableSubtree error_repeat = ts_subtree_new_node(
1368    ts_builtin_sym_error_repeat,
1369    &children,
1370    0,
1371    self->language
1372  );
1373
1374  // If other tokens have already been skipped, so there is already an ERROR at the top of the
1375  // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
1376  // ERROR.
1377  if (node_count_since_error > 0) {
1378    StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
1379
1380    // TODO: Figure out how to make this condition occur.
1381    // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
1382    // If multiple stack versions have merged at this point, just pick one of the errors
1383    // arbitrarily and discard the rest.
1384    if (pop.size > 1) {
1385      for (unsigned i = 1; i < pop.size; i++) {
1386        ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
1387      }
1388      while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
1389        ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
1390      }
1391    }
1392
1393    ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
1394    array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
1395    error_repeat = ts_subtree_new_node(
1396      ts_builtin_sym_error_repeat,
1397      &pop.contents[0].subtrees,
1398      0,
1399      self->language
1400    );
1401  }
1402
1403  // Push the new ERROR onto the stack.
1404  ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
1405  if (ts_subtree_has_external_tokens(lookahead)) {
1406    ts_stack_set_last_external_token(
1407      self->stack, version, ts_subtree_last_external_token(lookahead)
1408    );
1409  }
1410}
1411
1412static void ts_parser__handle_error(
1413  TSParser *self,
1414  StackVersion version,
1415  Subtree lookahead
1416) {
1417  uint32_t previous_version_count = ts_stack_version_count(self->stack);
1418
1419  // Perform any reductions that can happen in this state, regardless of the lookahead. After
1420  // skipping one or more invalid tokens, the parser might find a token that would have allowed
1421  // a reduction to take place.
1422  ts_parser__do_all_potential_reductions(self, version, 0);
1423  uint32_t version_count = ts_stack_version_count(self->stack);
1424  Length position = ts_stack_position(self->stack, version);
1425
1426  // Push a discontinuity onto the stack. Merge all of the stack versions that
1427  // were created in the previous step.
1428  bool did_insert_missing_token = false;
1429  for (StackVersion v = version; v < version_count;) {
1430    if (!did_insert_missing_token) {
1431      TSStateId state = ts_stack_state(self->stack, v);
1432      for (
1433        TSSymbol missing_symbol = 1;
1434        missing_symbol < (uint16_t)self->language->token_count;
1435        missing_symbol++
1436      ) {
1437        TSStateId state_after_missing_symbol = ts_language_next_state(
1438          self->language, state, missing_symbol
1439        );
1440        if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
1441          continue;
1442        }
1443
1444        if (ts_language_has_reduce_action(
1445          self->language,
1446          state_after_missing_symbol,
1447          ts_subtree_leaf_symbol(lookahead)
1448        )) {
1449          // In case the parser is currently outside of any included range, the lexer will
1450          // snap to the beginning of the next included range. The missing token's padding
1451          // must be assigned to position it within the next included range.
1452          ts_lexer_reset(&self->lexer, position);
1453          ts_lexer_mark_end(&self->lexer);
1454          Length padding = length_sub(self->lexer.token_end_position, position);
1455          uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead);
1456
1457          StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
1458          Subtree missing_tree = ts_subtree_new_missing_leaf(
1459            &self->tree_pool, missing_symbol,
1460            padding, lookahead_bytes,
1461            self->language
1462          );
1463          ts_stack_push(
1464            self->stack, version_with_missing_tree,
1465            missing_tree, false,
1466            state_after_missing_symbol
1467          );
1468
1469          if (ts_parser__do_all_potential_reductions(
1470            self, version_with_missing_tree,
1471            ts_subtree_leaf_symbol(lookahead)
1472          )) {
1473            LOG(
1474              "recover_with_missing symbol:%s, state:%u",
1475              SYM_NAME(missing_symbol),
1476              ts_stack_state(self->stack, version_with_missing_tree)
1477            );
1478            did_insert_missing_token = true;
1479            break;
1480          }
1481        }
1482      }
1483    }
1484
1485    ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
1486    v = (v == version) ? previous_version_count : v + 1;
1487  }
1488
1489  for (unsigned i = previous_version_count; i < version_count; i++) {
1490    bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
1491    assert(did_merge);
1492    (void)did_merge;	//	fix warning/error with clang -Os
1493  }
1494
1495  ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
1496
1497  // Begin recovery with the current lookahead node, rather than waiting for the
1498  // next turn of the parse loop. This ensures that the tree accounts for the
1499  // current lookahead token's "lookahead bytes" value, which describes how far
1500  // the lexer needed to look ahead beyond the content of the token in order to
1501  // recognize it.
1502  if (ts_subtree_child_count(lookahead) > 0) {
1503    ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1504  }
1505  ts_parser__recover(self, version, lookahead);
1506
1507  LOG_STACK();
1508}
1509
1510static bool ts_parser__advance(
1511  TSParser *self,
1512  StackVersion version,
1513  bool allow_node_reuse
1514) {
1515  TSStateId state = ts_stack_state(self->stack, version);
1516  uint32_t position = ts_stack_position(self->stack, version).bytes;
1517  Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
1518
1519  bool did_reuse = true;
1520  Subtree lookahead = NULL_SUBTREE;
1521  TableEntry table_entry = {.action_count = 0};
1522
1523  // If possible, reuse a node from the previous syntax tree.
1524  if (allow_node_reuse) {
1525    lookahead = ts_parser__reuse_node(
1526      self, version, &state, position, last_external_token, &table_entry
1527    );
1528  }
1529
1530  // If no node from the previous syntax tree could be reused, then try to
1531  // reuse the token previously returned by the lexer.
1532  if (!lookahead.ptr) {
1533    did_reuse = false;
1534    lookahead = ts_parser__get_cached_token(
1535      self, state, position, last_external_token, &table_entry
1536    );
1537  }
1538
1539  bool needs_lex = !lookahead.ptr;
1540  for (;;) {
1541    // Otherwise, re-run the lexer.
1542    if (needs_lex) {
1543      needs_lex = false;
1544      lookahead = ts_parser__lex(self, version, state);
1545      if (self->has_scanner_error) return false;
1546
1547      if (lookahead.ptr) {
1548        ts_parser__set_cached_token(self, position, last_external_token, lookahead);
1549        ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
1550      }
1551
1552      // When parsing a non-terminal extra, a null lookahead indicates the
1553      // end of the rule. The reduction is stored in the EOF table entry.
1554      // After the reduction, the lexer needs to be run again.
1555      else {
1556        ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
1557      }
1558    }
1559
1560    // If a cancellation flag or a timeout was provided, then check every
1561    // time a fixed number of parse actions has been processed.
1562    if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
1563      self->operation_count = 0;
1564    }
1565    if (
1566      self->operation_count == 0 &&
1567      ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1568       (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
1569    ) {
1570      if (lookahead.ptr) {
1571        ts_subtree_release(&self->tree_pool, lookahead);
1572      }
1573      return false;
1574    }
1575
1576    // Process each parse action for the current lookahead token in
1577    // the current state. If there are multiple actions, then this is
1578    // an ambiguous state. REDUCE actions always create a new stack
1579    // version, whereas SHIFT actions update the existing stack version
1580    // and terminate this loop.
1581    StackVersion last_reduction_version = STACK_VERSION_NONE;
1582    for (uint32_t i = 0; i < table_entry.action_count; i++) {
1583      TSParseAction action = table_entry.actions[i];
1584
1585      switch (action.type) {
1586        case TSParseActionTypeShift: {
1587          if (action.shift.repetition) break;
1588          TSStateId next_state;
1589          if (action.shift.extra) {
1590            next_state = state;
1591            LOG("shift_extra");
1592          } else {
1593            next_state = action.shift.state;
1594            LOG("shift state:%u", next_state);
1595          }
1596
1597          if (ts_subtree_child_count(lookahead) > 0) {
1598            ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
1599            next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
1600          }
1601
1602          ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
1603          if (did_reuse) reusable_node_advance(&self->reusable_node);
1604          return true;
1605        }
1606
1607        case TSParseActionTypeReduce: {
1608          bool is_fragile = table_entry.action_count > 1;
1609          bool end_of_non_terminal_extra = lookahead.ptr == NULL;
1610          LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count);
1611          StackVersion reduction_version = ts_parser__reduce(
1612            self, version, action.reduce.symbol, action.reduce.child_count,
1613            action.reduce.dynamic_precedence, action.reduce.production_id,
1614            is_fragile, end_of_non_terminal_extra
1615          );
1616          if (reduction_version != STACK_VERSION_NONE) {
1617            last_reduction_version = reduction_version;
1618          }
1619          break;
1620        }
1621
1622        case TSParseActionTypeAccept: {
1623          LOG("accept");
1624          ts_parser__accept(self, version, lookahead);
1625          return true;
1626        }
1627
1628        case TSParseActionTypeRecover: {
1629          if (ts_subtree_child_count(lookahead) > 0) {
1630            ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1631          }
1632
1633          ts_parser__recover(self, version, lookahead);
1634          if (did_reuse) reusable_node_advance(&self->reusable_node);
1635          return true;
1636        }
1637      }
1638    }
1639
1640    // If a reduction was performed, then replace the current stack version
1641    // with one of the stack versions created by a reduction, and continue
1642    // processing this version of the stack with the same lookahead symbol.
1643    if (last_reduction_version != STACK_VERSION_NONE) {
1644      ts_stack_renumber_version(self->stack, last_reduction_version, version);
1645      LOG_STACK();
1646      state = ts_stack_state(self->stack, version);
1647
1648      // At the end of a non-terminal extra rule, the lexer will return a
1649      // null subtree, because the parser needs to perform a fixed reduction
1650      // regardless of the lookahead node. After performing that reduction,
1651      // (and completing the non-terminal extra rule) run the lexer again based
1652      // on the current parse state.
1653      if (!lookahead.ptr) {
1654        needs_lex = true;
1655      } else {
1656        ts_language_table_entry(
1657          self->language,
1658          state,
1659          ts_subtree_leaf_symbol(lookahead),
1660          &table_entry
1661        );
1662      }
1663
1664      continue;
1665    }
1666
1667    // A non-terminal extra rule was reduced and merged into an existing
1668    // stack version. This version can be discarded.
1669    if (!lookahead.ptr) {
1670      ts_stack_halt(self->stack, version);
1671      return true;
1672    }
1673
1674    // If there were no parse actions for the current lookahead token, then
1675    // it is not valid in this state. If the current lookahead token is a
1676    // keyword, then switch to treating it as the normal word token if that
1677    // token is valid in this state.
1678    if (
1679      ts_subtree_is_keyword(lookahead) &&
1680      ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
1681    ) {
1682      ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
1683      if (table_entry.action_count > 0) {
1684        LOG(
1685          "switch from_keyword:%s, to_word_token:%s",
1686          TREE_NAME(lookahead),
1687          SYM_NAME(self->language->keyword_capture_token)
1688        );
1689
1690        MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1691        ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
1692        lookahead = ts_subtree_from_mut(mutable_lookahead);
1693        continue;
1694      }
1695    }
1696
1697    // If the current lookahead token is not valid and the parser is
1698    // already in the error state, restart the error recovery process.
1699    // TODO - can this be unified with the other `RECOVER` case above?
1700    if (state == ERROR_STATE) {
1701      ts_parser__recover(self, version, lookahead);
1702      return true;
1703    }
1704
1705    // If the current lookahead token is not valid and the previous
1706    // subtree on the stack was reused from an old tree, it isn't actually
1707    // valid to reuse it. Remove it from the stack, and in its place,
1708    // push each of its children. Then try again to process the current
1709    // lookahead.
1710    if (ts_parser__breakdown_top_of_stack(self, version)) {
1711      state = ts_stack_state(self->stack, version);
1712      ts_subtree_release(&self->tree_pool, lookahead);
1713      needs_lex = true;
1714      continue;
1715    }
1716
1717    // At this point, the current lookahead token is definitely not valid
1718    // for this parse stack version. Mark this version as paused and continue
1719    // processing any other stack versions that might exist. If some other
1720    // version advances successfully, then this version can simply be removed.
1721    // But if all versions end up paused, then error recovery is needed.
1722    LOG("detect_error");
1723    ts_stack_pause(self->stack, version, lookahead);
1724    return true;
1725  }
1726}
1727
1728static unsigned ts_parser__condense_stack(TSParser *self) {
1729  bool made_changes = false;
1730  unsigned min_error_cost = UINT_MAX;
1731  for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
1732    // Prune any versions that have been marked for removal.
1733    if (ts_stack_is_halted(self->stack, i)) {
1734      ts_stack_remove_version(self->stack, i);
1735      i--;
1736      continue;
1737    }
1738
1739    // Keep track of the minimum error cost of any stack version so
1740    // that it can be returned.
1741    ErrorStatus status_i = ts_parser__version_status(self, i);
1742    if (!status_i.is_in_error && status_i.cost < min_error_cost) {
1743      min_error_cost = status_i.cost;
1744    }
1745
1746    // Examine each pair of stack versions, removing any versions that
1747    // are clearly worse than another version. Ensure that the versions
1748    // are ordered from most promising to least promising.
1749    for (StackVersion j = 0; j < i; j++) {
1750      ErrorStatus status_j = ts_parser__version_status(self, j);
1751
1752      switch (ts_parser__compare_versions(self, status_j, status_i)) {
1753        case ErrorComparisonTakeLeft:
1754          made_changes = true;
1755          ts_stack_remove_version(self->stack, i);
1756          i--;
1757          j = i;
1758          break;
1759
1760        case ErrorComparisonPreferLeft:
1761        case ErrorComparisonNone:
1762          if (ts_stack_merge(self->stack, j, i)) {
1763            made_changes = true;
1764            i--;
1765            j = i;
1766          }
1767          break;
1768
1769        case ErrorComparisonPreferRight:
1770          made_changes = true;
1771          if (ts_stack_merge(self->stack, j, i)) {
1772            i--;
1773            j = i;
1774          } else {
1775            ts_stack_swap_versions(self->stack, i, j);
1776          }
1777          break;
1778
1779        case ErrorComparisonTakeRight:
1780          made_changes = true;
1781          ts_stack_remove_version(self->stack, j);
1782          i--;
1783          j--;
1784          break;
1785      }
1786    }
1787  }
1788
1789  // Enforce a hard upper bound on the number of stack versions by
1790  // discarding the least promising versions.
1791  while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1792    ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
1793    made_changes = true;
1794  }
1795
1796  // If the best-performing stack version is currently paused, or all
1797  // versions are paused, then resume the best paused version and begin
1798  // the error recovery process. Otherwise, remove the paused versions.
1799  if (ts_stack_version_count(self->stack) > 0) {
1800    bool has_unpaused_version = false;
1801    for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
1802      if (ts_stack_is_paused(self->stack, i)) {
1803        if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
1804          LOG("resume version:%u", i);
1805          min_error_cost = ts_stack_error_cost(self->stack, i);
1806          Subtree lookahead = ts_stack_resume(self->stack, i);
1807          ts_parser__handle_error(self, i, lookahead);
1808          has_unpaused_version = true;
1809        } else {
1810          ts_stack_remove_version(self->stack, i);
1811          i--;
1812          n--;
1813        }
1814      } else {
1815        has_unpaused_version = true;
1816      }
1817    }
1818  }
1819
1820  if (made_changes) {
1821    LOG("condense");
1822    LOG_STACK();
1823  }
1824
1825  return min_error_cost;
1826}
1827
1828static bool ts_parser_has_outstanding_parse(TSParser *self) {
1829  return (
1830    self->external_scanner_payload ||
1831    ts_stack_state(self->stack, 0) != 1 ||
1832    ts_stack_node_count_since_error(self->stack, 0) != 0
1833  );
1834}
1835
1836// Parser - Public
1837
1838TSParser *ts_parser_new(void) {
1839  TSParser *self = ts_calloc(1, sizeof(TSParser));
1840  ts_lexer_init(&self->lexer);
1841  array_init(&self->reduce_actions);
1842  array_reserve(&self->reduce_actions, 4);
1843  self->tree_pool = ts_subtree_pool_new(32);
1844  self->stack = ts_stack_new(&self->tree_pool);
1845  self->finished_tree = NULL_SUBTREE;
1846  self->reusable_node = reusable_node_new();
1847  self->dot_graph_file = NULL;
1848  self->cancellation_flag = NULL;
1849  self->timeout_duration = 0;
1850  self->language = NULL;
1851  self->has_scanner_error = false;
1852  self->external_scanner_payload = NULL;
1853  self->end_clock = clock_null();
1854  self->operation_count = 0;
1855  self->old_tree = NULL_SUBTREE;
1856  self->included_range_differences = (TSRangeArray) array_new();
1857  self->included_range_difference_index = 0;
1858  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1859  return self;
1860}
1861
1862void ts_parser_delete(TSParser *self) {
1863  if (!self) return;
1864
1865  ts_parser_set_language(self, NULL);
1866  ts_stack_delete(self->stack);
1867  if (self->reduce_actions.contents) {
1868    array_delete(&self->reduce_actions);
1869  }
1870  if (self->included_range_differences.contents) {
1871    array_delete(&self->included_range_differences);
1872  }
1873  if (self->old_tree.ptr) {
1874    ts_subtree_release(&self->tree_pool, self->old_tree);
1875    self->old_tree = NULL_SUBTREE;
1876  }
1877  ts_wasm_store_delete(self->wasm_store);
1878  ts_lexer_delete(&self->lexer);
1879  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1880  ts_subtree_pool_delete(&self->tree_pool);
1881  reusable_node_delete(&self->reusable_node);
1882  array_delete(&self->trailing_extras);
1883  array_delete(&self->trailing_extras2);
1884  array_delete(&self->scratch_trees);
1885  ts_free(self);
1886}
1887
1888const TSLanguage *ts_parser_language(const TSParser *self) {
1889  return self->language;
1890}
1891
1892bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
1893  ts_parser_reset(self);
1894  ts_language_delete(self->language);
1895  self->language = NULL;
1896
1897  if (language) {
1898    if (
1899      language->version > TREE_SITTER_LANGUAGE_VERSION ||
1900      language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
1901    ) return false;
1902
1903    if (ts_language_is_wasm(language)) {
1904      if (
1905        !self->wasm_store ||
1906        !ts_wasm_store_start(self->wasm_store, &self->lexer.data, language)
1907      ) return false;
1908    }
1909  }
1910
1911  self->language = ts_language_copy(language);
1912  return true;
1913}
1914
1915TSLogger ts_parser_logger(const TSParser *self) {
1916  return self->lexer.logger;
1917}
1918
1919void ts_parser_set_logger(TSParser *self, TSLogger logger) {
1920  self->lexer.logger = logger;
1921}
1922
1923void ts_parser_print_dot_graphs(TSParser *self, int fd) {
1924  if (self->dot_graph_file) {
1925    fclose(self->dot_graph_file);
1926  }
1927
1928  if (fd >= 0) {
1929    #ifdef _WIN32
1930    self->dot_graph_file = _fdopen(fd, "a");
1931    #else
1932    self->dot_graph_file = fdopen(fd, "a");
1933    #endif
1934  } else {
1935    self->dot_graph_file = NULL;
1936  }
1937}
1938
1939const size_t *ts_parser_cancellation_flag(const TSParser *self) {
1940  return (const size_t *)self->cancellation_flag;
1941}
1942
1943void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
1944  self->cancellation_flag = (const volatile size_t *)flag;
1945}
1946
1947uint64_t ts_parser_timeout_micros(const TSParser *self) {
1948  return duration_to_micros(self->timeout_duration);
1949}
1950
1951void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
1952  self->timeout_duration = duration_from_micros(timeout_micros);
1953}
1954
1955bool ts_parser_set_included_ranges(
1956  TSParser *self,
1957  const TSRange *ranges,
1958  uint32_t count
1959) {
1960  return ts_lexer_set_included_ranges(&self->lexer, ranges, count);
1961}
1962
1963const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) {
1964  return ts_lexer_included_ranges(&self->lexer, count);
1965}
1966
1967void ts_parser_reset(TSParser *self) {
1968  ts_parser__external_scanner_destroy(self);
1969  if (self->wasm_store) {
1970    ts_wasm_store_reset(self->wasm_store);
1971  }
1972
1973  if (self->old_tree.ptr) {
1974    ts_subtree_release(&self->tree_pool, self->old_tree);
1975    self->old_tree = NULL_SUBTREE;
1976  }
1977
1978  reusable_node_clear(&self->reusable_node);
1979  ts_lexer_reset(&self->lexer, length_zero());
1980  ts_stack_clear(self->stack);
1981  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1982  if (self->finished_tree.ptr) {
1983    ts_subtree_release(&self->tree_pool, self->finished_tree);
1984    self->finished_tree = NULL_SUBTREE;
1985  }
1986  self->accept_count = 0;
1987  self->has_scanner_error = false;
1988}
1989
1990TSTree *ts_parser_parse(
1991  TSParser *self,
1992  const TSTree *old_tree,
1993  TSInput input
1994) {
1995  TSTree *result = NULL;
1996  if (!self->language || !input.read) return NULL;
1997
1998  if (ts_language_is_wasm(self->language)) {
1999    if (!self->wasm_store) return NULL;
2000    ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language);
2001  }
2002
2003  ts_lexer_set_input(&self->lexer, input);
2004  array_clear(&self->included_range_differences);
2005  self->included_range_difference_index = 0;
2006
2007  if (ts_parser_has_outstanding_parse(self)) {
2008    LOG("resume_parsing");
2009  } else {
2010    ts_parser__external_scanner_create(self);
2011    if (self->has_scanner_error) goto exit;
2012
2013    if (old_tree) {
2014      ts_subtree_retain(old_tree->root);
2015      self->old_tree = old_tree->root;
2016      ts_range_array_get_changed_ranges(
2017        old_tree->included_ranges, old_tree->included_range_count,
2018        self->lexer.included_ranges, self->lexer.included_range_count,
2019        &self->included_range_differences
2020      );
2021      reusable_node_reset(&self->reusable_node, old_tree->root);
2022      LOG("parse_after_edit");
2023      LOG_TREE(self->old_tree);
2024      for (unsigned i = 0; i < self->included_range_differences.size; i++) {
2025        TSRange *range = &self->included_range_differences.contents[i];
2026        LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
2027      }
2028    } else {
2029      reusable_node_clear(&self->reusable_node);
2030      LOG("new_parse");
2031    }
2032  }
2033
2034  self->operation_count = 0;
2035  if (self->timeout_duration) {
2036    self->end_clock = clock_after(clock_now(), self->timeout_duration);
2037  } else {
2038    self->end_clock = clock_null();
2039  }
2040
2041  uint32_t position = 0, last_position = 0, version_count = 0;
2042  do {
2043    for (
2044      StackVersion version = 0;
2045      version_count = ts_stack_version_count(self->stack),
2046      version < version_count;
2047      version++
2048    ) {
2049      bool allow_node_reuse = version_count == 1;
2050      while (ts_stack_is_active(self->stack, version)) {
2051        LOG(
2052          "process version:%u, version_count:%u, state:%d, row:%u, col:%u",
2053          version,
2054          ts_stack_version_count(self->stack),
2055          ts_stack_state(self->stack, version),
2056          ts_stack_position(self->stack, version).extent.row,
2057          ts_stack_position(self->stack, version).extent.column
2058        );
2059
2060        if (!ts_parser__advance(self, version, allow_node_reuse)) {
2061          if (self->has_scanner_error) goto exit;
2062          return NULL;
2063        }
2064
2065        LOG_STACK();
2066
2067        position = ts_stack_position(self->stack, version).bytes;
2068        if (position > last_position || (version > 0 && position == last_position)) {
2069          last_position = position;
2070          break;
2071        }
2072      }
2073    }
2074
2075    // After advancing each version of the stack, re-sort the versions by their cost,
2076    // removing any versions that are no longer worth pursuing.
2077    unsigned min_error_cost = ts_parser__condense_stack(self);
2078
2079    // If there's already a finished parse tree that's better than any in-progress version,
2080    // then terminate parsing. Clear the parse stack to remove any extra references to subtrees
2081    // within the finished tree, ensuring that these subtrees can be safely mutated in-place
2082    // for rebalancing.
2083    if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
2084      ts_stack_clear(self->stack);
2085      break;
2086    }
2087
2088    while (self->included_range_difference_index < self->included_range_differences.size) {
2089      TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
2090      if (range->end_byte <= position) {
2091        self->included_range_difference_index++;
2092      } else {
2093        break;
2094      }
2095    }
2096  } while (version_count != 0);
2097
2098  assert(self->finished_tree.ptr);
2099  ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
2100  LOG("done");
2101  LOG_TREE(self->finished_tree);
2102
2103  result = ts_tree_new(
2104    self->finished_tree,
2105    self->language,
2106    self->lexer.included_ranges,
2107    self->lexer.included_range_count
2108  );
2109  self->finished_tree = NULL_SUBTREE;
2110
2111exit:
2112  ts_parser_reset(self);
2113  return result;
2114}
2115
2116TSTree *ts_parser_parse_string(
2117  TSParser *self,
2118  const TSTree *old_tree,
2119  const char *string,
2120  uint32_t length
2121) {
2122  return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8);
2123}
2124
2125TSTree *ts_parser_parse_string_encoding(
2126  TSParser *self,
2127  const TSTree *old_tree,
2128  const char *string,
2129  uint32_t length,
2130  TSInputEncoding encoding
2131) {
2132  TSStringInput input = {string, length};
2133  return ts_parser_parse(self, old_tree, (TSInput) {
2134    &input,
2135    ts_string_input_read,
2136    encoding,
2137  });
2138}
2139
2140void ts_parser_set_wasm_store(TSParser *self, TSWasmStore *store) {
2141  ts_wasm_store_delete(self->wasm_store);
2142  self->wasm_store = store;
2143}
2144
2145TSWasmStore *ts_parser_take_wasm_store(TSParser *self) {
2146  TSWasmStore *result = self->wasm_store;
2147  self->wasm_store = NULL;
2148  return result;
2149}
2150
2151#undef LOG