1#include <time.h>
   2#include <assert.h>
   3#include <stdio.h>
   4#include <limits.h>
   5#include <stdbool.h>
   6#include "tree_sitter/api.h"
   7#include "./alloc.h"
   8#include "./array.h"
   9#include "./atomic.h"
  10#include "./clock.h"
  11#include "./error_costs.h"
  12#include "./get_changed_ranges.h"
  13#include "./language.h"
  14#include "./length.h"
  15#include "./lexer.h"
  16#include "./reduce_action.h"
  17#include "./reusable_node.h"
  18#include "./stack.h"
  19#include "./subtree.h"
  20#include "./tree.h"
  21
  22#define LOG(...)                                                                            \
  23  if (self->lexer.logger.log || self->dot_graph_file) {                                     \
  24    snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
  25    ts_parser__log(self);                                                                   \
  26  }
  27
  28#define LOG_LOOKAHEAD(symbol_name, size)                      \
  29  if (self->lexer.logger.log || self->dot_graph_file) {       \
  30    char *buf = self->lexer.debug_buffer;                     \
  31    const char *symbol = symbol_name;                         \
  32    int off = sprintf(buf, "lexed_lookahead sym:");           \
  33    for (                                                     \
  34      int i = 0;                                              \
  35      symbol[i] != '\0'                                       \
  36      && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;         \
  37      i++                                                     \
  38    ) {                                                       \
  39      switch (symbol[i]) {                                    \
  40      case '\t': buf[off++] = '\\'; buf[off++] = 't'; break;  \
  41      case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break;  \
  42      case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break;  \
  43      case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break;  \
  44      case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break;  \
  45      case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \
  46      default:   buf[off++] = symbol[i]; break;               \
  47      }                                                       \
  48    }                                                         \
  49    snprintf(                                                 \
  50      buf + off,                                              \
  51      TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off,            \
  52      ", size:%u",                                            \
  53      size                                                    \
  54    );                                                        \
  55    ts_parser__log(self);                                     \
  56  }
  57
  58#define LOG_STACK()                                                              \
  59  if (self->dot_graph_file) {                                                    \
  60    ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
  61    fputs("\n\n", self->dot_graph_file);                                         \
  62  }
  63
  64#define LOG_TREE(tree)                                                      \
  65  if (self->dot_graph_file) {                                               \
  66    ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
  67    fputs("\n", self->dot_graph_file);                                      \
  68  }
  69
  70#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
  71
  72#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
  73
  74static const unsigned MAX_VERSION_COUNT = 6;
  75static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
  76static const unsigned MAX_SUMMARY_DEPTH = 16;
  77static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
  78static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
  79
  80typedef struct {
  81  Subtree token;
  82  Subtree last_external_token;
  83  uint32_t byte_index;
  84} TokenCache;
  85
  86struct TSParser {
  87  Lexer lexer;
  88  Stack *stack;
  89  SubtreePool tree_pool;
  90  const TSLanguage *language;
  91  ReduceActionSet reduce_actions;
  92  Subtree finished_tree;
  93  SubtreeArray trailing_extras;
  94  SubtreeArray trailing_extras2;
  95  SubtreeArray scratch_trees;
  96  TokenCache token_cache;
  97  ReusableNode reusable_node;
  98  void *external_scanner_payload;
  99  FILE *dot_graph_file;
 100  TSClock end_clock;
 101  TSDuration timeout_duration;
 102  unsigned accept_count;
 103  unsigned operation_count;
 104  const volatile size_t *cancellation_flag;
 105  Subtree old_tree;
 106  TSRangeArray included_range_differences;
 107  unsigned included_range_difference_index;
 108};
 109
 110typedef struct {
 111  unsigned cost;
 112  unsigned node_count;
 113  int dynamic_precedence;
 114  bool is_in_error;
 115} ErrorStatus;
 116
 117typedef enum {
 118  ErrorComparisonTakeLeft,
 119  ErrorComparisonPreferLeft,
 120  ErrorComparisonNone,
 121  ErrorComparisonPreferRight,
 122  ErrorComparisonTakeRight,
 123} ErrorComparison;
 124
 125typedef struct {
 126  const char *string;
 127  uint32_t length;
 128} TSStringInput;
 129
 130// StringInput
 131
 132static const char *ts_string_input_read(
 133  void *_self,
 134  uint32_t byte,
 135  TSPoint point,
 136  uint32_t *length
 137) {
 138  (void)point;
 139  TSStringInput *self = (TSStringInput *)_self;
 140  if (byte >= self->length) {
 141    *length = 0;
 142    return "";
 143  } else {
 144    *length = self->length - byte;
 145    return self->string + byte;
 146  }
 147}
 148
 149// Parser - Private
 150
 151static void ts_parser__log(TSParser *self) {
 152  if (self->lexer.logger.log) {
 153    self->lexer.logger.log(
 154      self->lexer.logger.payload,
 155      TSLogTypeParse,
 156      self->lexer.debug_buffer
 157    );
 158  }
 159
 160  if (self->dot_graph_file) {
 161    fprintf(self->dot_graph_file, "graph {\nlabel=\"");
 162    for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) {
 163      if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file);
 164      fputc(*chr, self->dot_graph_file);
 165    }
 166    fprintf(self->dot_graph_file, "\"\n}\n\n");
 167  }
 168}
 169
 170static bool ts_parser__breakdown_top_of_stack(
 171  TSParser *self,
 172  StackVersion version
 173) {
 174  bool did_break_down = false;
 175  bool pending = false;
 176
 177  do {
 178    StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
 179    if (!pop.size) break;
 180
 181    did_break_down = true;
 182    pending = false;
 183    for (uint32_t i = 0; i < pop.size; i++) {
 184      StackSlice slice = pop.contents[i];
 185      TSStateId state = ts_stack_state(self->stack, slice.version);
 186      Subtree parent = *array_front(&slice.subtrees);
 187
 188      for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
 189        Subtree child = ts_subtree_children(parent)[j];
 190        pending = ts_subtree_child_count(child) > 0;
 191
 192        if (ts_subtree_is_error(child)) {
 193          state = ERROR_STATE;
 194        } else if (!ts_subtree_extra(child)) {
 195          state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
 196        }
 197
 198        ts_subtree_retain(child);
 199        ts_stack_push(self->stack, slice.version, child, pending, state);
 200      }
 201
 202      for (uint32_t j = 1; j < slice.subtrees.size; j++) {
 203        Subtree tree = slice.subtrees.contents[j];
 204        ts_stack_push(self->stack, slice.version, tree, false, state);
 205      }
 206
 207      ts_subtree_release(&self->tree_pool, parent);
 208      array_delete(&slice.subtrees);
 209
 210      LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
 211      LOG_STACK();
 212    }
 213  } while (pending);
 214
 215  return did_break_down;
 216}
 217
 218static void ts_parser__breakdown_lookahead(
 219  TSParser *self,
 220  Subtree *lookahead,
 221  TSStateId state,
 222  ReusableNode *reusable_node
 223) {
 224  bool did_descend = false;
 225  Subtree tree = reusable_node_tree(reusable_node);
 226  while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
 227    LOG("state_mismatch sym:%s", TREE_NAME(tree));
 228    reusable_node_descend(reusable_node);
 229    tree = reusable_node_tree(reusable_node);
 230    did_descend = true;
 231  }
 232
 233  if (did_descend) {
 234    ts_subtree_release(&self->tree_pool, *lookahead);
 235    *lookahead = tree;
 236    ts_subtree_retain(*lookahead);
 237  }
 238}
 239
 240static ErrorComparison ts_parser__compare_versions(
 241  TSParser *self,
 242  ErrorStatus a,
 243  ErrorStatus b
 244) {
 245  (void)self;
 246  if (!a.is_in_error && b.is_in_error) {
 247    if (a.cost < b.cost) {
 248      return ErrorComparisonTakeLeft;
 249    } else {
 250      return ErrorComparisonPreferLeft;
 251    }
 252  }
 253
 254  if (a.is_in_error && !b.is_in_error) {
 255    if (b.cost < a.cost) {
 256      return ErrorComparisonTakeRight;
 257    } else {
 258      return ErrorComparisonPreferRight;
 259    }
 260  }
 261
 262  if (a.cost < b.cost) {
 263    if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
 264      return ErrorComparisonTakeLeft;
 265    } else {
 266      return ErrorComparisonPreferLeft;
 267    }
 268  }
 269
 270  if (b.cost < a.cost) {
 271    if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
 272      return ErrorComparisonTakeRight;
 273    } else {
 274      return ErrorComparisonPreferRight;
 275    }
 276  }
 277
 278  if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
 279  if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
 280  return ErrorComparisonNone;
 281}
 282
 283static ErrorStatus ts_parser__version_status(
 284  TSParser *self,
 285  StackVersion version
 286) {
 287  unsigned cost = ts_stack_error_cost(self->stack, version);
 288  bool is_paused = ts_stack_is_paused(self->stack, version);
 289  if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
 290  return (ErrorStatus) {
 291    .cost = cost,
 292    .node_count = ts_stack_node_count_since_error(self->stack, version),
 293    .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
 294    .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
 295  };
 296}
 297
 298static bool ts_parser__better_version_exists(
 299  TSParser *self,
 300  StackVersion version,
 301  bool is_in_error,
 302  unsigned cost
 303) {
 304  if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
 305    return true;
 306  }
 307
 308  Length position = ts_stack_position(self->stack, version);
 309  ErrorStatus status = {
 310    .cost = cost,
 311    .is_in_error = is_in_error,
 312    .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
 313    .node_count = ts_stack_node_count_since_error(self->stack, version),
 314  };
 315
 316  for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
 317    if (i == version ||
 318        !ts_stack_is_active(self->stack, i) ||
 319        ts_stack_position(self->stack, i).bytes < position.bytes) continue;
 320    ErrorStatus status_i = ts_parser__version_status(self, i);
 321    switch (ts_parser__compare_versions(self, status, status_i)) {
 322      case ErrorComparisonTakeRight:
 323        return true;
 324      case ErrorComparisonPreferRight:
 325        if (ts_stack_can_merge(self->stack, i, version)) return true;
 326        break;
 327      default:
 328        break;
 329    }
 330  }
 331
 332  return false;
 333}
 334
 335static void ts_parser__restore_external_scanner(
 336  TSParser *self,
 337  Subtree external_token
 338) {
 339  if (external_token.ptr) {
 340    self->language->external_scanner.deserialize(
 341      self->external_scanner_payload,
 342      ts_external_scanner_state_data(&external_token.ptr->external_scanner_state),
 343      external_token.ptr->external_scanner_state.length
 344    );
 345  } else {
 346    self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
 347  }
 348}
 349
 350static bool ts_parser__can_reuse_first_leaf(
 351  TSParser *self,
 352  TSStateId state,
 353  Subtree tree,
 354  TableEntry *table_entry
 355) {
 356  TSLexMode current_lex_mode = self->language->lex_modes[state];
 357  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
 358  TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
 359  TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
 360
 361  // At the end of a non-terminal extra node, the lexer normally returns
 362  // NULL, which indicates that the parser should look for a reduce action
 363  // at symbol `0`. Avoid reusing tokens in this situation to ensure that
 364  // the same thing happens when incrementally reparsing.
 365  if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
 366
 367  // If the token was created in a state with the same set of lookaheads, it is reusable.
 368  if (
 369    table_entry->action_count > 0 &&
 370    memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
 371    (
 372      leaf_symbol != self->language->keyword_capture_token ||
 373      (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state)
 374    )
 375  ) return true;
 376
 377  // Empty tokens are not reusable in states with different lookaheads.
 378  if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
 379
 380  // If the current state allows external tokens or other tokens that conflict with this
 381  // token, this token is not reusable.
 382  return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
 383}
 384
 385static Subtree ts_parser__lex(
 386  TSParser *self,
 387  StackVersion version,
 388  TSStateId parse_state
 389) {
 390  TSLexMode lex_mode = self->language->lex_modes[parse_state];
 391  if (lex_mode.lex_state == (uint16_t)-1) {
 392    LOG("no_lookahead_after_non_terminal_extra");
 393    return NULL_SUBTREE;
 394  }
 395
 396  const Length start_position = ts_stack_position(self->stack, version);
 397  const Subtree external_token = ts_stack_last_external_token(self->stack, version);
 398  const bool *valid_external_tokens = ts_language_enabled_external_tokens(
 399    self->language,
 400    lex_mode.external_lex_state
 401  );
 402
 403  bool found_external_token = false;
 404  bool error_mode = parse_state == ERROR_STATE;
 405  bool skipped_error = false;
 406  bool called_get_column = false;
 407  int32_t first_error_character = 0;
 408  Length error_start_position = length_zero();
 409  Length error_end_position = length_zero();
 410  uint32_t lookahead_end_byte = 0;
 411  uint32_t external_scanner_state_len = 0;
 412  bool external_scanner_state_changed = false;
 413  ts_lexer_reset(&self->lexer, start_position);
 414
 415  for (;;) {
 416    Length current_position = self->lexer.current_position;
 417
 418    if (valid_external_tokens) {
 419      LOG(
 420        "lex_external state:%d, row:%u, column:%u",
 421        lex_mode.external_lex_state,
 422        current_position.extent.row,
 423        current_position.extent.column
 424      );
 425      ts_lexer_start(&self->lexer);
 426      ts_parser__restore_external_scanner(self, external_token);
 427      bool found_token = self->language->external_scanner.scan(
 428        self->external_scanner_payload,
 429        &self->lexer.data,
 430        valid_external_tokens
 431      );
 432      ts_lexer_finish(&self->lexer, &lookahead_end_byte);
 433
 434      if (found_token) {
 435        external_scanner_state_len = self->language->external_scanner.serialize(
 436          self->external_scanner_payload,
 437          self->lexer.debug_buffer
 438        );
 439        external_scanner_state_changed = !ts_external_scanner_state_eq(
 440          ts_subtree_external_scanner_state(external_token),
 441          self->lexer.debug_buffer,
 442          external_scanner_state_len
 443        );
 444
 445        // When recovering from an error, ignore any zero-length external tokens
 446        // unless they have changed the external scanner's state. This helps to
 447        // avoid infinite loops which could otherwise occur, because the lexer is
 448        // looking for any possible token, instead of looking for the specific set of
 449        // tokens that are valid in some parse state.
 450        //
 451        // Note that it's possible that the token end position may be *before* the
 452        // original position of the lexer because of the way that tokens are positioned
 453        // at included range boundaries: when a token is terminated at the start of
 454        // an included range, it is marked as ending at the *end* of the preceding
 455        // included range.
 456        if (
 457          self->lexer.token_end_position.bytes <= current_position.bytes &&
 458          (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
 459          !external_scanner_state_changed
 460        ) {
 461          LOG(
 462            "ignore_empty_external_token symbol:%s",
 463            SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol])
 464          )
 465          found_token = false;
 466        }
 467      }
 468
 469      if (found_token) {
 470        found_external_token = true;
 471        called_get_column = self->lexer.did_get_column;
 472        break;
 473      }
 474
 475      ts_lexer_reset(&self->lexer, current_position);
 476    }
 477
 478    LOG(
 479      "lex_internal state:%d, row:%u, column:%u",
 480      lex_mode.lex_state,
 481      current_position.extent.row,
 482      current_position.extent.column
 483    );
 484    ts_lexer_start(&self->lexer);
 485    bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
 486    ts_lexer_finish(&self->lexer, &lookahead_end_byte);
 487    if (found_token) break;
 488
 489    if (!error_mode) {
 490      error_mode = true;
 491      lex_mode = self->language->lex_modes[ERROR_STATE];
 492      valid_external_tokens = ts_language_enabled_external_tokens(
 493        self->language,
 494        lex_mode.external_lex_state
 495      );
 496      ts_lexer_reset(&self->lexer, start_position);
 497      continue;
 498    }
 499
 500    if (!skipped_error) {
 501      LOG("skip_unrecognized_character");
 502      skipped_error = true;
 503      error_start_position = self->lexer.token_start_position;
 504      error_end_position = self->lexer.token_start_position;
 505      first_error_character = self->lexer.data.lookahead;
 506    }
 507
 508    if (self->lexer.current_position.bytes == error_end_position.bytes) {
 509      if (self->lexer.data.eof(&self->lexer.data)) {
 510        self->lexer.data.result_symbol = ts_builtin_sym_error;
 511        break;
 512      }
 513      self->lexer.data.advance(&self->lexer.data, false);
 514    }
 515
 516    error_end_position = self->lexer.current_position;
 517  }
 518
 519  Subtree result;
 520  if (skipped_error) {
 521    Length padding = length_sub(error_start_position, start_position);
 522    Length size = length_sub(error_end_position, error_start_position);
 523    uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
 524    result = ts_subtree_new_error(
 525      &self->tree_pool,
 526      first_error_character,
 527      padding,
 528      size,
 529      lookahead_bytes,
 530      parse_state,
 531      self->language
 532    );
 533  } else {
 534    bool is_keyword = false;
 535    TSSymbol symbol = self->lexer.data.result_symbol;
 536    Length padding = length_sub(self->lexer.token_start_position, start_position);
 537    Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
 538    uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
 539
 540    if (found_external_token) {
 541      symbol = self->language->external_scanner.symbol_map[symbol];
 542    } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
 543      uint32_t end_byte = self->lexer.token_end_position.bytes;
 544      ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
 545      ts_lexer_start(&self->lexer);
 546      if (
 547        self->language->keyword_lex_fn(&self->lexer.data, 0) &&
 548        self->lexer.token_end_position.bytes == end_byte &&
 549        ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
 550      ) {
 551        is_keyword = true;
 552        symbol = self->lexer.data.result_symbol;
 553      }
 554    }
 555
 556    result = ts_subtree_new_leaf(
 557      &self->tree_pool,
 558      symbol,
 559      padding,
 560      size,
 561      lookahead_bytes,
 562      parse_state,
 563      found_external_token,
 564      called_get_column,
 565      is_keyword,
 566      self->language
 567    );
 568
 569    if (found_external_token) {
 570      MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result);
 571      ts_external_scanner_state_init(
 572        &mut_result.ptr->external_scanner_state,
 573        self->lexer.debug_buffer,
 574        external_scanner_state_len
 575      );
 576      mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed;
 577    }
 578  }
 579
 580  LOG_LOOKAHEAD(
 581    SYM_NAME(ts_subtree_symbol(result)),
 582    ts_subtree_total_size(result).bytes
 583  );
 584  return result;
 585}
 586
 587static Subtree ts_parser__get_cached_token(
 588  TSParser *self,
 589  TSStateId state,
 590  size_t position,
 591  Subtree last_external_token,
 592  TableEntry *table_entry
 593) {
 594  TokenCache *cache = &self->token_cache;
 595  if (
 596    cache->token.ptr && cache->byte_index == position &&
 597    ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
 598  ) {
 599    ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
 600    if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
 601      ts_subtree_retain(cache->token);
 602      return cache->token;
 603    }
 604  }
 605  return NULL_SUBTREE;
 606}
 607
 608static void ts_parser__set_cached_token(
 609  TSParser *self,
 610  uint32_t byte_index,
 611  Subtree last_external_token,
 612  Subtree token
 613) {
 614  TokenCache *cache = &self->token_cache;
 615  if (token.ptr) ts_subtree_retain(token);
 616  if (last_external_token.ptr) ts_subtree_retain(last_external_token);
 617  if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
 618  if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token);
 619  cache->token = token;
 620  cache->byte_index = byte_index;
 621  cache->last_external_token = last_external_token;
 622}
 623
 624static bool ts_parser__has_included_range_difference(
 625  const TSParser *self,
 626  uint32_t start_position,
 627  uint32_t end_position
 628) {
 629  return ts_range_array_intersects(
 630    &self->included_range_differences,
 631    self->included_range_difference_index,
 632    start_position,
 633    end_position
 634  );
 635}
 636
 637static Subtree ts_parser__reuse_node(
 638  TSParser *self,
 639  StackVersion version,
 640  TSStateId *state,
 641  uint32_t position,
 642  Subtree last_external_token,
 643  TableEntry *table_entry
 644) {
 645  Subtree result;
 646  while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
 647    uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
 648    uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
 649
 650    // Do not reuse an EOF node if the included ranges array has changes
 651    // later on in the file.
 652    if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
 653
 654    if (byte_offset > position) {
 655      LOG("before_reusable_node symbol:%s", TREE_NAME(result));
 656      break;
 657    }
 658
 659    if (byte_offset < position) {
 660      LOG("past_reusable_node symbol:%s", TREE_NAME(result));
 661      if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
 662        reusable_node_advance(&self->reusable_node);
 663      }
 664      continue;
 665    }
 666
 667    if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
 668      LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
 669      reusable_node_advance(&self->reusable_node);
 670      continue;
 671    }
 672
 673    const char *reason = NULL;
 674    if (ts_subtree_has_changes(result)) {
 675      reason = "has_changes";
 676    } else if (ts_subtree_is_error(result)) {
 677      reason = "is_error";
 678    } else if (ts_subtree_missing(result)) {
 679      reason = "is_missing";
 680    } else if (ts_subtree_is_fragile(result)) {
 681      reason = "is_fragile";
 682    } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
 683      reason = "contains_different_included_range";
 684    }
 685
 686    if (reason) {
 687      LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
 688      if (!reusable_node_descend(&self->reusable_node)) {
 689        reusable_node_advance(&self->reusable_node);
 690        ts_parser__breakdown_top_of_stack(self, version);
 691        *state = ts_stack_state(self->stack, version);
 692      }
 693      continue;
 694    }
 695
 696    TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
 697    ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
 698    if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
 699      LOG(
 700        "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
 701        TREE_NAME(result),
 702        SYM_NAME(leaf_symbol)
 703      );
 704      reusable_node_advance_past_leaf(&self->reusable_node);
 705      break;
 706    }
 707
 708    LOG("reuse_node symbol:%s", TREE_NAME(result));
 709    ts_subtree_retain(result);
 710    return result;
 711  }
 712
 713  return NULL_SUBTREE;
 714}
 715
 716// Determine if a given tree should be replaced by an alternative tree.
 717//
 718// The decision is based on the trees' error costs (if any), their dynamic precedence,
 719// and finally, as a default, by a recursive comparison of the trees' symbols.
 720static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
 721  if (!left.ptr) return true;
 722  if (!right.ptr) return false;
 723
 724  if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
 725    LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
 726    return true;
 727  }
 728
 729  if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
 730    LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
 731    return false;
 732  }
 733
 734  if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) {
 735    LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
 736        TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
 737        ts_subtree_dynamic_precedence(left));
 738    return true;
 739  }
 740
 741  if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) {
 742    LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
 743        TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right),
 744        ts_subtree_dynamic_precedence(right));
 745    return false;
 746  }
 747
 748  if (ts_subtree_error_cost(left) > 0) return true;
 749
 750  int comparison = ts_subtree_compare(left, right);
 751  switch (comparison) {
 752    case -1:
 753      LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
 754      return false;
 755      break;
 756    case 1:
 757      LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
 758      return true;
 759    default:
 760      LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
 761      return false;
 762  }
 763}
 764
 765// Determine if a given tree's children should be replaced by an alternative
 766// array of children.
 767static bool ts_parser__select_children(
 768  TSParser *self,
 769  Subtree left,
 770  const SubtreeArray *children
 771) {
 772  array_assign(&self->scratch_trees, children);
 773
 774  // Create a temporary subtree using the scratch trees array. This node does
 775  // not perform any allocation except for possibly growing the array to make
 776  // room for its own heap data. The scratch tree is never explicitly released,
 777  // so the same 'scratch trees' array can be reused again later.
 778  MutableSubtree scratch_tree = ts_subtree_new_node(
 779    ts_subtree_symbol(left),
 780    &self->scratch_trees,
 781    0,
 782    self->language
 783  );
 784
 785  return ts_parser__select_tree(
 786    self,
 787    left,
 788    ts_subtree_from_mut(scratch_tree)
 789  );
 790}
 791
 792static void ts_parser__shift(
 793  TSParser *self,
 794  StackVersion version,
 795  TSStateId state,
 796  Subtree lookahead,
 797  bool extra
 798) {
 799  bool is_leaf = ts_subtree_child_count(lookahead) == 0;
 800  Subtree subtree_to_push = lookahead;
 801  if (extra != ts_subtree_extra(lookahead) && is_leaf) {
 802    MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
 803    ts_subtree_set_extra(&result, extra);
 804    subtree_to_push = ts_subtree_from_mut(result);
 805  }
 806
 807  ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
 808  if (ts_subtree_has_external_tokens(subtree_to_push)) {
 809    ts_stack_set_last_external_token(
 810      self->stack, version, ts_subtree_last_external_token(subtree_to_push)
 811    );
 812  }
 813}
 814
 815static StackVersion ts_parser__reduce(
 816  TSParser *self,
 817  StackVersion version,
 818  TSSymbol symbol,
 819  uint32_t count,
 820  int dynamic_precedence,
 821  uint16_t production_id,
 822  bool is_fragile,
 823  bool end_of_non_terminal_extra
 824) {
 825  uint32_t initial_version_count = ts_stack_version_count(self->stack);
 826
 827  // Pop the given number of nodes from the given version of the parse stack.
 828  // If stack versions have previously merged, then there may be more than one
 829  // path back through the stack. For each path, create a new parent node to
 830  // contain the popped children, and push it onto the stack in place of the
 831  // children.
 832  StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
 833  uint32_t removed_version_count = 0;
 834  for (uint32_t i = 0; i < pop.size; i++) {
 835    StackSlice slice = pop.contents[i];
 836    StackVersion slice_version = slice.version - removed_version_count;
 837
 838    // This is where new versions are added to the parse stack. The versions
 839    // will all be sorted and truncated at the end of the outer parsing loop.
 840    // Allow the maximum version count to be temporarily exceeded, but only
 841    // by a limited threshold.
 842    if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
 843      ts_stack_remove_version(self->stack, slice_version);
 844      ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
 845      removed_version_count++;
 846      while (i + 1 < pop.size) {
 847        StackSlice next_slice = pop.contents[i + 1];
 848        if (next_slice.version != slice.version) break;
 849        ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
 850        i++;
 851      }
 852      continue;
 853    }
 854
 855    // Extra tokens on top of the stack should not be included in this new parent
 856    // node. They will be re-pushed onto the stack after the parent node is
 857    // created and pushed.
 858    SubtreeArray children = slice.subtrees;
 859    ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
 860
 861    MutableSubtree parent = ts_subtree_new_node(
 862      symbol, &children, production_id, self->language
 863    );
 864
 865    // This pop operation may have caused multiple stack versions to collapse
 866    // into one, because they all diverged from a common state. In that case,
 867    // choose one of the arrays of trees to be the parent node's children, and
 868    // delete the rest of the tree arrays.
 869    while (i + 1 < pop.size) {
 870      StackSlice next_slice = pop.contents[i + 1];
 871      if (next_slice.version != slice.version) break;
 872      i++;
 873
 874      SubtreeArray next_slice_children = next_slice.subtrees;
 875      ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2);
 876
 877      if (ts_parser__select_children(
 878        self,
 879        ts_subtree_from_mut(parent),
 880        &next_slice_children
 881      )) {
 882        ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
 883        ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
 884        array_swap(&self->trailing_extras, &self->trailing_extras2);
 885        parent = ts_subtree_new_node(
 886          symbol, &next_slice_children, production_id, self->language
 887        );
 888      } else {
 889        array_clear(&self->trailing_extras2);
 890        ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
 891      }
 892    }
 893
 894    TSStateId state = ts_stack_state(self->stack, slice_version);
 895    TSStateId next_state = ts_language_next_state(self->language, state, symbol);
 896    if (end_of_non_terminal_extra && next_state == state) {
 897      parent.ptr->extra = true;
 898    }
 899    if (is_fragile || pop.size > 1 || initial_version_count > 1) {
 900      parent.ptr->fragile_left = true;
 901      parent.ptr->fragile_right = true;
 902      parent.ptr->parse_state = TS_TREE_STATE_NONE;
 903    } else {
 904      parent.ptr->parse_state = state;
 905    }
 906    parent.ptr->dynamic_precedence += dynamic_precedence;
 907
 908    // Push the parent node onto the stack, along with any extra tokens that
 909    // were previously on top of the stack.
 910    ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
 911    for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
 912      ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
 913    }
 914
 915    for (StackVersion j = 0; j < slice_version; j++) {
 916      if (j == version) continue;
 917      if (ts_stack_merge(self->stack, j, slice_version)) {
 918        removed_version_count++;
 919        break;
 920      }
 921    }
 922  }
 923
 924  // Return the first new stack version that was created.
 925  return ts_stack_version_count(self->stack) > initial_version_count
 926    ? initial_version_count
 927    : STACK_VERSION_NONE;
 928}
 929
 930static void ts_parser__accept(
 931  TSParser *self,
 932  StackVersion version,
 933  Subtree lookahead
 934) {
 935  assert(ts_subtree_is_eof(lookahead));
 936  ts_stack_push(self->stack, version, lookahead, false, 1);
 937
 938  StackSliceArray pop = ts_stack_pop_all(self->stack, version);
 939  for (uint32_t i = 0; i < pop.size; i++) {
 940    SubtreeArray trees = pop.contents[i].subtrees;
 941
 942    Subtree root = NULL_SUBTREE;
 943    for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
 944      Subtree tree = trees.contents[j];
 945      if (!ts_subtree_extra(tree)) {
 946        assert(!tree.data.is_inline);
 947        uint32_t child_count = ts_subtree_child_count(tree);
 948        const Subtree *children = ts_subtree_children(tree);
 949        for (uint32_t k = 0; k < child_count; k++) {
 950          ts_subtree_retain(children[k]);
 951        }
 952        array_splice(&trees, j, 1, child_count, children);
 953        root = ts_subtree_from_mut(ts_subtree_new_node(
 954          ts_subtree_symbol(tree),
 955          &trees,
 956          tree.ptr->production_id,
 957          self->language
 958        ));
 959        ts_subtree_release(&self->tree_pool, tree);
 960        break;
 961      }
 962    }
 963
 964    assert(root.ptr);
 965    self->accept_count++;
 966
 967    if (self->finished_tree.ptr) {
 968      if (ts_parser__select_tree(self, self->finished_tree, root)) {
 969        ts_subtree_release(&self->tree_pool, self->finished_tree);
 970        self->finished_tree = root;
 971      } else {
 972        ts_subtree_release(&self->tree_pool, root);
 973      }
 974    } else {
 975      self->finished_tree = root;
 976    }
 977  }
 978
 979  ts_stack_remove_version(self->stack, pop.contents[0].version);
 980  ts_stack_halt(self->stack, version);
 981}
 982
 983static bool ts_parser__do_all_potential_reductions(
 984  TSParser *self,
 985  StackVersion starting_version,
 986  TSSymbol lookahead_symbol
 987) {
 988  uint32_t initial_version_count = ts_stack_version_count(self->stack);
 989
 990  bool can_shift_lookahead_symbol = false;
 991  StackVersion version = starting_version;
 992  for (unsigned i = 0; true; i++) {
 993    uint32_t version_count = ts_stack_version_count(self->stack);
 994    if (version >= version_count) break;
 995
 996    bool merged = false;
 997    for (StackVersion j = initial_version_count; j < version; j++) {
 998      if (ts_stack_merge(self->stack, j, version)) {
 999        merged = true;
1000        break;
1001      }
1002    }
1003    if (merged) continue;
1004
1005    TSStateId state = ts_stack_state(self->stack, version);
1006    bool has_shift_action = false;
1007    array_clear(&self->reduce_actions);
1008
1009    TSSymbol first_symbol, end_symbol;
1010    if (lookahead_symbol != 0) {
1011      first_symbol = lookahead_symbol;
1012      end_symbol = lookahead_symbol + 1;
1013    } else {
1014      first_symbol = 1;
1015      end_symbol = self->language->token_count;
1016    }
1017
1018    for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
1019      TableEntry entry;
1020      ts_language_table_entry(self->language, state, symbol, &entry);
1021      for (uint32_t j = 0; j < entry.action_count; j++) {
1022        TSParseAction action = entry.actions[j];
1023        switch (action.type) {
1024          case TSParseActionTypeShift:
1025          case TSParseActionTypeRecover:
1026            if (!action.shift.extra && !action.shift.repetition) has_shift_action = true;
1027            break;
1028          case TSParseActionTypeReduce:
1029            if (action.reduce.child_count > 0)
1030              ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) {
1031                .symbol = action.reduce.symbol,
1032                .count = action.reduce.child_count,
1033                .dynamic_precedence = action.reduce.dynamic_precedence,
1034                .production_id = action.reduce.production_id,
1035              });
1036            break;
1037          default:
1038            break;
1039        }
1040      }
1041    }
1042
1043    StackVersion reduction_version = STACK_VERSION_NONE;
1044    for (uint32_t j = 0; j < self->reduce_actions.size; j++) {
1045      ReduceAction action = self->reduce_actions.contents[j];
1046
1047      reduction_version = ts_parser__reduce(
1048        self, version, action.symbol, action.count,
1049        action.dynamic_precedence, action.production_id,
1050        true, false
1051      );
1052    }
1053
1054    if (has_shift_action) {
1055      can_shift_lookahead_symbol = true;
1056    } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
1057      ts_stack_renumber_version(self->stack, reduction_version, version);
1058      continue;
1059    } else if (lookahead_symbol != 0) {
1060      ts_stack_remove_version(self->stack, version);
1061    }
1062
1063    if (version == starting_version) {
1064      version = version_count;
1065    } else {
1066      version++;
1067    }
1068  }
1069
1070  return can_shift_lookahead_symbol;
1071}
1072
1073static bool ts_parser__recover_to_state(
1074  TSParser *self,
1075  StackVersion version,
1076  unsigned depth,
1077  TSStateId goal_state
1078) {
1079  StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
1080  StackVersion previous_version = STACK_VERSION_NONE;
1081
1082  for (unsigned i = 0; i < pop.size; i++) {
1083    StackSlice slice = pop.contents[i];
1084
1085    if (slice.version == previous_version) {
1086      ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1087      array_erase(&pop, i--);
1088      continue;
1089    }
1090
1091    if (ts_stack_state(self->stack, slice.version) != goal_state) {
1092      ts_stack_halt(self->stack, slice.version);
1093      ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1094      array_erase(&pop, i--);
1095      continue;
1096    }
1097
1098    SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
1099    if (error_trees.size > 0) {
1100      assert(error_trees.size == 1);
1101      Subtree error_tree = error_trees.contents[0];
1102      uint32_t error_child_count = ts_subtree_child_count(error_tree);
1103      if (error_child_count > 0) {
1104        array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
1105        for (unsigned j = 0; j < error_child_count; j++) {
1106          ts_subtree_retain(slice.subtrees.contents[j]);
1107        }
1108      }
1109      ts_subtree_array_delete(&self->tree_pool, &error_trees);
1110    }
1111
1112    ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
1113
1114    if (slice.subtrees.size > 0) {
1115      Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
1116      ts_stack_push(self->stack, slice.version, error, false, goal_state);
1117    } else {
1118      array_delete(&slice.subtrees);
1119    }
1120
1121    for (unsigned j = 0; j < self->trailing_extras.size; j++) {
1122      Subtree tree = self->trailing_extras.contents[j];
1123      ts_stack_push(self->stack, slice.version, tree, false, goal_state);
1124    }
1125
1126    previous_version = slice.version;
1127  }
1128
1129  return previous_version != STACK_VERSION_NONE;
1130}
1131
1132static void ts_parser__recover(
1133  TSParser *self,
1134  StackVersion version,
1135  Subtree lookahead
1136) {
1137  bool did_recover = false;
1138  unsigned previous_version_count = ts_stack_version_count(self->stack);
1139  Length position = ts_stack_position(self->stack, version);
1140  StackSummary *summary = ts_stack_get_summary(self->stack, version);
1141  unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
1142  unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
1143
1144  // When the parser is in the error state, there are two strategies for recovering with a
1145  // given lookahead token:
1146  // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
1147  //    create a new stack version that is in that state again. This entails popping all of the
1148  //    subtrees that have been pushed onto the stack since that previous state, and wrapping
1149  //    them in an ERROR node.
1150  // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
1151  //    move on to the next lookahead token, remaining in the error state.
1152  //
1153  // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
1154  // of the previous parse states and their depths. Look at each state in the summary, to see
1155  // if the current lookahead token would be valid in that state.
1156  if (summary && !ts_subtree_is_error(lookahead)) {
1157    for (unsigned i = 0; i < summary->size; i++) {
1158      StackSummaryEntry entry = summary->contents[i];
1159
1160      if (entry.state == ERROR_STATE) continue;
1161      if (entry.position.bytes == position.bytes) continue;
1162      unsigned depth = entry.depth;
1163      if (node_count_since_error > 0) depth++;
1164
1165      // Do not recover in ways that create redundant stack versions.
1166      bool would_merge = false;
1167      for (unsigned j = 0; j < previous_version_count; j++) {
1168        if (
1169          ts_stack_state(self->stack, j) == entry.state &&
1170          ts_stack_position(self->stack, j).bytes == position.bytes
1171        ) {
1172          would_merge = true;
1173          break;
1174        }
1175      }
1176      if (would_merge) continue;
1177
1178      // Do not recover if the result would clearly be worse than some existing stack version.
1179      unsigned new_cost =
1180        current_error_cost +
1181        entry.depth * ERROR_COST_PER_SKIPPED_TREE +
1182        (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
1183        (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
1184      if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
1185
1186      // If the current lookahead token is valid in some previous state, recover to that state.
1187      // Then stop looking for further recoveries.
1188      if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
1189        if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
1190          did_recover = true;
1191          LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
1192          LOG_STACK();
1193          break;
1194        }
1195      }
1196    }
1197  }
1198
1199  // In the process of attempting to recover, some stack versions may have been created
1200  // and subsequently halted. Remove those versions.
1201  for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
1202    if (!ts_stack_is_active(self->stack, i)) {
1203      ts_stack_remove_version(self->stack, i--);
1204    }
1205  }
1206
1207  // If strategy 1 succeeded, a new stack version will have been created which is able to handle
1208  // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
1209  // current lookahead token by wrapping it in an ERROR node.
1210
1211  // Don't pursue this additional strategy if there are already too many stack versions.
1212  if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1213    ts_stack_halt(self->stack, version);
1214    ts_subtree_release(&self->tree_pool, lookahead);
1215    return;
1216  }
1217
1218  if (
1219    did_recover &&
1220    ts_subtree_has_external_scanner_state_change(lookahead)
1221  ) {
1222    ts_stack_halt(self->stack, version);
1223    ts_subtree_release(&self->tree_pool, lookahead);
1224    return;
1225  }
1226
1227  // If the parser is still in the error state at the end of the file, just wrap everything
1228  // in an ERROR node and terminate.
1229  if (ts_subtree_is_eof(lookahead)) {
1230    LOG("recover_eof");
1231    SubtreeArray children = array_new();
1232    Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
1233    ts_stack_push(self->stack, version, parent, false, 1);
1234    ts_parser__accept(self, version, lookahead);
1235    return;
1236  }
1237
1238  // Do not recover if the result would clearly be worse than some existing stack version.
1239  unsigned new_cost =
1240    current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
1241    ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
1242    ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
1243  if (ts_parser__better_version_exists(self, version, false, new_cost)) {
1244    ts_stack_halt(self->stack, version);
1245    ts_subtree_release(&self->tree_pool, lookahead);
1246    return;
1247  }
1248
1249  // If the current lookahead token is an extra token, mark it as extra. This means it won't
1250  // be counted in error cost calculations.
1251  unsigned n;
1252  const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
1253  if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) {
1254    MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1255    ts_subtree_set_extra(&mutable_lookahead, true);
1256    lookahead = ts_subtree_from_mut(mutable_lookahead);
1257  }
1258
1259  // Wrap the lookahead token in an ERROR.
1260  LOG("skip_token symbol:%s", TREE_NAME(lookahead));
1261  SubtreeArray children = array_new();
1262  array_reserve(&children, 1);
1263  array_push(&children, lookahead);
1264  MutableSubtree error_repeat = ts_subtree_new_node(
1265    ts_builtin_sym_error_repeat,
1266    &children,
1267    0,
1268    self->language
1269  );
1270
1271  // If other tokens have already been skipped, so there is already an ERROR at the top of the
1272  // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
1273  // ERROR.
1274  if (node_count_since_error > 0) {
1275    StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
1276
1277    // TODO: Figure out how to make this condition occur.
1278    // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
1279    // If multiple stack versions have merged at this point, just pick one of the errors
1280    // arbitrarily and discard the rest.
1281    if (pop.size > 1) {
1282      for (unsigned i = 1; i < pop.size; i++) {
1283        ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
1284      }
1285      while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
1286        ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
1287      }
1288    }
1289
1290    ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
1291    array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
1292    error_repeat = ts_subtree_new_node(
1293      ts_builtin_sym_error_repeat,
1294      &pop.contents[0].subtrees,
1295      0,
1296      self->language
1297    );
1298  }
1299
1300  // Push the new ERROR onto the stack.
1301  ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
1302  if (ts_subtree_has_external_tokens(lookahead)) {
1303    ts_stack_set_last_external_token(
1304      self->stack, version, ts_subtree_last_external_token(lookahead)
1305    );
1306  }
1307}
1308
1309static void ts_parser__handle_error(
1310  TSParser *self,
1311  StackVersion version,
1312  Subtree lookahead
1313) {
1314  uint32_t previous_version_count = ts_stack_version_count(self->stack);
1315
1316  // Perform any reductions that can happen in this state, regardless of the lookahead. After
1317  // skipping one or more invalid tokens, the parser might find a token that would have allowed
1318  // a reduction to take place.
1319  ts_parser__do_all_potential_reductions(self, version, 0);
1320  uint32_t version_count = ts_stack_version_count(self->stack);
1321  Length position = ts_stack_position(self->stack, version);
1322
1323  // Push a discontinuity onto the stack. Merge all of the stack versions that
1324  // were created in the previous step.
1325  bool did_insert_missing_token = false;
1326  for (StackVersion v = version; v < version_count;) {
1327    if (!did_insert_missing_token) {
1328      TSStateId state = ts_stack_state(self->stack, v);
1329      for (
1330        TSSymbol missing_symbol = 1;
1331        missing_symbol < (uint16_t)self->language->token_count;
1332        missing_symbol++
1333      ) {
1334        TSStateId state_after_missing_symbol = ts_language_next_state(
1335          self->language, state, missing_symbol
1336        );
1337        if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
1338          continue;
1339        }
1340
1341        if (ts_language_has_reduce_action(
1342          self->language,
1343          state_after_missing_symbol,
1344          ts_subtree_leaf_symbol(lookahead)
1345        )) {
1346          // In case the parser is currently outside of any included range, the lexer will
1347          // snap to the beginning of the next included range. The missing token's padding
1348          // must be assigned to position it within the next included range.
1349          ts_lexer_reset(&self->lexer, position);
1350          ts_lexer_mark_end(&self->lexer);
1351          Length padding = length_sub(self->lexer.token_end_position, position);
1352          uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead);
1353
1354          StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
1355          Subtree missing_tree = ts_subtree_new_missing_leaf(
1356            &self->tree_pool, missing_symbol,
1357            padding, lookahead_bytes,
1358            self->language
1359          );
1360          ts_stack_push(
1361            self->stack, version_with_missing_tree,
1362            missing_tree, false,
1363            state_after_missing_symbol
1364          );
1365
1366          if (ts_parser__do_all_potential_reductions(
1367            self, version_with_missing_tree,
1368            ts_subtree_leaf_symbol(lookahead)
1369          )) {
1370            LOG(
1371              "recover_with_missing symbol:%s, state:%u",
1372              SYM_NAME(missing_symbol),
1373              ts_stack_state(self->stack, version_with_missing_tree)
1374            );
1375            did_insert_missing_token = true;
1376            break;
1377          }
1378        }
1379      }
1380    }
1381
1382    ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
1383    v = (v == version) ? previous_version_count : v + 1;
1384  }
1385
1386  for (unsigned i = previous_version_count; i < version_count; i++) {
1387    bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
1388    assert(did_merge);
1389    (void)did_merge;	//	fix warning/error with clang -Os
1390  }
1391
1392  ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
1393
1394  // Begin recovery with the current lookahead node, rather than waiting for the
1395  // next turn of the parse loop. This ensures that the tree accounts for the the
1396  // current lookahead token's "lookahead bytes" value, which describes how far
1397  // the lexer needed to look ahead beyond the content of the token in order to
1398  // recognize it.
1399  if (ts_subtree_child_count(lookahead) > 0) {
1400    ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1401  }
1402  ts_parser__recover(self, version, lookahead);
1403
1404  LOG_STACK();
1405}
1406
1407static bool ts_parser__advance(
1408  TSParser *self,
1409  StackVersion version,
1410  bool allow_node_reuse
1411) {
1412  TSStateId state = ts_stack_state(self->stack, version);
1413  uint32_t position = ts_stack_position(self->stack, version).bytes;
1414  Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
1415
1416  bool did_reuse = true;
1417  Subtree lookahead = NULL_SUBTREE;
1418  TableEntry table_entry = {.action_count = 0};
1419
1420  // If possible, reuse a node from the previous syntax tree.
1421  if (allow_node_reuse) {
1422    lookahead = ts_parser__reuse_node(
1423      self, version, &state, position, last_external_token, &table_entry
1424    );
1425  }
1426
1427  // If no node from the previous syntax tree could be reused, then try to
1428  // reuse the token previously returned by the lexer.
1429  if (!lookahead.ptr) {
1430    did_reuse = false;
1431    lookahead = ts_parser__get_cached_token(
1432      self, state, position, last_external_token, &table_entry
1433    );
1434  }
1435
1436  bool needs_lex = !lookahead.ptr;
1437  for (;;) {
1438    // Otherwise, re-run the lexer.
1439    if (needs_lex) {
1440      needs_lex = false;
1441      lookahead = ts_parser__lex(self, version, state);
1442
1443      if (lookahead.ptr) {
1444        ts_parser__set_cached_token(self, position, last_external_token, lookahead);
1445        ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
1446      }
1447
1448      // When parsing a non-terminal extra, a null lookahead indicates the
1449      // end of the rule. The reduction is stored in the EOF table entry.
1450      // After the reduction, the lexer needs to be run again.
1451      else {
1452        ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
1453      }
1454    }
1455
1456    // If a cancellation flag or a timeout was provided, then check every
1457    // time a fixed number of parse actions has been processed.
1458    if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
1459      self->operation_count = 0;
1460    }
1461    if (
1462      self->operation_count == 0 &&
1463      ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1464       (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
1465    ) {
1466      if (lookahead.ptr) {
1467        ts_subtree_release(&self->tree_pool, lookahead);
1468      }
1469      return false;
1470    }
1471
1472    // Process each parse action for the current lookahead token in
1473    // the current state. If there are multiple actions, then this is
1474    // an ambiguous state. REDUCE actions always create a new stack
1475    // version, whereas SHIFT actions update the existing stack version
1476    // and terminate this loop.
1477    StackVersion last_reduction_version = STACK_VERSION_NONE;
1478    for (uint32_t i = 0; i < table_entry.action_count; i++) {
1479      TSParseAction action = table_entry.actions[i];
1480
1481      switch (action.type) {
1482        case TSParseActionTypeShift: {
1483          if (action.shift.repetition) break;
1484          TSStateId next_state;
1485          if (action.shift.extra) {
1486            next_state = state;
1487            LOG("shift_extra");
1488          } else {
1489            next_state = action.shift.state;
1490            LOG("shift state:%u", next_state);
1491          }
1492
1493          if (ts_subtree_child_count(lookahead) > 0) {
1494            ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
1495            next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
1496          }
1497
1498          ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
1499          if (did_reuse) reusable_node_advance(&self->reusable_node);
1500          return true;
1501        }
1502
1503        case TSParseActionTypeReduce: {
1504          bool is_fragile = table_entry.action_count > 1;
1505          bool end_of_non_terminal_extra = lookahead.ptr == NULL;
1506          LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count);
1507          StackVersion reduction_version = ts_parser__reduce(
1508            self, version, action.reduce.symbol, action.reduce.child_count,
1509            action.reduce.dynamic_precedence, action.reduce.production_id,
1510            is_fragile, end_of_non_terminal_extra
1511          );
1512          if (reduction_version != STACK_VERSION_NONE) {
1513            last_reduction_version = reduction_version;
1514          }
1515          break;
1516        }
1517
1518        case TSParseActionTypeAccept: {
1519          LOG("accept");
1520          ts_parser__accept(self, version, lookahead);
1521          return true;
1522        }
1523
1524        case TSParseActionTypeRecover: {
1525          if (ts_subtree_child_count(lookahead) > 0) {
1526            ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1527          }
1528
1529          ts_parser__recover(self, version, lookahead);
1530          if (did_reuse) reusable_node_advance(&self->reusable_node);
1531          return true;
1532        }
1533      }
1534    }
1535
1536    // If a reduction was performed, then replace the current stack version
1537    // with one of the stack versions created by a reduction, and continue
1538    // processing this version of the stack with the same lookahead symbol.
1539    if (last_reduction_version != STACK_VERSION_NONE) {
1540      ts_stack_renumber_version(self->stack, last_reduction_version, version);
1541      LOG_STACK();
1542      state = ts_stack_state(self->stack, version);
1543
1544      // At the end of a non-terminal extra rule, the lexer will return a
1545      // null subtree, because the parser needs to perform a fixed reduction
1546      // regardless of the lookahead node. After performing that reduction,
1547      // (and completing the non-terminal extra rule) run the lexer again based
1548      // on the current parse state.
1549      if (!lookahead.ptr) {
1550        needs_lex = true;
1551      } else {
1552        ts_language_table_entry(
1553          self->language,
1554          state,
1555          ts_subtree_leaf_symbol(lookahead),
1556          &table_entry
1557        );
1558      }
1559
1560      continue;
1561    }
1562
1563    // A non-terminal extra rule was reduced and merged into an existing
1564    // stack version. This version can be discarded.
1565    if (!lookahead.ptr) {
1566      ts_stack_halt(self->stack, version);
1567      return true;
1568    }
1569
1570    // If there were no parse actions for the current lookahead token, then
1571    // it is not valid in this state. If the current lookahead token is a
1572    // keyword, then switch to treating it as the normal word token if that
1573    // token is valid in this state.
1574    if (
1575      ts_subtree_is_keyword(lookahead) &&
1576      ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
1577    ) {
1578      ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
1579      if (table_entry.action_count > 0) {
1580        LOG(
1581          "switch from_keyword:%s, to_word_token:%s",
1582          TREE_NAME(lookahead),
1583          SYM_NAME(self->language->keyword_capture_token)
1584        );
1585
1586        MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1587        ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
1588        lookahead = ts_subtree_from_mut(mutable_lookahead);
1589        continue;
1590      }
1591    }
1592
1593    // If the current lookahead token is not valid and the parser is
1594    // already in the error state, restart the error recovery process.
1595    // TODO - can this be unified with the other `RECOVER` case above?
1596    if (state == ERROR_STATE) {
1597      ts_parser__recover(self, version, lookahead);
1598      return true;
1599    }
1600
1601    // If the current lookahead token is not valid and the previous
1602    // subtree on the stack was reused from an old tree, it isn't actually
1603    // valid to reuse it. Remove it from the stack, and in its place,
1604    // push each of its children. Then try again to process the current
1605    // lookahead.
1606    if (ts_parser__breakdown_top_of_stack(self, version)) {
1607      state = ts_stack_state(self->stack, version);
1608      ts_subtree_release(&self->tree_pool, lookahead);
1609      needs_lex = true;
1610      continue;
1611    }
1612
1613    // At this point, the current lookahead token is definitely not valid
1614    // for this parse stack version. Mark this version as paused and continue
1615    // processing any other stack versions that might exist. If some other
1616    // version advances successfully, then this version can simply be removed.
1617    // But if all versions end up paused, then error recovery is needed.
1618    LOG("detect_error");
1619    ts_stack_pause(self->stack, version, lookahead);
1620    return true;
1621  }
1622}
1623
1624static unsigned ts_parser__condense_stack(TSParser *self) {
1625  bool made_changes = false;
1626  unsigned min_error_cost = UINT_MAX;
1627  for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
1628    // Prune any versions that have been marked for removal.
1629    if (ts_stack_is_halted(self->stack, i)) {
1630      ts_stack_remove_version(self->stack, i);
1631      i--;
1632      continue;
1633    }
1634
1635    // Keep track of the minimum error cost of any stack version so
1636    // that it can be returned.
1637    ErrorStatus status_i = ts_parser__version_status(self, i);
1638    if (!status_i.is_in_error && status_i.cost < min_error_cost) {
1639      min_error_cost = status_i.cost;
1640    }
1641
1642    // Examine each pair of stack versions, removing any versions that
1643    // are clearly worse than another version. Ensure that the versions
1644    // are ordered from most promising to least promising.
1645    for (StackVersion j = 0; j < i; j++) {
1646      ErrorStatus status_j = ts_parser__version_status(self, j);
1647
1648      switch (ts_parser__compare_versions(self, status_j, status_i)) {
1649        case ErrorComparisonTakeLeft:
1650          made_changes = true;
1651          ts_stack_remove_version(self->stack, i);
1652          i--;
1653          j = i;
1654          break;
1655
1656        case ErrorComparisonPreferLeft:
1657        case ErrorComparisonNone:
1658          if (ts_stack_merge(self->stack, j, i)) {
1659            made_changes = true;
1660            i--;
1661            j = i;
1662          }
1663          break;
1664
1665        case ErrorComparisonPreferRight:
1666          made_changes = true;
1667          if (ts_stack_merge(self->stack, j, i)) {
1668            i--;
1669            j = i;
1670          } else {
1671            ts_stack_swap_versions(self->stack, i, j);
1672          }
1673          break;
1674
1675        case ErrorComparisonTakeRight:
1676          made_changes = true;
1677          ts_stack_remove_version(self->stack, j);
1678          i--;
1679          j--;
1680          break;
1681      }
1682    }
1683  }
1684
1685  // Enfore a hard upper bound on the number of stack versions by
1686  // discarding the least promising versions.
1687  while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1688    ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
1689    made_changes = true;
1690  }
1691
1692  // If the best-performing stack version is currently paused, or all
1693  // versions are paused, then resume the best paused version and begin
1694  // the error recovery process. Otherwise, remove the paused versions.
1695  if (ts_stack_version_count(self->stack) > 0) {
1696    bool has_unpaused_version = false;
1697    for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
1698      if (ts_stack_is_paused(self->stack, i)) {
1699        if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
1700          LOG("resume version:%u", i);
1701          min_error_cost = ts_stack_error_cost(self->stack, i);
1702          Subtree lookahead = ts_stack_resume(self->stack, i);
1703          ts_parser__handle_error(self, i, lookahead);
1704          has_unpaused_version = true;
1705        } else {
1706          ts_stack_remove_version(self->stack, i);
1707          i--;
1708          n--;
1709        }
1710      } else {
1711        has_unpaused_version = true;
1712      }
1713    }
1714  }
1715
1716  if (made_changes) {
1717    LOG("condense");
1718    LOG_STACK();
1719  }
1720
1721  return min_error_cost;
1722}
1723
1724static bool ts_parser_has_outstanding_parse(TSParser *self) {
1725  return (
1726    ts_stack_state(self->stack, 0) != 1 ||
1727    ts_stack_node_count_since_error(self->stack, 0) != 0
1728  );
1729}
1730
1731// Parser - Public
1732
1733TSParser *ts_parser_new(void) {
1734  TSParser *self = ts_calloc(1, sizeof(TSParser));
1735  ts_lexer_init(&self->lexer);
1736  array_init(&self->reduce_actions);
1737  array_reserve(&self->reduce_actions, 4);
1738  self->tree_pool = ts_subtree_pool_new(32);
1739  self->stack = ts_stack_new(&self->tree_pool);
1740  self->finished_tree = NULL_SUBTREE;
1741  self->reusable_node = reusable_node_new();
1742  self->dot_graph_file = NULL;
1743  self->cancellation_flag = NULL;
1744  self->timeout_duration = 0;
1745  self->end_clock = clock_null();
1746  self->operation_count = 0;
1747  self->old_tree = NULL_SUBTREE;
1748  self->included_range_differences = (TSRangeArray) array_new();
1749  self->included_range_difference_index = 0;
1750  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1751  return self;
1752}
1753
1754void ts_parser_delete(TSParser *self) {
1755  if (!self) return;
1756
1757  ts_parser_set_language(self, NULL);
1758  ts_stack_delete(self->stack);
1759  if (self->reduce_actions.contents) {
1760    array_delete(&self->reduce_actions);
1761  }
1762  if (self->included_range_differences.contents) {
1763    array_delete(&self->included_range_differences);
1764  }
1765  if (self->old_tree.ptr) {
1766    ts_subtree_release(&self->tree_pool, self->old_tree);
1767    self->old_tree = NULL_SUBTREE;
1768  }
1769  ts_lexer_delete(&self->lexer);
1770  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1771  ts_subtree_pool_delete(&self->tree_pool);
1772  reusable_node_delete(&self->reusable_node);
1773  array_delete(&self->trailing_extras);
1774  array_delete(&self->trailing_extras2);
1775  array_delete(&self->scratch_trees);
1776  ts_free(self);
1777}
1778
1779const TSLanguage *ts_parser_language(const TSParser *self) {
1780  return self->language;
1781}
1782
1783bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
1784  if (language) {
1785    if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false;
1786    if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false;
1787  }
1788
1789  if (self->external_scanner_payload && self->language->external_scanner.destroy) {
1790    self->language->external_scanner.destroy(self->external_scanner_payload);
1791  }
1792
1793  if (language && language->external_scanner.create) {
1794    self->external_scanner_payload = language->external_scanner.create();
1795  } else {
1796    self->external_scanner_payload = NULL;
1797  }
1798
1799  self->language = language;
1800  ts_parser_reset(self);
1801  return true;
1802}
1803
1804TSLogger ts_parser_logger(const TSParser *self) {
1805  return self->lexer.logger;
1806}
1807
1808void ts_parser_set_logger(TSParser *self, TSLogger logger) {
1809  self->lexer.logger = logger;
1810}
1811
1812void ts_parser_print_dot_graphs(TSParser *self, int fd) {
1813  if (self->dot_graph_file) {
1814    fclose(self->dot_graph_file);
1815  }
1816
1817  if (fd >= 0) {
1818    #ifdef _WIN32
1819    self->dot_graph_file = _fdopen(fd, "a");
1820    #else
1821    self->dot_graph_file = fdopen(fd, "a");
1822    #endif
1823  } else {
1824    self->dot_graph_file = NULL;
1825  }
1826}
1827
1828const size_t *ts_parser_cancellation_flag(const TSParser *self) {
1829  return (const size_t *)self->cancellation_flag;
1830}
1831
1832void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
1833  self->cancellation_flag = (const volatile size_t *)flag;
1834}
1835
1836uint64_t ts_parser_timeout_micros(const TSParser *self) {
1837  return duration_to_micros(self->timeout_duration);
1838}
1839
1840void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
1841  self->timeout_duration = duration_from_micros(timeout_micros);
1842}
1843
1844bool ts_parser_set_included_ranges(
1845  TSParser *self,
1846  const TSRange *ranges,
1847  uint32_t count
1848) {
1849  return ts_lexer_set_included_ranges(&self->lexer, ranges, count);
1850}
1851
1852const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) {
1853  return ts_lexer_included_ranges(&self->lexer, count);
1854}
1855
1856void ts_parser_reset(TSParser *self) {
1857  if (self->language && self->language->external_scanner.deserialize) {
1858    self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
1859  }
1860
1861  if (self->old_tree.ptr) {
1862    ts_subtree_release(&self->tree_pool, self->old_tree);
1863    self->old_tree = NULL_SUBTREE;
1864  }
1865
1866  reusable_node_clear(&self->reusable_node);
1867  ts_lexer_reset(&self->lexer, length_zero());
1868  ts_stack_clear(self->stack);
1869  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1870  if (self->finished_tree.ptr) {
1871    ts_subtree_release(&self->tree_pool, self->finished_tree);
1872    self->finished_tree = NULL_SUBTREE;
1873  }
1874  self->accept_count = 0;
1875}
1876
1877TSTree *ts_parser_parse(
1878  TSParser *self,
1879  const TSTree *old_tree,
1880  TSInput input
1881) {
1882  if (!self->language || !input.read) return NULL;
1883
1884  ts_lexer_set_input(&self->lexer, input);
1885
1886  array_clear(&self->included_range_differences);
1887  self->included_range_difference_index = 0;
1888
1889  if (ts_parser_has_outstanding_parse(self)) {
1890    LOG("resume_parsing");
1891  } else if (old_tree) {
1892    ts_subtree_retain(old_tree->root);
1893    self->old_tree = old_tree->root;
1894    ts_range_array_get_changed_ranges(
1895      old_tree->included_ranges, old_tree->included_range_count,
1896      self->lexer.included_ranges, self->lexer.included_range_count,
1897      &self->included_range_differences
1898    );
1899    reusable_node_reset(&self->reusable_node, old_tree->root);
1900    LOG("parse_after_edit");
1901    LOG_TREE(self->old_tree);
1902    for (unsigned i = 0; i < self->included_range_differences.size; i++) {
1903      TSRange *range = &self->included_range_differences.contents[i];
1904      LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
1905    }
1906  } else {
1907    reusable_node_clear(&self->reusable_node);
1908    LOG("new_parse");
1909  }
1910
1911  self->operation_count = 0;
1912  if (self->timeout_duration) {
1913    self->end_clock = clock_after(clock_now(), self->timeout_duration);
1914  } else {
1915    self->end_clock = clock_null();
1916  }
1917
1918  uint32_t position = 0, last_position = 0, version_count = 0;
1919  do {
1920    for (
1921      StackVersion version = 0;
1922      version_count = ts_stack_version_count(self->stack),
1923      version < version_count;
1924      version++
1925    ) {
1926      bool allow_node_reuse = version_count == 1;
1927      while (ts_stack_is_active(self->stack, version)) {
1928        LOG(
1929          "process version:%d, version_count:%u, state:%d, row:%u, col:%u",
1930          version,
1931          ts_stack_version_count(self->stack),
1932          ts_stack_state(self->stack, version),
1933          ts_stack_position(self->stack, version).extent.row,
1934          ts_stack_position(self->stack, version).extent.column
1935        );
1936
1937        if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
1938        LOG_STACK();
1939
1940        position = ts_stack_position(self->stack, version).bytes;
1941        if (position > last_position || (version > 0 && position == last_position)) {
1942          last_position = position;
1943          break;
1944        }
1945      }
1946    }
1947
1948    // After advancing each version of the stack, re-sort the versions by their cost,
1949    // removing any versions that are no longer worth pursuing.
1950    unsigned min_error_cost = ts_parser__condense_stack(self);
1951
1952    // If there's already a finished parse tree that's better than any in-progress version,
1953    // then terminate parsing. Clear the parse stack to remove any extra references to subtrees
1954    // within the finished tree, ensuring that these subtrees can be safely mutated in-place
1955    // for rebalancing.
1956    if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
1957      ts_stack_clear(self->stack);
1958      break;
1959    }
1960
1961    while (self->included_range_difference_index < self->included_range_differences.size) {
1962      TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
1963      if (range->end_byte <= position) {
1964        self->included_range_difference_index++;
1965      } else {
1966        break;
1967      }
1968    }
1969  } while (version_count != 0);
1970
1971  assert(self->finished_tree.ptr);
1972  ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
1973  LOG("done");
1974  LOG_TREE(self->finished_tree);
1975
1976  TSTree *result = ts_tree_new(
1977    self->finished_tree,
1978    self->language,
1979    self->lexer.included_ranges,
1980    self->lexer.included_range_count
1981  );
1982  self->finished_tree = NULL_SUBTREE;
1983  ts_parser_reset(self);
1984  return result;
1985}
1986
1987TSTree *ts_parser_parse_string(
1988  TSParser *self,
1989  const TSTree *old_tree,
1990  const char *string,
1991  uint32_t length
1992) {
1993  return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8);
1994}
1995
1996TSTree *ts_parser_parse_string_encoding(
1997  TSParser *self,
1998  const TSTree *old_tree,
1999  const char *string,
2000  uint32_t length,
2001  TSInputEncoding encoding
2002) {
2003  TSStringInput input = {string, length};
2004  return ts_parser_parse(self, old_tree, (TSInput) {
2005    &input,
2006    ts_string_input_read,
2007    encoding,
2008  });
2009}
2010
2011#undef LOG