1#include <assert.h>
   2#include <ctype.h>
   3#include <stdint.h>
   4#include <stdbool.h>
   5#include <string.h>
   6#include <stdio.h>
   7#include "./alloc.h"
   8#include "./array.h"
   9#include "./atomic.h"
  10#include "./subtree.h"
  11#include "./length.h"
  12#include "./language.h"
  13#include "./error_costs.h"
  14#include <stddef.h>
  15
  16typedef struct {
  17  Length start;
  18  Length old_end;
  19  Length new_end;
  20} Edit;
  21
  22#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
  23#define TS_MAX_TREE_POOL_SIZE 32
  24
  25// ExternalScannerState
  26
  27void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
  28  self->length = length;
  29  if (length > sizeof(self->short_data)) {
  30    self->long_data = ts_malloc(length);
  31    memcpy(self->long_data, data, length);
  32  } else {
  33    memcpy(self->short_data, data, length);
  34  }
  35}
  36
  37ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
  38  ExternalScannerState result = *self;
  39  if (self->length > sizeof(self->short_data)) {
  40    result.long_data = ts_malloc(self->length);
  41    memcpy(result.long_data, self->long_data, self->length);
  42  }
  43  return result;
  44}
  45
  46void ts_external_scanner_state_delete(ExternalScannerState *self) {
  47  if (self->length > sizeof(self->short_data)) {
  48    ts_free(self->long_data);
  49  }
  50}
  51
  52const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
  53  if (self->length > sizeof(self->short_data)) {
  54    return self->long_data;
  55  } else {
  56    return self->short_data;
  57  }
  58}
  59
  60bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) {
  61  return
  62    self->length == length &&
  63    memcmp(ts_external_scanner_state_data(self), buffer, length) == 0;
  64}
  65
  66// SubtreeArray
  67
  68void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
  69  dest->size = self.size;
  70  dest->capacity = self.capacity;
  71  dest->contents = self.contents;
  72  if (self.capacity > 0) {
  73    dest->contents = ts_calloc(self.capacity, sizeof(Subtree));
  74    memcpy(dest->contents, self.contents, self.size * sizeof(Subtree));
  75    for (uint32_t i = 0; i < self.size; i++) {
  76      ts_subtree_retain(dest->contents[i]);
  77    }
  78  }
  79}
  80
  81void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) {
  82  for (uint32_t i = 0; i < self->size; i++) {
  83    ts_subtree_release(pool, self->contents[i]);
  84  }
  85  array_clear(self);
  86}
  87
  88void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
  89  ts_subtree_array_clear(pool, self);
  90  array_delete(self);
  91}
  92
  93void ts_subtree_array_remove_trailing_extras(
  94  SubtreeArray *self,
  95  SubtreeArray *destination
  96) {
  97  array_clear(destination);
  98  while (self->size > 0) {
  99    Subtree last = self->contents[self->size - 1];
 100    if (ts_subtree_extra(last)) {
 101      self->size--;
 102      array_push(destination, last);
 103    } else {
 104      break;
 105    }
 106  }
 107  ts_subtree_array_reverse(destination);
 108}
 109
 110void ts_subtree_array_reverse(SubtreeArray *self) {
 111  for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
 112    size_t reverse_index = self->size - 1 - i;
 113    Subtree swap = self->contents[i];
 114    self->contents[i] = self->contents[reverse_index];
 115    self->contents[reverse_index] = swap;
 116  }
 117}
 118
 119// SubtreePool
 120
 121SubtreePool ts_subtree_pool_new(uint32_t capacity) {
 122  SubtreePool self = {array_new(), array_new()};
 123  array_reserve(&self.free_trees, capacity);
 124  return self;
 125}
 126
 127void ts_subtree_pool_delete(SubtreePool *self) {
 128  if (self->free_trees.contents) {
 129    for (unsigned i = 0; i < self->free_trees.size; i++) {
 130      ts_free(self->free_trees.contents[i].ptr);
 131    }
 132    array_delete(&self->free_trees);
 133  }
 134  if (self->tree_stack.contents) array_delete(&self->tree_stack);
 135}
 136
 137static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
 138  if (self->free_trees.size > 0) {
 139    return array_pop(&self->free_trees).ptr;
 140  } else {
 141    return ts_malloc(sizeof(SubtreeHeapData));
 142  }
 143}
 144
 145static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
 146  if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) {
 147    array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
 148  } else {
 149    ts_free(tree);
 150  }
 151}
 152
 153// Subtree
 154
 155static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
 156  return
 157    padding.bytes < TS_MAX_INLINE_TREE_LENGTH &&
 158    padding.extent.row < 16 &&
 159    padding.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
 160    size.extent.row == 0 &&
 161    size.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
 162    lookahead_bytes < 16;
 163}
 164
 165Subtree ts_subtree_new_leaf(
 166  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
 167  uint32_t lookahead_bytes, TSStateId parse_state,
 168  bool has_external_tokens, bool depends_on_column,
 169  bool is_keyword, const TSLanguage *language
 170) {
 171  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
 172  bool extra = symbol == ts_builtin_sym_end;
 173
 174  bool is_inline = (
 175    symbol <= UINT8_MAX &&
 176    !has_external_tokens &&
 177    ts_subtree_can_inline(padding, size, lookahead_bytes)
 178  );
 179
 180  if (is_inline) {
 181    return (Subtree) {{
 182      .parse_state = parse_state,
 183      .symbol = symbol,
 184      .padding_bytes = padding.bytes,
 185      .padding_rows = padding.extent.row,
 186      .padding_columns = padding.extent.column,
 187      .size_bytes = size.bytes,
 188      .lookahead_bytes = lookahead_bytes,
 189      .visible = metadata.visible,
 190      .named = metadata.named,
 191      .extra = extra,
 192      .has_changes = false,
 193      .is_missing = false,
 194      .is_keyword = is_keyword,
 195      .is_inline = true,
 196    }};
 197  } else {
 198    SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
 199    *data = (SubtreeHeapData) {
 200      .ref_count = 1,
 201      .padding = padding,
 202      .size = size,
 203      .lookahead_bytes = lookahead_bytes,
 204      .error_cost = 0,
 205      .child_count = 0,
 206      .symbol = symbol,
 207      .parse_state = parse_state,
 208      .visible = metadata.visible,
 209      .named = metadata.named,
 210      .extra = extra,
 211      .fragile_left = false,
 212      .fragile_right = false,
 213      .has_changes = false,
 214      .has_external_tokens = has_external_tokens,
 215      .has_external_scanner_state_change = false,
 216      .depends_on_column = depends_on_column,
 217      .is_missing = false,
 218      .is_keyword = is_keyword,
 219      {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
 220    };
 221    return (Subtree) {.ptr = data};
 222  }
 223}
 224
 225void ts_subtree_set_symbol(
 226  MutableSubtree *self,
 227  TSSymbol symbol,
 228  const TSLanguage *language
 229) {
 230  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
 231  if (self->data.is_inline) {
 232    assert(symbol < UINT8_MAX);
 233    self->data.symbol = symbol;
 234    self->data.named = metadata.named;
 235    self->data.visible = metadata.visible;
 236  } else {
 237    self->ptr->symbol = symbol;
 238    self->ptr->named = metadata.named;
 239    self->ptr->visible = metadata.visible;
 240  }
 241}
 242
 243Subtree ts_subtree_new_error(
 244  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
 245  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
 246) {
 247  Subtree result = ts_subtree_new_leaf(
 248    pool, ts_builtin_sym_error, padding, size, bytes_scanned,
 249    parse_state, false, false, false, language
 250  );
 251  SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
 252  data->fragile_left = true;
 253  data->fragile_right = true;
 254  data->lookahead_char = lookahead_char;
 255  return result;
 256}
 257
 258// Clone a subtree.
 259MutableSubtree ts_subtree_clone(Subtree self) {
 260  size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count);
 261  Subtree *new_children = ts_malloc(alloc_size);
 262  Subtree *old_children = ts_subtree_children(self);
 263  memcpy(new_children, old_children, alloc_size);
 264  SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count];
 265  if (self.ptr->child_count > 0) {
 266    for (uint32_t i = 0; i < self.ptr->child_count; i++) {
 267      ts_subtree_retain(new_children[i]);
 268    }
 269  } else if (self.ptr->has_external_tokens) {
 270    result->external_scanner_state = ts_external_scanner_state_copy(
 271      &self.ptr->external_scanner_state
 272    );
 273  }
 274  result->ref_count = 1;
 275  return (MutableSubtree) {.ptr = result};
 276}
 277
 278// Get mutable version of a subtree.
 279//
 280// This takes ownership of the subtree. If the subtree has only one owner,
 281// this will directly convert it into a mutable version. Otherwise, it will
 282// perform a copy.
 283MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
 284  if (self.data.is_inline) return (MutableSubtree) {self.data};
 285  if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
 286  MutableSubtree result = ts_subtree_clone(self);
 287  ts_subtree_release(pool, self);
 288  return result;
 289}
 290
 291static void ts_subtree__compress(
 292  MutableSubtree self,
 293  unsigned count,
 294  const TSLanguage *language,
 295  MutableSubtreeArray *stack
 296) {
 297  unsigned initial_stack_size = stack->size;
 298
 299  MutableSubtree tree = self;
 300  TSSymbol symbol = tree.ptr->symbol;
 301  for (unsigned i = 0; i < count; i++) {
 302    if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
 303
 304    MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
 305    if (
 306      child.data.is_inline ||
 307      child.ptr->child_count < 2 ||
 308      child.ptr->ref_count > 1 ||
 309      child.ptr->symbol != symbol
 310    ) break;
 311
 312    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]);
 313    if (
 314      grandchild.data.is_inline ||
 315      grandchild.ptr->child_count < 2 ||
 316      grandchild.ptr->ref_count > 1 ||
 317      grandchild.ptr->symbol != symbol
 318    ) break;
 319
 320    ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild);
 321    ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1];
 322    ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
 323    array_push(stack, tree);
 324    tree = grandchild;
 325  }
 326
 327  while (stack->size > initial_stack_size) {
 328    tree = array_pop(stack);
 329    MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
 330    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]);
 331    ts_subtree_summarize_children(grandchild, language);
 332    ts_subtree_summarize_children(child, language);
 333    ts_subtree_summarize_children(tree, language);
 334  }
 335}
 336
 337void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
 338  array_clear(&pool->tree_stack);
 339
 340  if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
 341    array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
 342  }
 343
 344  while (pool->tree_stack.size > 0) {
 345    MutableSubtree tree = array_pop(&pool->tree_stack);
 346
 347    if (tree.ptr->repeat_depth > 0) {
 348      Subtree child1 = ts_subtree_children(tree)[0];
 349      Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1];
 350      long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
 351      if (repeat_delta > 0) {
 352        unsigned n = (unsigned)repeat_delta;
 353        for (unsigned i = n / 2; i > 0; i /= 2) {
 354          ts_subtree__compress(tree, i, language, &pool->tree_stack);
 355          n -= i;
 356        }
 357      }
 358    }
 359
 360    for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
 361      Subtree child = ts_subtree_children(tree)[i];
 362      if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
 363        array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
 364      }
 365    }
 366  }
 367}
 368
 369// Assign all of the node's properties that depend on its children.
 370void ts_subtree_summarize_children(
 371  MutableSubtree self,
 372  const TSLanguage *language
 373) {
 374  assert(!self.data.is_inline);
 375
 376  self.ptr->named_child_count = 0;
 377  self.ptr->visible_child_count = 0;
 378  self.ptr->error_cost = 0;
 379  self.ptr->repeat_depth = 0;
 380  self.ptr->visible_descendant_count = 0;
 381  self.ptr->has_external_tokens = false;
 382  self.ptr->depends_on_column = false;
 383  self.ptr->has_external_scanner_state_change = false;
 384  self.ptr->dynamic_precedence = 0;
 385
 386  uint32_t structural_index = 0;
 387  const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
 388  uint32_t lookahead_end_byte = 0;
 389
 390  const Subtree *children = ts_subtree_children(self);
 391  for (uint32_t i = 0; i < self.ptr->child_count; i++) {
 392    Subtree child = children[i];
 393
 394    if (
 395      self.ptr->size.extent.row == 0 &&
 396      ts_subtree_depends_on_column(child)
 397    ) {
 398      self.ptr->depends_on_column = true;
 399    }
 400
 401    if (ts_subtree_has_external_scanner_state_change(child)) {
 402      self.ptr->has_external_scanner_state_change = true;
 403    }
 404
 405    if (i == 0) {
 406      self.ptr->padding = ts_subtree_padding(child);
 407      self.ptr->size = ts_subtree_size(child);
 408    } else {
 409      self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
 410    }
 411
 412    uint32_t child_lookahead_end_byte =
 413      self.ptr->padding.bytes +
 414      self.ptr->size.bytes +
 415      ts_subtree_lookahead_bytes(child);
 416    if (child_lookahead_end_byte > lookahead_end_byte) {
 417      lookahead_end_byte = child_lookahead_end_byte;
 418    }
 419
 420    if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
 421      self.ptr->error_cost += ts_subtree_error_cost(child);
 422    }
 423
 424    uint32_t grandchild_count = ts_subtree_child_count(child);
 425    if (
 426      self.ptr->symbol == ts_builtin_sym_error ||
 427      self.ptr->symbol == ts_builtin_sym_error_repeat
 428    ) {
 429      if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
 430        if (ts_subtree_visible(child)) {
 431          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
 432        } else if (grandchild_count > 0) {
 433          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
 434        }
 435      }
 436    }
 437
 438    self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
 439    self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child);
 440
 441    if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) {
 442      self.ptr->visible_descendant_count++;
 443      self.ptr->visible_child_count++;
 444      if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) {
 445        self.ptr->named_child_count++;
 446      }
 447    } else if (ts_subtree_visible(child)) {
 448      self.ptr->visible_descendant_count++;
 449      self.ptr->visible_child_count++;
 450      if (ts_subtree_named(child)) self.ptr->named_child_count++;
 451    } else if (grandchild_count > 0) {
 452      self.ptr->visible_child_count += child.ptr->visible_child_count;
 453      self.ptr->named_child_count += child.ptr->named_child_count;
 454    }
 455
 456    if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
 457
 458    if (ts_subtree_is_error(child)) {
 459      self.ptr->fragile_left = self.ptr->fragile_right = true;
 460      self.ptr->parse_state = TS_TREE_STATE_NONE;
 461    }
 462
 463    if (!ts_subtree_extra(child)) structural_index++;
 464  }
 465
 466  self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
 467
 468  if (
 469    self.ptr->symbol == ts_builtin_sym_error ||
 470    self.ptr->symbol == ts_builtin_sym_error_repeat
 471  ) {
 472    self.ptr->error_cost +=
 473      ERROR_COST_PER_RECOVERY +
 474      ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
 475      ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
 476  }
 477
 478  if (self.ptr->child_count > 0) {
 479    Subtree first_child = children[0];
 480    Subtree last_child = children[self.ptr->child_count - 1];
 481
 482    self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
 483    self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
 484
 485    if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
 486    if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
 487
 488    if (
 489      self.ptr->child_count >= 2 &&
 490      !self.ptr->visible &&
 491      !self.ptr->named &&
 492      ts_subtree_symbol(first_child) == self.ptr->symbol
 493    ) {
 494      if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
 495        self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
 496      } else {
 497        self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
 498      }
 499    }
 500  }
 501}
 502
 503// Create a new parent node with the given children.
 504//
 505// This takes ownership of the children array.
 506MutableSubtree ts_subtree_new_node(
 507  TSSymbol symbol,
 508  SubtreeArray *children,
 509  unsigned production_id,
 510  const TSLanguage *language
 511) {
 512  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
 513  bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
 514
 515  // Allocate the node's data at the end of the array of children.
 516  size_t new_byte_size = ts_subtree_alloc_size(children->size);
 517  if (children->capacity * sizeof(Subtree) < new_byte_size) {
 518    children->contents = ts_realloc(children->contents, new_byte_size);
 519    children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree));
 520  }
 521  SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size];
 522
 523  *data = (SubtreeHeapData) {
 524    .ref_count = 1,
 525    .symbol = symbol,
 526    .child_count = children->size,
 527    .visible = metadata.visible,
 528    .named = metadata.named,
 529    .has_changes = false,
 530    .has_external_scanner_state_change = false,
 531    .fragile_left = fragile,
 532    .fragile_right = fragile,
 533    .is_keyword = false,
 534    {{
 535      .visible_descendant_count = 0,
 536      .production_id = production_id,
 537      .first_leaf = {.symbol = 0, .parse_state = 0},
 538    }}
 539  };
 540  MutableSubtree result = {.ptr = data};
 541  ts_subtree_summarize_children(result, language);
 542  return result;
 543}
 544
 545// Create a new error node containing the given children.
 546//
 547// This node is treated as 'extra'. Its children are prevented from having
 548// having any effect on the parse state.
 549Subtree ts_subtree_new_error_node(
 550  SubtreeArray *children,
 551  bool extra,
 552  const TSLanguage *language
 553) {
 554  MutableSubtree result = ts_subtree_new_node(
 555    ts_builtin_sym_error, children, 0, language
 556  );
 557  result.ptr->extra = extra;
 558  return ts_subtree_from_mut(result);
 559}
 560
 561// Create a new 'missing leaf' node.
 562//
 563// This node is treated as 'extra'. Its children are prevented from having
 564// having any effect on the parse state.
 565Subtree ts_subtree_new_missing_leaf(
 566  SubtreePool *pool,
 567  TSSymbol symbol,
 568  Length padding,
 569  uint32_t lookahead_bytes,
 570  const TSLanguage *language
 571) {
 572  Subtree result = ts_subtree_new_leaf(
 573    pool, symbol, padding, length_zero(), lookahead_bytes,
 574    0, false, false, false, language
 575  );
 576  if (result.data.is_inline) {
 577    result.data.is_missing = true;
 578  } else {
 579    ((SubtreeHeapData *)result.ptr)->is_missing = true;
 580  }
 581  return result;
 582}
 583
 584void ts_subtree_retain(Subtree self) {
 585  if (self.data.is_inline) return;
 586  assert(self.ptr->ref_count > 0);
 587  atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
 588  assert(self.ptr->ref_count != 0);
 589}
 590
 591void ts_subtree_release(SubtreePool *pool, Subtree self) {
 592  if (self.data.is_inline) return;
 593  array_clear(&pool->tree_stack);
 594
 595  assert(self.ptr->ref_count > 0);
 596  if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
 597    array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
 598  }
 599
 600  while (pool->tree_stack.size > 0) {
 601    MutableSubtree tree = array_pop(&pool->tree_stack);
 602    if (tree.ptr->child_count > 0) {
 603      Subtree *children = ts_subtree_children(tree);
 604      for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
 605        Subtree child = children[i];
 606        if (child.data.is_inline) continue;
 607        assert(child.ptr->ref_count > 0);
 608        if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
 609          array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
 610        }
 611      }
 612      ts_free(children);
 613    } else {
 614      if (tree.ptr->has_external_tokens) {
 615        ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
 616      }
 617      ts_subtree_pool_free(pool, tree.ptr);
 618    }
 619  }
 620}
 621
 622int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) {
 623  array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left));
 624  array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right));
 625
 626  while (pool->tree_stack.size > 0) {
 627    right = ts_subtree_from_mut(array_pop(&pool->tree_stack));
 628    left = ts_subtree_from_mut(array_pop(&pool->tree_stack));
 629
 630    int result = 0;
 631    if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) result = -1;
 632    else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) result = 1;
 633    else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) result = -1;
 634    else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) result = 1;
 635    if (result != 0) {
 636      array_clear(&pool->tree_stack);
 637      return result;
 638    }
 639
 640    for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) {
 641      Subtree left_child = ts_subtree_children(left)[i - 1];
 642      Subtree right_child = ts_subtree_children(right)[i - 1];
 643      array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child));
 644      array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child));
 645    }
 646  }
 647
 648  return 0;
 649}
 650
 651static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
 652  if (self->data.is_inline) {
 653    self->data.has_changes = true;
 654  } else {
 655    self->ptr->has_changes = true;
 656  }
 657}
 658
 659Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) {
 660  typedef struct {
 661    Subtree *tree;
 662    Edit edit;
 663  } EditEntry;
 664
 665  Array(EditEntry) stack = array_new();
 666  array_push(&stack, ((EditEntry) {
 667    .tree = &self,
 668    .edit = (Edit) {
 669      .start = {input_edit->start_byte, input_edit->start_point},
 670      .old_end = {input_edit->old_end_byte, input_edit->old_end_point},
 671      .new_end = {input_edit->new_end_byte, input_edit->new_end_point},
 672    },
 673  }));
 674
 675  while (stack.size) {
 676    EditEntry entry = array_pop(&stack);
 677    Edit edit = entry.edit;
 678    bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
 679    bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
 680    bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree);
 681
 682    Length size = ts_subtree_size(*entry.tree);
 683    Length padding = ts_subtree_padding(*entry.tree);
 684    Length total_size = length_add(padding, size);
 685    uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
 686    uint32_t end_byte = total_size.bytes + lookahead_bytes;
 687    if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
 688
 689    // If the edit is entirely within the space before this subtree, then shift this
 690    // subtree over according to the edit without changing its size.
 691    if (edit.old_end.bytes <= padding.bytes) {
 692      padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
 693    }
 694
 695    // If the edit starts in the space before this subtree and extends into this subtree,
 696    // shrink the subtree's content to compensate for the change in the space before it.
 697    else if (edit.start.bytes < padding.bytes) {
 698      size = length_saturating_sub(size, length_sub(edit.old_end, padding));
 699      padding = edit.new_end;
 700    }
 701
 702    // If the edit is a pure insertion right at the start of the subtree,
 703    // shift the subtree over according to the insertion.
 704    else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
 705      padding = edit.new_end;
 706    }
 707
 708    // If the edit is within this subtree, resize the subtree to reflect the edit.
 709    else if (
 710      edit.start.bytes < total_size.bytes ||
 711      (edit.start.bytes == total_size.bytes && is_pure_insertion)
 712    ) {
 713      size = length_add(
 714        length_sub(edit.new_end, padding),
 715        length_saturating_sub(total_size, edit.old_end)
 716      );
 717    }
 718
 719    MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
 720
 721    if (result.data.is_inline) {
 722      if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
 723        result.data.padding_bytes = padding.bytes;
 724        result.data.padding_rows = padding.extent.row;
 725        result.data.padding_columns = padding.extent.column;
 726        result.data.size_bytes = size.bytes;
 727      } else {
 728        SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
 729        data->ref_count = 1;
 730        data->padding = padding;
 731        data->size = size;
 732        data->lookahead_bytes = lookahead_bytes;
 733        data->error_cost = 0;
 734        data->child_count = 0;
 735        data->symbol = result.data.symbol;
 736        data->parse_state = result.data.parse_state;
 737        data->visible = result.data.visible;
 738        data->named = result.data.named;
 739        data->extra = result.data.extra;
 740        data->fragile_left = false;
 741        data->fragile_right = false;
 742        data->has_changes = false;
 743        data->has_external_tokens = false;
 744        data->depends_on_column = false;
 745        data->is_missing = result.data.is_missing;
 746        data->is_keyword = result.data.is_keyword;
 747        result.ptr = data;
 748      }
 749    } else {
 750      result.ptr->padding = padding;
 751      result.ptr->size = size;
 752    }
 753
 754    ts_subtree_set_has_changes(&result);
 755    *entry.tree = ts_subtree_from_mut(result);
 756
 757    Length child_left, child_right = length_zero();
 758    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
 759      Subtree *child = &ts_subtree_children(*entry.tree)[i];
 760      Length child_size = ts_subtree_total_size(*child);
 761      child_left = child_right;
 762      child_right = length_add(child_left, child_size);
 763
 764      // If this child ends before the edit, it is not affected.
 765      if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
 766
 767      // Keep editing child nodes until a node is reached that starts after the edit.
 768      // Also, if this node's validity depends on its column position, then continue
 769      // invaliditing child nodes until reaching a line break.
 770      if ((
 771        (child_left.bytes > edit.old_end.bytes) ||
 772        (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
 773      ) && (
 774        !invalidate_first_row ||
 775        child_left.extent.row > entry.tree->ptr->padding.extent.row
 776      )) {
 777        break;
 778      }
 779
 780      // Transform edit into the child's coordinate space.
 781      Edit child_edit = {
 782        .start = length_saturating_sub(edit.start, child_left),
 783        .old_end = length_saturating_sub(edit.old_end, child_left),
 784        .new_end = length_saturating_sub(edit.new_end, child_left),
 785      };
 786
 787      // Interpret all inserted text as applying to the *first* child that touches the edit.
 788      // Subsequent children are only never have any text inserted into them; they are only
 789      // shrunk to compensate for the edit.
 790      if (
 791        child_right.bytes > edit.start.bytes ||
 792        (child_right.bytes == edit.start.bytes && is_pure_insertion)
 793      ) {
 794        edit.new_end = edit.start;
 795      }
 796
 797      // Children that occur before the edit are not reshaped by the edit.
 798      else {
 799        child_edit.old_end = child_edit.start;
 800        child_edit.new_end = child_edit.start;
 801      }
 802
 803      // Queue processing of this child's subtree.
 804      array_push(&stack, ((EditEntry) {
 805        .tree = child,
 806        .edit = child_edit,
 807      }));
 808    }
 809  }
 810
 811  array_delete(&stack);
 812  return self;
 813}
 814
 815Subtree ts_subtree_last_external_token(Subtree tree) {
 816  if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
 817  while (tree.ptr->child_count > 0) {
 818    for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) {
 819      Subtree child = ts_subtree_children(tree)[i];
 820      if (ts_subtree_has_external_tokens(child)) {
 821        tree = child;
 822        break;
 823      }
 824    }
 825  }
 826  return tree;
 827}
 828
 829static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) {
 830  if (chr == -1)
 831    return snprintf(str, n, "INVALID");
 832  else if (chr == '\0')
 833    return snprintf(str, n, "'\\0'");
 834  else if (chr == '\n')
 835    return snprintf(str, n, "'\\n'");
 836  else if (chr == '\t')
 837    return snprintf(str, n, "'\\t'");
 838  else if (chr == '\r')
 839    return snprintf(str, n, "'\\r'");
 840  else if (0 < chr && chr < 128 && isprint(chr))
 841    return snprintf(str, n, "'%c'", chr);
 842  else
 843    return snprintf(str, n, "%d", chr);
 844}
 845
 846static const char *const ROOT_FIELD = "__ROOT__";
 847
 848static size_t ts_subtree__write_to_string(
 849  Subtree self, char *string, size_t limit,
 850  const TSLanguage *language, bool include_all,
 851  TSSymbol alias_symbol, bool alias_is_named, const char *field_name
 852) {
 853  if (!self.ptr) return snprintf(string, limit, "(NULL)");
 854
 855  char *cursor = string;
 856  char **writer = (limit > 1) ? &cursor : &string;
 857  bool is_root = field_name == ROOT_FIELD;
 858  bool is_visible =
 859    include_all ||
 860    ts_subtree_missing(self) ||
 861    (
 862      alias_symbol
 863        ? alias_is_named
 864        : ts_subtree_visible(self) && ts_subtree_named(self)
 865    );
 866
 867  if (is_visible) {
 868    if (!is_root) {
 869      cursor += snprintf(*writer, limit, " ");
 870      if (field_name) {
 871        cursor += snprintf(*writer, limit, "%s: ", field_name);
 872      }
 873    }
 874
 875    if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
 876      cursor += snprintf(*writer, limit, "(UNEXPECTED ");
 877      cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
 878    } else {
 879      TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
 880      const char *symbol_name = ts_language_symbol_name(language, symbol);
 881      if (ts_subtree_missing(self)) {
 882        cursor += snprintf(*writer, limit, "(MISSING ");
 883        if (alias_is_named || ts_subtree_named(self)) {
 884          cursor += snprintf(*writer, limit, "%s", symbol_name);
 885        } else {
 886          cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
 887        }
 888      } else {
 889        cursor += snprintf(*writer, limit, "(%s", symbol_name);
 890      }
 891    }
 892  } else if (is_root) {
 893    TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
 894    const char *symbol_name = ts_language_symbol_name(language, symbol);
 895    if (ts_subtree_child_count(self) > 0) {
 896      cursor += snprintf(*writer, limit, "(%s", symbol_name);
 897    } else if (ts_subtree_named(self)) {
 898      cursor += snprintf(*writer, limit, "(%s)", symbol_name);
 899    } else {
 900      cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
 901    }
 902  }
 903
 904  if (ts_subtree_child_count(self)) {
 905    const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
 906    const TSFieldMapEntry *field_map, *field_map_end;
 907    ts_language_field_map(
 908      language,
 909      self.ptr->production_id,
 910      &field_map,
 911      &field_map_end
 912    );
 913
 914    uint32_t structural_child_index = 0;
 915    for (uint32_t i = 0; i < self.ptr->child_count; i++) {
 916      Subtree child = ts_subtree_children(self)[i];
 917      if (ts_subtree_extra(child)) {
 918        cursor += ts_subtree__write_to_string(
 919          child, *writer, limit,
 920          language, include_all,
 921          0, false, NULL
 922        );
 923      } else {
 924        TSSymbol subtree_alias_symbol = alias_sequence
 925          ? alias_sequence[structural_child_index]
 926          : 0;
 927        bool subtree_alias_is_named = subtree_alias_symbol
 928          ? ts_language_symbol_metadata(language, subtree_alias_symbol).named
 929          : false;
 930
 931        const char *child_field_name = is_visible ? NULL : field_name;
 932        for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
 933          if (!map->inherited && map->child_index == structural_child_index) {
 934            child_field_name = language->field_names[map->field_id];
 935            break;
 936          }
 937        }
 938
 939        cursor += ts_subtree__write_to_string(
 940          child, *writer, limit,
 941          language, include_all,
 942          subtree_alias_symbol, subtree_alias_is_named, child_field_name
 943        );
 944        structural_child_index++;
 945      }
 946    }
 947  }
 948
 949  if (is_visible) cursor += snprintf(*writer, limit, ")");
 950
 951  return cursor - string;
 952}
 953
 954char *ts_subtree_string(
 955  Subtree self,
 956  TSSymbol alias_symbol,
 957  bool alias_is_named,
 958  const TSLanguage *language,
 959  bool include_all
 960) {
 961  char scratch_string[1];
 962  size_t size = ts_subtree__write_to_string(
 963    self, scratch_string, 1,
 964    language, include_all,
 965    alias_symbol, alias_is_named, ROOT_FIELD
 966  ) + 1;
 967  char *result = ts_malloc(size * sizeof(char));
 968  ts_subtree__write_to_string(
 969    self, result, size,
 970    language, include_all,
 971    alias_symbol, alias_is_named, ROOT_FIELD
 972  );
 973  return result;
 974}
 975
 976void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
 977                                 const TSLanguage *language, TSSymbol alias_symbol,
 978                                 FILE *f) {
 979  TSSymbol subtree_symbol = ts_subtree_symbol(*self);
 980  TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
 981  uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
 982  fprintf(f, "tree_%p [label=\"", (void *)self);
 983  ts_language_write_symbol_as_dot_string(language, f, symbol);
 984  fprintf(f, "\"");
 985
 986  if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
 987  if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
 988
 989  fprintf(f, ", tooltip=\""
 990    "range: %u - %u\n"
 991    "state: %d\n"
 992    "error-cost: %u\n"
 993    "has-changes: %u\n"
 994    "depends-on-column: %u\n"
 995    "descendant-count: %u\n"
 996    "repeat-depth: %u\n"
 997    "lookahead-bytes: %u",
 998    start_offset, end_offset,
 999    ts_subtree_parse_state(*self),
1000    ts_subtree_error_cost(*self),
1001    ts_subtree_has_changes(*self),
1002    ts_subtree_depends_on_column(*self),
1003    ts_subtree_visible_descendant_count(*self),
1004    ts_subtree_repeat_depth(*self),
1005    ts_subtree_lookahead_bytes(*self)
1006  );
1007
1008  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) {
1009    fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
1010  }
1011
1012  fprintf(f, "\"]\n");
1013
1014  uint32_t child_start_offset = start_offset;
1015  uint32_t child_info_offset =
1016    language->max_alias_sequence_length *
1017    ts_subtree_production_id(*self);
1018  for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
1019    const Subtree *child = &ts_subtree_children(*self)[i];
1020    TSSymbol subtree_alias_symbol = 0;
1021    if (!ts_subtree_extra(*child) && child_info_offset) {
1022      subtree_alias_symbol = language->alias_sequences[child_info_offset];
1023      child_info_offset++;
1024    }
1025    ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f);
1026    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i);
1027    child_start_offset += ts_subtree_total_bytes(*child);
1028  }
1029}
1030
1031void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
1032  fprintf(f, "digraph tree {\n");
1033  fprintf(f, "edge [arrowhead=none]\n");
1034  ts_subtree__print_dot_graph(&self, 0, language, 0, f);
1035  fprintf(f, "}\n");
1036}
1037
1038const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) {
1039  static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0};
1040  if (
1041    self.ptr &&
1042    !self.data.is_inline &&
1043    self.ptr->has_external_tokens &&
1044    self.ptr->child_count == 0
1045  ) {
1046    return &self.ptr->external_scanner_state;
1047  } else {
1048    return &empty_state;
1049  }
1050}
1051
1052bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
1053  const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self);
1054  const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other);
1055  return ts_external_scanner_state_eq(
1056    state_self,
1057    ts_external_scanner_state_data(state_other),
1058    state_other->length
1059  );
1060}