1#include <assert.h>
   2#include <ctype.h>
   3#include <limits.h>
   4#include <stdbool.h>
   5#include <string.h>
   6#include <stdio.h>
   7#include "./alloc.h"
   8#include "./atomic.h"
   9#include "./subtree.h"
  10#include "./length.h"
  11#include "./language.h"
  12#include "./error_costs.h"
  13#include <stddef.h>
  14
  15typedef struct {
  16  Length start;
  17  Length old_end;
  18  Length new_end;
  19} Edit;
  20
  21#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
  22#define TS_MAX_TREE_POOL_SIZE 32
  23
  24// ExternalScannerState
  25
  26void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
  27  self->length = length;
  28  if (length > sizeof(self->short_data)) {
  29    self->long_data = ts_malloc(length);
  30    memcpy(self->long_data, data, length);
  31  } else {
  32    memcpy(self->short_data, data, length);
  33  }
  34}
  35
  36ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
  37  ExternalScannerState result = *self;
  38  if (self->length > sizeof(self->short_data)) {
  39    result.long_data = ts_malloc(self->length);
  40    memcpy(result.long_data, self->long_data, self->length);
  41  }
  42  return result;
  43}
  44
  45void ts_external_scanner_state_delete(ExternalScannerState *self) {
  46  if (self->length > sizeof(self->short_data)) {
  47    ts_free(self->long_data);
  48  }
  49}
  50
  51const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
  52  if (self->length > sizeof(self->short_data)) {
  53    return self->long_data;
  54  } else {
  55    return self->short_data;
  56  }
  57}
  58
  59bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) {
  60  return
  61    self->length == length &&
  62    memcmp(ts_external_scanner_state_data(self), buffer, length) == 0;
  63}
  64
  65// SubtreeArray
  66
  67void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
  68  dest->size = self.size;
  69  dest->capacity = self.capacity;
  70  dest->contents = self.contents;
  71  if (self.capacity > 0) {
  72    dest->contents = ts_calloc(self.capacity, sizeof(Subtree));
  73    memcpy(dest->contents, self.contents, self.size * sizeof(Subtree));
  74    for (uint32_t i = 0; i < self.size; i++) {
  75      ts_subtree_retain(dest->contents[i]);
  76    }
  77  }
  78}
  79
  80void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) {
  81  for (uint32_t i = 0; i < self->size; i++) {
  82    ts_subtree_release(pool, self->contents[i]);
  83  }
  84  array_clear(self);
  85}
  86
  87void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
  88  ts_subtree_array_clear(pool, self);
  89  array_delete(self);
  90}
  91
  92void ts_subtree_array_remove_trailing_extras(
  93  SubtreeArray *self,
  94  SubtreeArray *destination
  95) {
  96  array_clear(destination);
  97  while (self->size > 0) {
  98    Subtree last = self->contents[self->size - 1];
  99    if (ts_subtree_extra(last)) {
 100      self->size--;
 101      array_push(destination, last);
 102    } else {
 103      break;
 104    }
 105  }
 106  ts_subtree_array_reverse(destination);
 107}
 108
 109void ts_subtree_array_reverse(SubtreeArray *self) {
 110  for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
 111    size_t reverse_index = self->size - 1 - i;
 112    Subtree swap = self->contents[i];
 113    self->contents[i] = self->contents[reverse_index];
 114    self->contents[reverse_index] = swap;
 115  }
 116}
 117
 118// SubtreePool
 119
 120SubtreePool ts_subtree_pool_new(uint32_t capacity) {
 121  SubtreePool self = {array_new(), array_new()};
 122  array_reserve(&self.free_trees, capacity);
 123  return self;
 124}
 125
 126void ts_subtree_pool_delete(SubtreePool *self) {
 127  if (self->free_trees.contents) {
 128    for (unsigned i = 0; i < self->free_trees.size; i++) {
 129      ts_free(self->free_trees.contents[i].ptr);
 130    }
 131    array_delete(&self->free_trees);
 132  }
 133  if (self->tree_stack.contents) array_delete(&self->tree_stack);
 134}
 135
 136static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
 137  if (self->free_trees.size > 0) {
 138    return array_pop(&self->free_trees).ptr;
 139  } else {
 140    return ts_malloc(sizeof(SubtreeHeapData));
 141  }
 142}
 143
 144static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
 145  if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) {
 146    array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
 147  } else {
 148    ts_free(tree);
 149  }
 150}
 151
 152// Subtree
 153
 154static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
 155  return
 156    padding.bytes < TS_MAX_INLINE_TREE_LENGTH &&
 157    padding.extent.row < 16 &&
 158    padding.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
 159    size.extent.row == 0 &&
 160    size.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
 161    lookahead_bytes < 16;
 162}
 163
 164Subtree ts_subtree_new_leaf(
 165  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
 166  uint32_t lookahead_bytes, TSStateId parse_state,
 167  bool has_external_tokens, bool depends_on_column,
 168  bool is_keyword, const TSLanguage *language
 169) {
 170  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
 171  bool extra = symbol == ts_builtin_sym_end;
 172
 173  bool is_inline = (
 174    symbol <= UINT8_MAX &&
 175    !has_external_tokens &&
 176    ts_subtree_can_inline(padding, size, lookahead_bytes)
 177  );
 178
 179  if (is_inline) {
 180    return (Subtree) {{
 181      .parse_state = parse_state,
 182      .symbol = symbol,
 183      .padding_bytes = padding.bytes,
 184      .padding_rows = padding.extent.row,
 185      .padding_columns = padding.extent.column,
 186      .size_bytes = size.bytes,
 187      .lookahead_bytes = lookahead_bytes,
 188      .visible = metadata.visible,
 189      .named = metadata.named,
 190      .extra = extra,
 191      .has_changes = false,
 192      .is_missing = false,
 193      .is_keyword = is_keyword,
 194      .is_inline = true,
 195    }};
 196  } else {
 197    SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
 198    *data = (SubtreeHeapData) {
 199      .ref_count = 1,
 200      .padding = padding,
 201      .size = size,
 202      .lookahead_bytes = lookahead_bytes,
 203      .error_cost = 0,
 204      .child_count = 0,
 205      .symbol = symbol,
 206      .parse_state = parse_state,
 207      .visible = metadata.visible,
 208      .named = metadata.named,
 209      .extra = extra,
 210      .fragile_left = false,
 211      .fragile_right = false,
 212      .has_changes = false,
 213      .has_external_tokens = has_external_tokens,
 214      .has_external_scanner_state_change = false,
 215      .depends_on_column = depends_on_column,
 216      .is_missing = false,
 217      .is_keyword = is_keyword,
 218      {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
 219    };
 220    return (Subtree) {.ptr = data};
 221  }
 222}
 223
 224void ts_subtree_set_symbol(
 225  MutableSubtree *self,
 226  TSSymbol symbol,
 227  const TSLanguage *language
 228) {
 229  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
 230  if (self->data.is_inline) {
 231    assert(symbol < UINT8_MAX);
 232    self->data.symbol = symbol;
 233    self->data.named = metadata.named;
 234    self->data.visible = metadata.visible;
 235  } else {
 236    self->ptr->symbol = symbol;
 237    self->ptr->named = metadata.named;
 238    self->ptr->visible = metadata.visible;
 239  }
 240}
 241
 242Subtree ts_subtree_new_error(
 243  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
 244  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
 245) {
 246  Subtree result = ts_subtree_new_leaf(
 247    pool, ts_builtin_sym_error, padding, size, bytes_scanned,
 248    parse_state, false, false, false, language
 249  );
 250  SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
 251  data->fragile_left = true;
 252  data->fragile_right = true;
 253  data->lookahead_char = lookahead_char;
 254  return result;
 255}
 256
 257// Clone a subtree.
 258MutableSubtree ts_subtree_clone(Subtree self) {
 259  size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count);
 260  Subtree *new_children = ts_malloc(alloc_size);
 261  Subtree *old_children = ts_subtree_children(self);
 262  memcpy(new_children, old_children, alloc_size);
 263  SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count];
 264  if (self.ptr->child_count > 0) {
 265    for (uint32_t i = 0; i < self.ptr->child_count; i++) {
 266      ts_subtree_retain(new_children[i]);
 267    }
 268  } else if (self.ptr->has_external_tokens) {
 269    result->external_scanner_state = ts_external_scanner_state_copy(
 270      &self.ptr->external_scanner_state
 271    );
 272  }
 273  result->ref_count = 1;
 274  return (MutableSubtree) {.ptr = result};
 275}
 276
 277// Get mutable version of a subtree.
 278//
 279// This takes ownership of the subtree. If the subtree has only one owner,
 280// this will directly convert it into a mutable version. Otherwise, it will
 281// perform a copy.
 282MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
 283  if (self.data.is_inline) return (MutableSubtree) {self.data};
 284  if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
 285  MutableSubtree result = ts_subtree_clone(self);
 286  ts_subtree_release(pool, self);
 287  return result;
 288}
 289
 290static void ts_subtree__compress(
 291  MutableSubtree self,
 292  unsigned count,
 293  const TSLanguage *language,
 294  MutableSubtreeArray *stack
 295) {
 296  unsigned initial_stack_size = stack->size;
 297
 298  MutableSubtree tree = self;
 299  TSSymbol symbol = tree.ptr->symbol;
 300  for (unsigned i = 0; i < count; i++) {
 301    if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
 302
 303    MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
 304    if (
 305      child.data.is_inline ||
 306      child.ptr->child_count < 2 ||
 307      child.ptr->ref_count > 1 ||
 308      child.ptr->symbol != symbol
 309    ) break;
 310
 311    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]);
 312    if (
 313      grandchild.data.is_inline ||
 314      grandchild.ptr->child_count < 2 ||
 315      grandchild.ptr->ref_count > 1 ||
 316      grandchild.ptr->symbol != symbol
 317    ) break;
 318
 319    ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild);
 320    ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1];
 321    ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
 322    array_push(stack, tree);
 323    tree = grandchild;
 324  }
 325
 326  while (stack->size > initial_stack_size) {
 327    tree = array_pop(stack);
 328    MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
 329    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]);
 330    ts_subtree_summarize_children(grandchild, language);
 331    ts_subtree_summarize_children(child, language);
 332    ts_subtree_summarize_children(tree, language);
 333  }
 334}
 335
 336void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
 337  array_clear(&pool->tree_stack);
 338
 339  if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
 340    array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
 341  }
 342
 343  while (pool->tree_stack.size > 0) {
 344    MutableSubtree tree = array_pop(&pool->tree_stack);
 345
 346    if (tree.ptr->repeat_depth > 0) {
 347      Subtree child1 = ts_subtree_children(tree)[0];
 348      Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1];
 349      long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
 350      if (repeat_delta > 0) {
 351        unsigned n = (unsigned)repeat_delta;
 352        for (unsigned i = n / 2; i > 0; i /= 2) {
 353          ts_subtree__compress(tree, i, language, &pool->tree_stack);
 354          n -= i;
 355        }
 356      }
 357    }
 358
 359    for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
 360      Subtree child = ts_subtree_children(tree)[i];
 361      if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
 362        array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
 363      }
 364    }
 365  }
 366}
 367
 368// Assign all of the node's properties that depend on its children.
 369void ts_subtree_summarize_children(
 370  MutableSubtree self,
 371  const TSLanguage *language
 372) {
 373  assert(!self.data.is_inline);
 374
 375  self.ptr->named_child_count = 0;
 376  self.ptr->visible_child_count = 0;
 377  self.ptr->error_cost = 0;
 378  self.ptr->repeat_depth = 0;
 379  self.ptr->visible_descendant_count = 0;
 380  self.ptr->has_external_tokens = false;
 381  self.ptr->depends_on_column = false;
 382  self.ptr->has_external_scanner_state_change = false;
 383  self.ptr->dynamic_precedence = 0;
 384
 385  uint32_t structural_index = 0;
 386  const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
 387  uint32_t lookahead_end_byte = 0;
 388
 389  const Subtree *children = ts_subtree_children(self);
 390  for (uint32_t i = 0; i < self.ptr->child_count; i++) {
 391    Subtree child = children[i];
 392
 393    if (
 394      self.ptr->size.extent.row == 0 &&
 395      ts_subtree_depends_on_column(child)
 396    ) {
 397      self.ptr->depends_on_column = true;
 398    }
 399
 400    if (ts_subtree_has_external_scanner_state_change(child)) {
 401      self.ptr->has_external_scanner_state_change = true;
 402    }
 403
 404    if (i == 0) {
 405      self.ptr->padding = ts_subtree_padding(child);
 406      self.ptr->size = ts_subtree_size(child);
 407    } else {
 408      self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
 409    }
 410
 411    uint32_t child_lookahead_end_byte =
 412      self.ptr->padding.bytes +
 413      self.ptr->size.bytes +
 414      ts_subtree_lookahead_bytes(child);
 415    if (child_lookahead_end_byte > lookahead_end_byte) {
 416      lookahead_end_byte = child_lookahead_end_byte;
 417    }
 418
 419    if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
 420      self.ptr->error_cost += ts_subtree_error_cost(child);
 421    }
 422
 423    uint32_t grandchild_count = ts_subtree_child_count(child);
 424    if (
 425      self.ptr->symbol == ts_builtin_sym_error ||
 426      self.ptr->symbol == ts_builtin_sym_error_repeat
 427    ) {
 428      if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
 429        if (ts_subtree_visible(child)) {
 430          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
 431        } else if (grandchild_count > 0) {
 432          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
 433        }
 434      }
 435    }
 436
 437    self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
 438    self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child);
 439
 440    if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) {
 441      self.ptr->visible_descendant_count++;
 442      self.ptr->visible_child_count++;
 443      if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) {
 444        self.ptr->named_child_count++;
 445      }
 446    } else if (ts_subtree_visible(child)) {
 447      self.ptr->visible_descendant_count++;
 448      self.ptr->visible_child_count++;
 449      if (ts_subtree_named(child)) self.ptr->named_child_count++;
 450    } else if (grandchild_count > 0) {
 451      self.ptr->visible_child_count += child.ptr->visible_child_count;
 452      self.ptr->named_child_count += child.ptr->named_child_count;
 453    }
 454
 455    if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
 456
 457    if (ts_subtree_is_error(child)) {
 458      self.ptr->fragile_left = self.ptr->fragile_right = true;
 459      self.ptr->parse_state = TS_TREE_STATE_NONE;
 460    }
 461
 462    if (!ts_subtree_extra(child)) structural_index++;
 463  }
 464
 465  self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
 466
 467  if (
 468    self.ptr->symbol == ts_builtin_sym_error ||
 469    self.ptr->symbol == ts_builtin_sym_error_repeat
 470  ) {
 471    self.ptr->error_cost +=
 472      ERROR_COST_PER_RECOVERY +
 473      ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
 474      ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
 475  }
 476
 477  if (self.ptr->child_count > 0) {
 478    Subtree first_child = children[0];
 479    Subtree last_child = children[self.ptr->child_count - 1];
 480
 481    self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
 482    self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
 483
 484    if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
 485    if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
 486
 487    if (
 488      self.ptr->child_count >= 2 &&
 489      !self.ptr->visible &&
 490      !self.ptr->named &&
 491      ts_subtree_symbol(first_child) == self.ptr->symbol
 492    ) {
 493      if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
 494        self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
 495      } else {
 496        self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
 497      }
 498    }
 499  }
 500}
 501
 502// Create a new parent node with the given children.
 503//
 504// This takes ownership of the children array.
 505MutableSubtree ts_subtree_new_node(
 506  TSSymbol symbol,
 507  SubtreeArray *children,
 508  unsigned production_id,
 509  const TSLanguage *language
 510) {
 511  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
 512  bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
 513
 514  // Allocate the node's data at the end of the array of children.
 515  size_t new_byte_size = ts_subtree_alloc_size(children->size);
 516  if (children->capacity * sizeof(Subtree) < new_byte_size) {
 517    children->contents = ts_realloc(children->contents, new_byte_size);
 518    children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree));
 519  }
 520  SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size];
 521
 522  *data = (SubtreeHeapData) {
 523    .ref_count = 1,
 524    .symbol = symbol,
 525    .child_count = children->size,
 526    .visible = metadata.visible,
 527    .named = metadata.named,
 528    .has_changes = false,
 529    .has_external_scanner_state_change = false,
 530    .fragile_left = fragile,
 531    .fragile_right = fragile,
 532    .is_keyword = false,
 533    {{
 534      .visible_descendant_count = 0,
 535      .production_id = production_id,
 536      .first_leaf = {.symbol = 0, .parse_state = 0},
 537    }}
 538  };
 539  MutableSubtree result = {.ptr = data};
 540  ts_subtree_summarize_children(result, language);
 541  return result;
 542}
 543
 544// Create a new error node containing the given children.
 545//
 546// This node is treated as 'extra'. Its children are prevented from having
 547// having any effect on the parse state.
 548Subtree ts_subtree_new_error_node(
 549  SubtreeArray *children,
 550  bool extra,
 551  const TSLanguage *language
 552) {
 553  MutableSubtree result = ts_subtree_new_node(
 554    ts_builtin_sym_error, children, 0, language
 555  );
 556  result.ptr->extra = extra;
 557  return ts_subtree_from_mut(result);
 558}
 559
 560// Create a new 'missing leaf' node.
 561//
 562// This node is treated as 'extra'. Its children are prevented from having
 563// having any effect on the parse state.
 564Subtree ts_subtree_new_missing_leaf(
 565  SubtreePool *pool,
 566  TSSymbol symbol,
 567  Length padding,
 568  uint32_t lookahead_bytes,
 569  const TSLanguage *language
 570) {
 571  Subtree result = ts_subtree_new_leaf(
 572    pool, symbol, padding, length_zero(), lookahead_bytes,
 573    0, false, false, false, language
 574  );
 575  if (result.data.is_inline) {
 576    result.data.is_missing = true;
 577  } else {
 578    ((SubtreeHeapData *)result.ptr)->is_missing = true;
 579  }
 580  return result;
 581}
 582
 583void ts_subtree_retain(Subtree self) {
 584  if (self.data.is_inline) return;
 585  assert(self.ptr->ref_count > 0);
 586  atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
 587  assert(self.ptr->ref_count != 0);
 588}
 589
 590void ts_subtree_release(SubtreePool *pool, Subtree self) {
 591  if (self.data.is_inline) return;
 592  array_clear(&pool->tree_stack);
 593
 594  assert(self.ptr->ref_count > 0);
 595  if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
 596    array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
 597  }
 598
 599  while (pool->tree_stack.size > 0) {
 600    MutableSubtree tree = array_pop(&pool->tree_stack);
 601    if (tree.ptr->child_count > 0) {
 602      Subtree *children = ts_subtree_children(tree);
 603      for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
 604        Subtree child = children[i];
 605        if (child.data.is_inline) continue;
 606        assert(child.ptr->ref_count > 0);
 607        if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
 608          array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
 609        }
 610      }
 611      ts_free(children);
 612    } else {
 613      if (tree.ptr->has_external_tokens) {
 614        ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
 615      }
 616      ts_subtree_pool_free(pool, tree.ptr);
 617    }
 618  }
 619}
 620
 621int ts_subtree_compare(Subtree left, Subtree right) {
 622  if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) return -1;
 623  if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) return 1;
 624  if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1;
 625  if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1;
 626  for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) {
 627    Subtree left_child = ts_subtree_children(left)[i];
 628    Subtree right_child = ts_subtree_children(right)[i];
 629    switch (ts_subtree_compare(left_child, right_child)) {
 630      case -1: return -1;
 631      case 1: return 1;
 632      default: break;
 633    }
 634  }
 635  return 0;
 636}
 637
 638static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
 639  if (self->data.is_inline) {
 640    self->data.has_changes = true;
 641  } else {
 642    self->ptr->has_changes = true;
 643  }
 644}
 645
 646Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) {
 647  typedef struct {
 648    Subtree *tree;
 649    Edit edit;
 650  } EditEntry;
 651
 652  Array(EditEntry) stack = array_new();
 653  array_push(&stack, ((EditEntry) {
 654    .tree = &self,
 655    .edit = (Edit) {
 656      .start = {input_edit->start_byte, input_edit->start_point},
 657      .old_end = {input_edit->old_end_byte, input_edit->old_end_point},
 658      .new_end = {input_edit->new_end_byte, input_edit->new_end_point},
 659    },
 660  }));
 661
 662  while (stack.size) {
 663    EditEntry entry = array_pop(&stack);
 664    Edit edit = entry.edit;
 665    bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
 666    bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
 667    bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree);
 668
 669    Length size = ts_subtree_size(*entry.tree);
 670    Length padding = ts_subtree_padding(*entry.tree);
 671    Length total_size = length_add(padding, size);
 672    uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
 673    uint32_t end_byte = total_size.bytes + lookahead_bytes;
 674    if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
 675
 676    // If the edit is entirely within the space before this subtree, then shift this
 677    // subtree over according to the edit without changing its size.
 678    if (edit.old_end.bytes <= padding.bytes) {
 679      padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
 680    }
 681
 682    // If the edit starts in the space before this subtree and extends into this subtree,
 683    // shrink the subtree's content to compensate for the change in the space before it.
 684    else if (edit.start.bytes < padding.bytes) {
 685      size = length_saturating_sub(size, length_sub(edit.old_end, padding));
 686      padding = edit.new_end;
 687    }
 688
 689    // If the edit is a pure insertion right at the start of the subtree,
 690    // shift the subtree over according to the insertion.
 691    else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
 692      padding = edit.new_end;
 693    }
 694
 695    // If the edit is within this subtree, resize the subtree to reflect the edit.
 696    else if (
 697      edit.start.bytes < total_size.bytes ||
 698      (edit.start.bytes == total_size.bytes && is_pure_insertion)
 699    ) {
 700      size = length_add(
 701        length_sub(edit.new_end, padding),
 702        length_saturating_sub(total_size, edit.old_end)
 703      );
 704    }
 705
 706    MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
 707
 708    if (result.data.is_inline) {
 709      if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
 710        result.data.padding_bytes = padding.bytes;
 711        result.data.padding_rows = padding.extent.row;
 712        result.data.padding_columns = padding.extent.column;
 713        result.data.size_bytes = size.bytes;
 714      } else {
 715        SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
 716        data->ref_count = 1;
 717        data->padding = padding;
 718        data->size = size;
 719        data->lookahead_bytes = lookahead_bytes;
 720        data->error_cost = 0;
 721        data->child_count = 0;
 722        data->symbol = result.data.symbol;
 723        data->parse_state = result.data.parse_state;
 724        data->visible = result.data.visible;
 725        data->named = result.data.named;
 726        data->extra = result.data.extra;
 727        data->fragile_left = false;
 728        data->fragile_right = false;
 729        data->has_changes = false;
 730        data->has_external_tokens = false;
 731        data->depends_on_column = false;
 732        data->is_missing = result.data.is_missing;
 733        data->is_keyword = result.data.is_keyword;
 734        result.ptr = data;
 735      }
 736    } else {
 737      result.ptr->padding = padding;
 738      result.ptr->size = size;
 739    }
 740
 741    ts_subtree_set_has_changes(&result);
 742    *entry.tree = ts_subtree_from_mut(result);
 743
 744    Length child_left, child_right = length_zero();
 745    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
 746      Subtree *child = &ts_subtree_children(*entry.tree)[i];
 747      Length child_size = ts_subtree_total_size(*child);
 748      child_left = child_right;
 749      child_right = length_add(child_left, child_size);
 750
 751      // If this child ends before the edit, it is not affected.
 752      if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
 753
 754      // Keep editing child nodes until a node is reached that starts after the edit.
 755      // Also, if this node's validity depends on its column position, then continue
 756      // invaliditing child nodes until reaching a line break.
 757      if ((
 758        (child_left.bytes > edit.old_end.bytes) ||
 759        (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
 760      ) && (
 761        !invalidate_first_row ||
 762        child_left.extent.row > entry.tree->ptr->padding.extent.row
 763      )) {
 764        break;
 765      }
 766
 767      // Transform edit into the child's coordinate space.
 768      Edit child_edit = {
 769        .start = length_saturating_sub(edit.start, child_left),
 770        .old_end = length_saturating_sub(edit.old_end, child_left),
 771        .new_end = length_saturating_sub(edit.new_end, child_left),
 772      };
 773
 774      // Interpret all inserted text as applying to the *first* child that touches the edit.
 775      // Subsequent children are only never have any text inserted into them; they are only
 776      // shrunk to compensate for the edit.
 777      if (
 778        child_right.bytes > edit.start.bytes ||
 779        (child_right.bytes == edit.start.bytes && is_pure_insertion)
 780      ) {
 781        edit.new_end = edit.start;
 782      }
 783
 784      // Children that occur before the edit are not reshaped by the edit.
 785      else {
 786        child_edit.old_end = child_edit.start;
 787        child_edit.new_end = child_edit.start;
 788      }
 789
 790      // Queue processing of this child's subtree.
 791      array_push(&stack, ((EditEntry) {
 792        .tree = child,
 793        .edit = child_edit,
 794      }));
 795    }
 796  }
 797
 798  array_delete(&stack);
 799  return self;
 800}
 801
 802Subtree ts_subtree_last_external_token(Subtree tree) {
 803  if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
 804  while (tree.ptr->child_count > 0) {
 805    for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) {
 806      Subtree child = ts_subtree_children(tree)[i];
 807      if (ts_subtree_has_external_tokens(child)) {
 808        tree = child;
 809        break;
 810      }
 811    }
 812  }
 813  return tree;
 814}
 815
 816static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) {
 817  if (chr == -1)
 818    return snprintf(str, n, "INVALID");
 819  else if (chr == '\0')
 820    return snprintf(str, n, "'\\0'");
 821  else if (chr == '\n')
 822    return snprintf(str, n, "'\\n'");
 823  else if (chr == '\t')
 824    return snprintf(str, n, "'\\t'");
 825  else if (chr == '\r')
 826    return snprintf(str, n, "'\\r'");
 827  else if (0 < chr && chr < 128 && isprint(chr))
 828    return snprintf(str, n, "'%c'", chr);
 829  else
 830    return snprintf(str, n, "%d", chr);
 831}
 832
 833static const char *const ROOT_FIELD = "__ROOT__";
 834
 835static size_t ts_subtree__write_to_string(
 836  Subtree self, char *string, size_t limit,
 837  const TSLanguage *language, bool include_all,
 838  TSSymbol alias_symbol, bool alias_is_named, const char *field_name
 839) {
 840  if (!self.ptr) return snprintf(string, limit, "(NULL)");
 841
 842  char *cursor = string;
 843  char **writer = (limit > 1) ? &cursor : &string;
 844  bool is_root = field_name == ROOT_FIELD;
 845  bool is_visible =
 846    include_all ||
 847    ts_subtree_missing(self) ||
 848    (
 849      alias_symbol
 850        ? alias_is_named
 851        : ts_subtree_visible(self) && ts_subtree_named(self)
 852    );
 853
 854  if (is_visible) {
 855    if (!is_root) {
 856      cursor += snprintf(*writer, limit, " ");
 857      if (field_name) {
 858        cursor += snprintf(*writer, limit, "%s: ", field_name);
 859      }
 860    }
 861
 862    if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
 863      cursor += snprintf(*writer, limit, "(UNEXPECTED ");
 864      cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
 865    } else {
 866      TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
 867      const char *symbol_name = ts_language_symbol_name(language, symbol);
 868      if (ts_subtree_missing(self)) {
 869        cursor += snprintf(*writer, limit, "(MISSING ");
 870        if (alias_is_named || ts_subtree_named(self)) {
 871          cursor += snprintf(*writer, limit, "%s", symbol_name);
 872        } else {
 873          cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
 874        }
 875      } else {
 876        cursor += snprintf(*writer, limit, "(%s", symbol_name);
 877      }
 878    }
 879  } else if (is_root) {
 880    TSSymbol symbol = ts_subtree_symbol(self);
 881    const char *symbol_name = ts_language_symbol_name(language, symbol);
 882    cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
 883  }
 884
 885  if (ts_subtree_child_count(self)) {
 886    const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
 887    const TSFieldMapEntry *field_map, *field_map_end;
 888    ts_language_field_map(
 889      language,
 890      self.ptr->production_id,
 891      &field_map,
 892      &field_map_end
 893    );
 894
 895    uint32_t structural_child_index = 0;
 896    for (uint32_t i = 0; i < self.ptr->child_count; i++) {
 897      Subtree child = ts_subtree_children(self)[i];
 898      if (ts_subtree_extra(child)) {
 899        cursor += ts_subtree__write_to_string(
 900          child, *writer, limit,
 901          language, include_all,
 902          0, false, NULL
 903        );
 904      } else {
 905        TSSymbol subtree_alias_symbol = alias_sequence
 906          ? alias_sequence[structural_child_index]
 907          : 0;
 908        bool subtree_alias_is_named = subtree_alias_symbol
 909          ? ts_language_symbol_metadata(language, subtree_alias_symbol).named
 910          : false;
 911
 912        const char *child_field_name = is_visible ? NULL : field_name;
 913        for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
 914          if (!map->inherited && map->child_index == structural_child_index) {
 915            child_field_name = language->field_names[map->field_id];
 916            break;
 917          }
 918        }
 919
 920        cursor += ts_subtree__write_to_string(
 921          child, *writer, limit,
 922          language, include_all,
 923          subtree_alias_symbol, subtree_alias_is_named, child_field_name
 924        );
 925        structural_child_index++;
 926      }
 927    }
 928  }
 929
 930  if (is_visible) cursor += snprintf(*writer, limit, ")");
 931
 932  return cursor - string;
 933}
 934
 935char *ts_subtree_string(
 936  Subtree self,
 937  const TSLanguage *language,
 938  bool include_all
 939) {
 940  char scratch_string[1];
 941  size_t size = ts_subtree__write_to_string(
 942    self, scratch_string, 1,
 943    language, include_all,
 944    0, false, ROOT_FIELD
 945  ) + 1;
 946  char *result = ts_malloc(size * sizeof(char));
 947  ts_subtree__write_to_string(
 948    self, result, size,
 949    language, include_all,
 950    0, false, ROOT_FIELD
 951  );
 952  return result;
 953}
 954
 955void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
 956                                 const TSLanguage *language, TSSymbol alias_symbol,
 957                                 FILE *f) {
 958  TSSymbol subtree_symbol = ts_subtree_symbol(*self);
 959  TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
 960  uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
 961  fprintf(f, "tree_%p [label=\"", (void *)self);
 962  ts_language_write_symbol_as_dot_string(language, f, symbol);
 963  fprintf(f, "\"");
 964
 965  if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
 966  if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
 967
 968  fprintf(f, ", tooltip=\""
 969    "range: %u - %u\n"
 970    "state: %d\n"
 971    "error-cost: %u\n"
 972    "has-changes: %u\n"
 973    "depends-on-column: %u\n"
 974    "descendant-count: %u\n"
 975    "repeat-depth: %u\n"
 976    "lookahead-bytes: %u",
 977    start_offset, end_offset,
 978    ts_subtree_parse_state(*self),
 979    ts_subtree_error_cost(*self),
 980    ts_subtree_has_changes(*self),
 981    ts_subtree_depends_on_column(*self),
 982    ts_subtree_visible_descendant_count(*self),
 983    ts_subtree_repeat_depth(*self),
 984    ts_subtree_lookahead_bytes(*self)
 985  );
 986
 987  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) {
 988    fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
 989  }
 990
 991  fprintf(f, "\"]\n");
 992
 993  uint32_t child_start_offset = start_offset;
 994  uint32_t child_info_offset =
 995    language->max_alias_sequence_length *
 996    ts_subtree_production_id(*self);
 997  for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
 998    const Subtree *child = &ts_subtree_children(*self)[i];
 999    TSSymbol subtree_alias_symbol = 0;
1000    if (!ts_subtree_extra(*child) && child_info_offset) {
1001      subtree_alias_symbol = language->alias_sequences[child_info_offset];
1002      child_info_offset++;
1003    }
1004    ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f);
1005    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i);
1006    child_start_offset += ts_subtree_total_bytes(*child);
1007  }
1008}
1009
1010void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
1011  fprintf(f, "digraph tree {\n");
1012  fprintf(f, "edge [arrowhead=none]\n");
1013  ts_subtree__print_dot_graph(&self, 0, language, 0, f);
1014  fprintf(f, "}\n");
1015}
1016
1017const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) {
1018  static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0};
1019  if (
1020    self.ptr &&
1021    !self.data.is_inline &&
1022    self.ptr->has_external_tokens &&
1023    self.ptr->child_count == 0
1024  ) {
1025    return &self.ptr->external_scanner_state;
1026  } else {
1027    return &empty_state;
1028  }
1029}
1030
1031bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
1032  const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self);
1033  const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other);
1034  return ts_external_scanner_state_eq(
1035    state_self,
1036    ts_external_scanner_state_data(state_other),
1037    state_other->length
1038  );
1039}