1#include "./get_changed_ranges.h"
  2#include "./subtree.h"
  3#include "./language.h"
  4#include "./error_costs.h"
  5#include "./tree_cursor.h"
  6#include <assert.h>
  7
  8// #define DEBUG_GET_CHANGED_RANGES
  9
 10static void ts_range_array_add(
 11  TSRangeArray *self,
 12  Length start,
 13  Length end
 14) {
 15  if (self->size > 0) {
 16    TSRange *last_range = array_back(self);
 17    if (start.bytes <= last_range->end_byte) {
 18      last_range->end_byte = end.bytes;
 19      last_range->end_point = end.extent;
 20      return;
 21    }
 22  }
 23
 24  if (start.bytes < end.bytes) {
 25    TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
 26    array_push(self, range);
 27  }
 28}
 29
 30bool ts_range_array_intersects(
 31  const TSRangeArray *self,
 32  unsigned start_index,
 33  uint32_t start_byte,
 34  uint32_t end_byte
 35) {
 36  for (unsigned i = start_index; i < self->size; i++) {
 37    TSRange *range = &self->contents[i];
 38    if (range->end_byte > start_byte) {
 39      if (range->start_byte >= end_byte) break;
 40      return true;
 41    }
 42  }
 43  return false;
 44}
 45
 46void ts_range_array_get_changed_ranges(
 47  const TSRange *old_ranges, unsigned old_range_count,
 48  const TSRange *new_ranges, unsigned new_range_count,
 49  TSRangeArray *differences
 50) {
 51  unsigned new_index = 0;
 52  unsigned old_index = 0;
 53  Length current_position = length_zero();
 54  bool in_old_range = false;
 55  bool in_new_range = false;
 56
 57  while (old_index < old_range_count || new_index < new_range_count) {
 58    const TSRange *old_range = &old_ranges[old_index];
 59    const TSRange *new_range = &new_ranges[new_index];
 60
 61    Length next_old_position;
 62    if (in_old_range) {
 63      next_old_position = (Length) {old_range->end_byte, old_range->end_point};
 64    } else if (old_index < old_range_count) {
 65      next_old_position = (Length) {old_range->start_byte, old_range->start_point};
 66    } else {
 67      next_old_position = LENGTH_MAX;
 68    }
 69
 70    Length next_new_position;
 71    if (in_new_range) {
 72      next_new_position = (Length) {new_range->end_byte, new_range->end_point};
 73    } else if (new_index < new_range_count) {
 74      next_new_position = (Length) {new_range->start_byte, new_range->start_point};
 75    } else {
 76      next_new_position = LENGTH_MAX;
 77    }
 78
 79    if (next_old_position.bytes < next_new_position.bytes) {
 80      if (in_old_range != in_new_range) {
 81        ts_range_array_add(differences, current_position, next_old_position);
 82      }
 83      if (in_old_range) old_index++;
 84      current_position = next_old_position;
 85      in_old_range = !in_old_range;
 86    } else if (next_new_position.bytes < next_old_position.bytes) {
 87      if (in_old_range != in_new_range) {
 88        ts_range_array_add(differences, current_position, next_new_position);
 89      }
 90      if (in_new_range) new_index++;
 91      current_position = next_new_position;
 92      in_new_range = !in_new_range;
 93    } else {
 94      if (in_old_range != in_new_range) {
 95        ts_range_array_add(differences, current_position, next_new_position);
 96      }
 97      if (in_old_range) old_index++;
 98      if (in_new_range) new_index++;
 99      in_old_range = !in_old_range;
100      in_new_range = !in_new_range;
101      current_position = next_new_position;
102    }
103  }
104}
105
106typedef struct {
107  TreeCursor cursor;
108  const TSLanguage *language;
109  unsigned visible_depth;
110  bool in_padding;
111} Iterator;
112
113static Iterator iterator_new(
114  TreeCursor *cursor,
115  const Subtree *tree,
116  const TSLanguage *language
117) {
118  array_clear(&cursor->stack);
119  array_push(&cursor->stack, ((TreeCursorEntry) {
120    .subtree = tree,
121    .position = length_zero(),
122    .child_index = 0,
123    .structural_child_index = 0,
124  }));
125  return (Iterator) {
126    .cursor = *cursor,
127    .language = language,
128    .visible_depth = 1,
129    .in_padding = false,
130  };
131}
132
133static bool iterator_done(Iterator *self) {
134  return self->cursor.stack.size == 0;
135}
136
137static Length iterator_start_position(Iterator *self) {
138  TreeCursorEntry entry = *array_back(&self->cursor.stack);
139  if (self->in_padding) {
140    return entry.position;
141  } else {
142    return length_add(entry.position, ts_subtree_padding(*entry.subtree));
143  }
144}
145
146static Length iterator_end_position(Iterator *self) {
147  TreeCursorEntry entry = *array_back(&self->cursor.stack);
148  Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree));
149  if (self->in_padding) {
150    return result;
151  } else {
152    return length_add(result, ts_subtree_size(*entry.subtree));
153  }
154}
155
156static bool iterator_tree_is_visible(const Iterator *self) {
157  TreeCursorEntry entry = *array_back(&self->cursor.stack);
158  if (ts_subtree_visible(*entry.subtree)) return true;
159  if (self->cursor.stack.size > 1) {
160    Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
161    return ts_language_alias_at(
162      self->language,
163      parent.ptr->production_id,
164      entry.structural_child_index
165    ) != 0;
166  }
167  return false;
168}
169
170static void iterator_get_visible_state(
171  const Iterator *self,
172  Subtree *tree,
173  TSSymbol *alias_symbol,
174  uint32_t *start_byte
175) {
176  uint32_t i = self->cursor.stack.size - 1;
177
178  if (self->in_padding) {
179    if (i == 0) return;
180    i--;
181  }
182
183  for (; i + 1 > 0; i--) {
184    TreeCursorEntry entry = self->cursor.stack.contents[i];
185
186    if (i > 0) {
187      const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
188      *alias_symbol = ts_language_alias_at(
189        self->language,
190        parent->ptr->production_id,
191        entry.structural_child_index
192      );
193    }
194
195    if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
196      *tree = *entry.subtree;
197      *start_byte = entry.position.bytes;
198      break;
199    }
200  }
201}
202
203static void iterator_ascend(Iterator *self) {
204  if (iterator_done(self)) return;
205  if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
206  if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
207  self->cursor.stack.size--;
208}
209
210static bool iterator_descend(Iterator *self, uint32_t goal_position) {
211  if (self->in_padding) return false;
212
213  bool did_descend = false;
214  do {
215    did_descend = false;
216    TreeCursorEntry entry = *array_back(&self->cursor.stack);
217    Length position = entry.position;
218    uint32_t structural_child_index = 0;
219    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
220      const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
221      Length child_left = length_add(position, ts_subtree_padding(*child));
222      Length child_right = length_add(child_left, ts_subtree_size(*child));
223
224      if (child_right.bytes > goal_position) {
225        array_push(&self->cursor.stack, ((TreeCursorEntry) {
226          .subtree = child,
227          .position = position,
228          .child_index = i,
229          .structural_child_index = structural_child_index,
230        }));
231
232        if (iterator_tree_is_visible(self)) {
233          if (child_left.bytes > goal_position) {
234            self->in_padding = true;
235          } else {
236            self->visible_depth++;
237          }
238          return true;
239        }
240
241        did_descend = true;
242        break;
243      }
244
245      position = child_right;
246      if (!ts_subtree_extra(*child)) structural_child_index++;
247    }
248  } while (did_descend);
249
250  return false;
251}
252
253static void iterator_advance(Iterator *self) {
254  if (self->in_padding) {
255    self->in_padding = false;
256    if (iterator_tree_is_visible(self)) {
257      self->visible_depth++;
258    } else {
259      iterator_descend(self, 0);
260    }
261    return;
262  }
263
264  for (;;) {
265    if (iterator_tree_is_visible(self)) self->visible_depth--;
266    TreeCursorEntry entry = array_pop(&self->cursor.stack);
267    if (iterator_done(self)) return;
268
269    const Subtree *parent = array_back(&self->cursor.stack)->subtree;
270    uint32_t child_index = entry.child_index + 1;
271    if (ts_subtree_child_count(*parent) > child_index) {
272      Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
273      uint32_t structural_child_index = entry.structural_child_index;
274      if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
275      const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
276
277      array_push(&self->cursor.stack, ((TreeCursorEntry) {
278        .subtree = next_child,
279        .position = position,
280        .child_index = child_index,
281        .structural_child_index = structural_child_index,
282      }));
283
284      if (iterator_tree_is_visible(self)) {
285        if (ts_subtree_padding(*next_child).bytes > 0) {
286          self->in_padding = true;
287        } else {
288          self->visible_depth++;
289        }
290      } else {
291        iterator_descend(self, 0);
292      }
293      break;
294    }
295  }
296}
297
298typedef enum {
299  IteratorDiffers,
300  IteratorMayDiffer,
301  IteratorMatches,
302} IteratorComparison;
303
304static IteratorComparison iterator_compare(
305  const Iterator *old_iter,
306  const Iterator *new_iter
307) {
308  Subtree old_tree = NULL_SUBTREE;
309  Subtree new_tree = NULL_SUBTREE;
310  uint32_t old_start = 0;
311  uint32_t new_start = 0;
312  TSSymbol old_alias_symbol = 0;
313  TSSymbol new_alias_symbol = 0;
314  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
315  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
316
317  if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
318  if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
319
320  if (
321    old_alias_symbol == new_alias_symbol &&
322    ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
323  ) {
324    if (old_start == new_start &&
325        !ts_subtree_has_changes(old_tree) &&
326        ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
327        ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
328        ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
329        ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
330        (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
331        (ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
332      return IteratorMatches;
333    } else {
334      return IteratorMayDiffer;
335    }
336  }
337
338  return IteratorDiffers;
339}
340
341#ifdef DEBUG_GET_CHANGED_RANGES
342static inline void iterator_print_state(Iterator *self) {
343  TreeCursorEntry entry = *array_back(&self->cursor.stack);
344  TSPoint start = iterator_start_position(self).extent;
345  TSPoint end = iterator_end_position(self).extent;
346  const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
347  printf(
348    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
349    name, self->in_padding ? "(p)" : "   ",
350    self->visible_depth,
351    start.row + 1, start.column,
352    end.row + 1, end.column
353  );
354}
355#endif
356
357unsigned ts_subtree_get_changed_ranges(
358  const Subtree *old_tree, const Subtree *new_tree,
359  TreeCursor *cursor1, TreeCursor *cursor2,
360  const TSLanguage *language,
361  const TSRangeArray *included_range_differences,
362  TSRange **ranges
363) {
364  TSRangeArray results = array_new();
365
366  Iterator old_iter = iterator_new(cursor1, old_tree, language);
367  Iterator new_iter = iterator_new(cursor2, new_tree, language);
368
369  unsigned included_range_difference_index = 0;
370
371  Length position = iterator_start_position(&old_iter);
372  Length next_position = iterator_start_position(&new_iter);
373  if (position.bytes < next_position.bytes) {
374    ts_range_array_add(&results, position, next_position);
375    position = next_position;
376  } else if (position.bytes > next_position.bytes) {
377    ts_range_array_add(&results, next_position, position);
378    next_position = position;
379  }
380
381  do {
382    #ifdef DEBUG_GET_CHANGED_RANGES
383    printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
384    iterator_print_state(&old_iter);
385    printf("\tvs\t");
386    iterator_print_state(&new_iter);
387    puts("");
388    #endif
389
390    // Compare the old and new subtrees.
391    IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
392
393    // Even if the two subtrees appear to be identical, they could differ
394    // internally if they contain a range of text that was previously
395    // excluded from the parse, and is now included, or vice-versa.
396    if (comparison == IteratorMatches && ts_range_array_intersects(
397      included_range_differences,
398      included_range_difference_index,
399      position.bytes,
400      iterator_end_position(&old_iter).bytes
401    )) {
402      comparison = IteratorMayDiffer;
403    }
404
405    bool is_changed = false;
406    switch (comparison) {
407      // If the subtrees are definitely identical, move to the end
408      // of both subtrees.
409      case IteratorMatches:
410        next_position = iterator_end_position(&old_iter);
411        break;
412
413      // If the subtrees might differ internally, descend into both
414      // subtrees, finding the first child that spans the current position.
415      case IteratorMayDiffer:
416        if (iterator_descend(&old_iter, position.bytes)) {
417          if (!iterator_descend(&new_iter, position.bytes)) {
418            is_changed = true;
419            next_position = iterator_end_position(&old_iter);
420          }
421        } else if (iterator_descend(&new_iter, position.bytes)) {
422          is_changed = true;
423          next_position = iterator_end_position(&new_iter);
424        } else {
425          next_position = length_min(
426            iterator_end_position(&old_iter),
427            iterator_end_position(&new_iter)
428          );
429        }
430        break;
431
432      // If the subtrees are different, record a change and then move
433      // to the end of both subtrees.
434      case IteratorDiffers:
435        is_changed = true;
436        next_position = length_min(
437          iterator_end_position(&old_iter),
438          iterator_end_position(&new_iter)
439        );
440        break;
441    }
442
443    // Ensure that both iterators are caught up to the current position.
444    while (
445      !iterator_done(&old_iter) &&
446      iterator_end_position(&old_iter).bytes <= next_position.bytes
447    ) iterator_advance(&old_iter);
448    while (
449      !iterator_done(&new_iter) &&
450      iterator_end_position(&new_iter).bytes <= next_position.bytes
451    ) iterator_advance(&new_iter);
452
453    // Ensure that both iterators are at the same depth in the tree.
454    while (old_iter.visible_depth > new_iter.visible_depth) {
455      iterator_ascend(&old_iter);
456    }
457    while (new_iter.visible_depth > old_iter.visible_depth) {
458      iterator_ascend(&new_iter);
459    }
460
461    if (is_changed) {
462      #ifdef DEBUG_GET_CHANGED_RANGES
463      printf(
464        "  change: [[%u, %u] - [%u, %u]]\n",
465        position.extent.row + 1, position.extent.column,
466        next_position.extent.row + 1, next_position.extent.column
467      );
468      #endif
469
470      ts_range_array_add(&results, position, next_position);
471    }
472
473    position = next_position;
474
475    // Keep track of the current position in the included range differences
476    // array in order to avoid scanning the entire array on each iteration.
477    while (included_range_difference_index < included_range_differences->size) {
478      const TSRange *range = &included_range_differences->contents[
479        included_range_difference_index
480      ];
481      if (range->end_byte <= position.bytes) {
482        included_range_difference_index++;
483      } else {
484        break;
485      }
486    }
487  } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
488
489  Length old_size = ts_subtree_total_size(*old_tree);
490  Length new_size = ts_subtree_total_size(*new_tree);
491  if (old_size.bytes < new_size.bytes) {
492    ts_range_array_add(&results, old_size, new_size);
493  } else if (new_size.bytes < old_size.bytes) {
494    ts_range_array_add(&results, new_size, old_size);
495  }
496
497  *cursor1 = old_iter.cursor;
498  *cursor2 = new_iter.cursor;
499  *ranges = results.contents;
500  return results.size;
501}