1#ifndef TREE_SITTER_SUBTREE_H_
  2#define TREE_SITTER_SUBTREE_H_
  3
  4#ifdef __cplusplus
  5extern "C" {
  6#endif
  7
  8#include <limits.h>
  9#include <stdbool.h>
 10#include <stdio.h>
 11#include "./length.h"
 12#include "./array.h"
 13#include "./error_costs.h"
 14#include "./host.h"
 15#include "tree_sitter/api.h"
 16#include "tree_sitter/parser.h"
 17
 18#define TS_TREE_STATE_NONE USHRT_MAX
 19#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
 20
 21// The serialized state of an external scanner.
 22//
 23// Every time an external token subtree is created after a call to an
 24// external scanner, the scanner's `serialize` function is called to
 25// retrieve a serialized copy of its state. The bytes are then copied
 26// onto the subtree itself so that the scanner's state can later be
 27// restored using its `deserialize` function.
 28//
 29// Small byte arrays are stored inline, and long ones are allocated
 30// separately on the heap.
 31typedef struct {
 32  union {
 33    char *long_data;
 34    char short_data[24];
 35  };
 36  uint32_t length;
 37} ExternalScannerState;
 38
 39// A compact representation of a subtree.
 40//
 41// This representation is used for small leaf nodes that are not
 42// errors, and were not created by an external scanner.
 43//
 44// The idea behind the layout of this struct is that the `is_inline`
 45// bit will fall exactly into the same location as the least significant
 46// bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
 47// Because of alignment, for any valid pointer this will be 0, giving
 48// us the opportunity to make use of this bit to signify whether to use
 49// the pointer or the inline struct.
 50typedef struct SubtreeInlineData SubtreeInlineData;
 51
 52#define SUBTREE_BITS    \
 53  bool visible : 1;     \
 54  bool named : 1;       \
 55  bool extra : 1;       \
 56  bool has_changes : 1; \
 57  bool is_missing : 1;  \
 58  bool is_keyword : 1;
 59
 60#define SUBTREE_SIZE           \
 61  uint8_t padding_columns;     \
 62  uint8_t padding_rows : 4;    \
 63  uint8_t lookahead_bytes : 4; \
 64  uint8_t padding_bytes;       \
 65  uint8_t size_bytes;
 66
 67#if TS_BIG_ENDIAN
 68#if TS_PTR_SIZE == 32
 69
 70struct SubtreeInlineData {
 71  uint16_t parse_state;
 72  uint8_t symbol;
 73  SUBTREE_BITS
 74  bool unused : 1;
 75  bool is_inline : 1;
 76  SUBTREE_SIZE
 77};
 78
 79#else
 80
 81struct SubtreeInlineData {
 82  SUBTREE_SIZE
 83  uint16_t parse_state;
 84  uint8_t symbol;
 85  SUBTREE_BITS
 86  bool unused : 1;
 87  bool is_inline : 1;
 88};
 89
 90#endif
 91#else
 92
 93struct SubtreeInlineData {
 94  bool is_inline : 1;
 95  SUBTREE_BITS
 96  uint8_t symbol;
 97  uint16_t parse_state;
 98  SUBTREE_SIZE
 99};
100
101#endif
102
103#undef SUBTREE_BITS
104#undef SUBTREE_SIZE
105
106// A heap-allocated representation of a subtree.
107//
108// This representation is used for parent nodes, external tokens,
109// errors, and other leaf nodes whose data is too large to fit into
110// the inline representation.
111typedef struct {
112  volatile uint32_t ref_count;
113  Length padding;
114  Length size;
115  uint32_t lookahead_bytes;
116  uint32_t error_cost;
117  uint32_t child_count;
118  TSSymbol symbol;
119  TSStateId parse_state;
120
121  bool visible : 1;
122  bool named : 1;
123  bool extra : 1;
124  bool fragile_left : 1;
125  bool fragile_right : 1;
126  bool has_changes : 1;
127  bool has_external_tokens : 1;
128  bool has_external_scanner_state_change : 1;
129  bool depends_on_column: 1;
130  bool is_missing : 1;
131  bool is_keyword : 1;
132
133  union {
134    // Non-terminal subtrees (`child_count > 0`)
135    struct {
136      uint32_t visible_child_count;
137      uint32_t named_child_count;
138      uint32_t visible_descendant_count;
139      int32_t dynamic_precedence;
140      uint16_t repeat_depth;
141      uint16_t production_id;
142      struct {
143        TSSymbol symbol;
144        TSStateId parse_state;
145      } first_leaf;
146    };
147
148    // External terminal subtrees (`child_count == 0 && has_external_tokens`)
149    ExternalScannerState external_scanner_state;
150
151    // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
152    int32_t lookahead_char;
153  };
154} SubtreeHeapData;
155
156// The fundamental building block of a syntax tree.
157typedef union {
158  SubtreeInlineData data;
159  const SubtreeHeapData *ptr;
160} Subtree;
161
162// Like Subtree, but mutable.
163typedef union {
164  SubtreeInlineData data;
165  SubtreeHeapData *ptr;
166} MutableSubtree;
167
168typedef Array(Subtree) SubtreeArray;
169typedef Array(MutableSubtree) MutableSubtreeArray;
170
171typedef struct {
172  MutableSubtreeArray free_trees;
173  MutableSubtreeArray tree_stack;
174} SubtreePool;
175
176void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
177const char *ts_external_scanner_state_data(const ExternalScannerState *);
178bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned);
179void ts_external_scanner_state_delete(ExternalScannerState *self);
180
181void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
182void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
183void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
184void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
185void ts_subtree_array_reverse(SubtreeArray *);
186
187SubtreePool ts_subtree_pool_new(uint32_t capacity);
188void ts_subtree_pool_delete(SubtreePool *);
189
190Subtree ts_subtree_new_leaf(
191  SubtreePool *, TSSymbol, Length, Length, uint32_t,
192  TSStateId, bool, bool, bool, const TSLanguage *
193);
194Subtree ts_subtree_new_error(
195  SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
196);
197MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
198Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
199Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *);
200MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
201void ts_subtree_retain(Subtree);
202void ts_subtree_release(SubtreePool *, Subtree);
203int ts_subtree_compare(Subtree, Subtree);
204void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
205void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
206void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
207void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
208Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
209char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
210void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
211Subtree ts_subtree_last_external_token(Subtree);
212const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
213bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
214
215#define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name)
216
217static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
218static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
219static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
220static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
221static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
222static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
223static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
224static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
225static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
226
227#undef SUBTREE_GET
228
229// Get the size needed to store a heap-allocated subtree with the given
230// number of children.
231static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
232  return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
233}
234
235// Get a subtree's children, which are allocated immediately before the
236// tree's own heap data.
237#define ts_subtree_children(self) \
238  ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
239
240static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
241  if (self->data.is_inline) {
242    self->data.extra = is_extra;
243  } else {
244    self->ptr->extra = is_extra;
245  }
246}
247
248static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
249  if (self.data.is_inline) return self.data.symbol;
250  if (self.ptr->child_count == 0) return self.ptr->symbol;
251  return self.ptr->first_leaf.symbol;
252}
253
254static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
255  if (self.data.is_inline) return self.data.parse_state;
256  if (self.ptr->child_count == 0) return self.ptr->parse_state;
257  return self.ptr->first_leaf.parse_state;
258}
259
260static inline Length ts_subtree_padding(Subtree self) {
261  if (self.data.is_inline) {
262    Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
263    return result;
264  } else {
265    return self.ptr->padding;
266  }
267}
268
269static inline Length ts_subtree_size(Subtree self) {
270  if (self.data.is_inline) {
271    Length result = {self.data.size_bytes, {0, self.data.size_bytes}};
272    return result;
273  } else {
274    return self.ptr->size;
275  }
276}
277
278static inline Length ts_subtree_total_size(Subtree self) {
279  return length_add(ts_subtree_padding(self), ts_subtree_size(self));
280}
281
282static inline uint32_t ts_subtree_total_bytes(Subtree self) {
283  return ts_subtree_total_size(self).bytes;
284}
285
286static inline uint32_t ts_subtree_child_count(Subtree self) {
287  return self.data.is_inline ? 0 : self.ptr->child_count;
288}
289
290static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
291  return self.data.is_inline ? 0 : self.ptr->repeat_depth;
292}
293
294static inline uint32_t ts_subtree_is_repetition(Subtree self) {
295  return self.data.is_inline
296    ? 0
297    : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0;
298}
299
300static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
301  return (self.data.is_inline || self.ptr->child_count == 0)
302    ? 0
303    : self.ptr->visible_descendant_count;
304}
305
306static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
307  if (ts_subtree_child_count(self) > 0) {
308    return self.ptr->visible_child_count;
309  } else {
310    return 0;
311  }
312}
313
314static inline uint32_t ts_subtree_error_cost(Subtree self) {
315  if (ts_subtree_missing(self)) {
316    return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
317  } else {
318    return self.data.is_inline ? 0 : self.ptr->error_cost;
319  }
320}
321
322static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
323  return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
324}
325
326static inline uint16_t ts_subtree_production_id(Subtree self) {
327  if (ts_subtree_child_count(self) > 0) {
328    return self.ptr->production_id;
329  } else {
330    return 0;
331  }
332}
333
334static inline bool ts_subtree_fragile_left(Subtree self) {
335  return self.data.is_inline ? false : self.ptr->fragile_left;
336}
337
338static inline bool ts_subtree_fragile_right(Subtree self) {
339  return self.data.is_inline ? false : self.ptr->fragile_right;
340}
341
342static inline bool ts_subtree_has_external_tokens(Subtree self) {
343  return self.data.is_inline ? false : self.ptr->has_external_tokens;
344}
345
346static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
347  return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change;
348}
349
350static inline bool ts_subtree_depends_on_column(Subtree self) {
351  return self.data.is_inline ? false : self.ptr->depends_on_column;
352}
353
354static inline bool ts_subtree_is_fragile(Subtree self) {
355  return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
356}
357
358static inline bool ts_subtree_is_error(Subtree self) {
359  return ts_subtree_symbol(self) == ts_builtin_sym_error;
360}
361
362static inline bool ts_subtree_is_eof(Subtree self) {
363  return ts_subtree_symbol(self) == ts_builtin_sym_end;
364}
365
366static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
367  Subtree result;
368  result.data = self.data;
369  return result;
370}
371
372static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
373  MutableSubtree result;
374  result.data = self.data;
375  return result;
376}
377
378#ifdef __cplusplus
379}
380#endif
381
382#endif  // TREE_SITTER_SUBTREE_H_