1#ifndef TREE_SITTER_LANGUAGE_H_
  2#define TREE_SITTER_LANGUAGE_H_
  3
  4#ifdef __cplusplus
  5extern "C" {
  6#endif
  7
  8#include "./subtree.h"
  9#include "./parser.h"
 10
 11#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
 12
 13#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14
 14#define LANGUAGE_VERSION_USABLE_VIA_WASM 13
 15
 16typedef struct {
 17  const TSParseAction *actions;
 18  uint32_t action_count;
 19  bool is_reusable;
 20} TableEntry;
 21
 22typedef struct {
 23  const TSLanguage *language;
 24  const uint16_t *data;
 25  const uint16_t *group_end;
 26  TSStateId state;
 27  uint16_t table_value;
 28  uint16_t section_index;
 29  uint16_t group_count;
 30  bool is_small_state;
 31
 32  const TSParseAction *actions;
 33  TSSymbol symbol;
 34  TSStateId next_state;
 35  uint16_t action_count;
 36} LookaheadIterator;
 37
 38void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
 39
 40TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
 41
 42TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
 43
 44TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol);
 45
 46static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
 47  return 0 < symbol && symbol < self->external_token_count + 1;
 48}
 49
 50static inline const TSParseAction *ts_language_actions(
 51  const TSLanguage *self,
 52  TSStateId state,
 53  TSSymbol symbol,
 54  uint32_t *count
 55) {
 56  TableEntry entry;
 57  ts_language_table_entry(self, state, symbol, &entry);
 58  *count = entry.action_count;
 59  return entry.actions;
 60}
 61
 62static inline bool ts_language_has_reduce_action(
 63  const TSLanguage *self,
 64  TSStateId state,
 65  TSSymbol symbol
 66) {
 67  TableEntry entry;
 68  ts_language_table_entry(self, state, symbol, &entry);
 69  return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce;
 70}
 71
 72// Lookup the table value for a given symbol and state.
 73//
 74// For non-terminal symbols, the table value represents a successor state.
 75// For terminal symbols, it represents an index in the actions table.
 76// For 'large' parse states, this is a direct lookup. For 'small' parse
 77// states, this requires searching through the symbol groups to find
 78// the given symbol.
 79static inline uint16_t ts_language_lookup(
 80  const TSLanguage *self,
 81  TSStateId state,
 82  TSSymbol symbol
 83) {
 84  if (state >= self->large_state_count) {
 85    uint32_t index = self->small_parse_table_map[state - self->large_state_count];
 86    const uint16_t *data = &self->small_parse_table[index];
 87    uint16_t group_count = *(data++);
 88    for (unsigned i = 0; i < group_count; i++) {
 89      uint16_t section_value = *(data++);
 90      uint16_t symbol_count = *(data++);
 91      for (unsigned j = 0; j < symbol_count; j++) {
 92        if (*(data++) == symbol) return section_value;
 93      }
 94    }
 95    return 0;
 96  } else {
 97    return self->parse_table[state * self->symbol_count + symbol];
 98  }
 99}
100
101static inline bool ts_language_has_actions(
102  const TSLanguage *self,
103  TSStateId state,
104  TSSymbol symbol
105) {
106  return ts_language_lookup(self, state, symbol) != 0;
107}
108
109// Iterate over all of the symbols that are valid in the given state.
110//
111// For 'large' parse states, this just requires iterating through
112// all possible symbols and checking the parse table for each one.
113// For 'small' parse states, this exploits the structure of the
114// table to only visit the valid symbols.
115static inline LookaheadIterator ts_language_lookaheads(
116  const TSLanguage *self,
117  TSStateId state
118) {
119  bool is_small_state = state >= self->large_state_count;
120  const uint16_t *data;
121  const uint16_t *group_end = NULL;
122  uint16_t group_count = 0;
123  if (is_small_state) {
124    uint32_t index = self->small_parse_table_map[state - self->large_state_count];
125    data = &self->small_parse_table[index];
126    group_end = data + 1;
127    group_count = *data;
128  } else {
129    data = &self->parse_table[state * self->symbol_count] - 1;
130  }
131  return (LookaheadIterator) {
132    .language = self,
133    .data = data,
134    .group_end = group_end,
135    .group_count = group_count,
136    .is_small_state = is_small_state,
137    .symbol = UINT16_MAX,
138    .next_state = 0,
139  };
140}
141
142static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {
143  // For small parse states, valid symbols are listed explicitly,
144  // grouped by their value. There's no need to look up the actions
145  // again until moving to the next group.
146  if (self->is_small_state) {
147    self->data++;
148    if (self->data == self->group_end) {
149      if (self->group_count == 0) return false;
150      self->group_count--;
151      self->table_value = *(self->data++);
152      unsigned symbol_count = *(self->data++);
153      self->group_end = self->data + symbol_count;
154      self->symbol = *self->data;
155    } else {
156      self->symbol = *self->data;
157      return true;
158    }
159  }
160
161  // For large parse states, iterate through every symbol until one
162  // is found that has valid actions.
163  else {
164    do {
165      self->data++;
166      self->symbol++;
167      if (self->symbol >= self->language->symbol_count) return false;
168      self->table_value = *self->data;
169    } while (!self->table_value);
170  }
171
172  // Depending on if the symbols is terminal or non-terminal, the table value either
173  // represents a list of actions or a successor state.
174  if (self->symbol < self->language->token_count) {
175    const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
176    self->action_count = entry->entry.count;
177    self->actions = (const TSParseAction *)(entry + 1);
178    self->next_state = 0;
179  } else {
180    self->action_count = 0;
181    self->next_state = self->table_value;
182  }
183  return true;
184}
185
186// Whether the state is a "primary state". If this returns false, it indicates that there exists
187// another state that behaves identically to this one with respect to query analysis.
188static inline bool ts_language_state_is_primary(
189  const TSLanguage *self,
190  TSStateId state
191) {
192  if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
193    return state == self->primary_state_ids[state];
194  } else {
195    return true;
196  }
197}
198
199static inline const bool *ts_language_enabled_external_tokens(
200  const TSLanguage *self,
201  unsigned external_scanner_state
202) {
203  if (external_scanner_state == 0) {
204    return NULL;
205  } else {
206    return self->external_scanner.states + self->external_token_count * external_scanner_state;
207  }
208}
209
210static inline const TSSymbol *ts_language_alias_sequence(
211  const TSLanguage *self,
212  uint32_t production_id
213) {
214  return production_id ?
215    &self->alias_sequences[production_id * self->max_alias_sequence_length] :
216    NULL;
217}
218
219static inline TSSymbol ts_language_alias_at(
220  const TSLanguage *self,
221  uint32_t production_id,
222  uint32_t child_index
223) {
224  return production_id ?
225    self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] :
226    0;
227}
228
229static inline void ts_language_field_map(
230  const TSLanguage *self,
231  uint32_t production_id,
232  const TSFieldMapEntry **start,
233  const TSFieldMapEntry **end
234) {
235  if (self->field_count == 0) {
236    *start = NULL;
237    *end = NULL;
238    return;
239  }
240
241  TSFieldMapSlice slice = self->field_map_slices[production_id];
242  *start = &self->field_map_entries[slice.index];
243  *end = &self->field_map_entries[slice.index] + slice.length;
244}
245
246static inline void ts_language_aliases_for_symbol(
247  const TSLanguage *self,
248  TSSymbol original_symbol,
249  const TSSymbol **start,
250  const TSSymbol **end
251) {
252  *start = &self->public_symbol_map[original_symbol];
253  *end = *start + 1;
254
255  unsigned idx = 0;
256  for (;;) {
257    TSSymbol symbol = self->alias_map[idx++];
258    if (symbol == 0 || symbol > original_symbol) break;
259    uint16_t count = self->alias_map[idx++];
260    if (symbol == original_symbol) {
261      *start = &self->alias_map[idx];
262      *end = &self->alias_map[idx + count];
263      break;
264    }
265    idx += count;
266  }
267}
268
269static inline void ts_language_write_symbol_as_dot_string(
270  const TSLanguage *self,
271  FILE *f,
272  TSSymbol symbol
273) {
274  const char *name = ts_language_symbol_name(self, symbol);
275  for (const char *chr = name; *chr; chr++) {
276    switch (*chr) {
277      case '"':
278      case '\\':
279        fputc('\\', f);
280        fputc(*chr, f);
281        break;
282      case '\n':
283        fputs("\\n", f);
284        break;
285      case '\t':
286        fputs("\\t", f);
287        break;
288      default:
289        fputc(*chr, f);
290        break;
291    }
292  }
293}
294
295#ifdef __cplusplus
296}
297#endif
298
299#endif  // TREE_SITTER_LANGUAGE_H_