aboutsummaryrefslogtreecommitdiff
path: root/vendor/tree-sitter
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/tree-sitter')
-rw-r--r--vendor/tree-sitter/LICENSE21
-rw-r--r--vendor/tree-sitter/Makefile78
-rw-r--r--vendor/tree-sitter/lib/include/tree_sitter/api.h1180
-rw-r--r--vendor/tree-sitter/lib/include/tree_sitter/parser.h224
-rw-r--r--vendor/tree-sitter/lib/src/alloc.c48
-rw-r--r--vendor/tree-sitter/lib/src/alloc.h37
-rw-r--r--vendor/tree-sitter/lib/src/array.h249
-rw-r--r--vendor/tree-sitter/lib/src/atomic.h67
-rw-r--r--vendor/tree-sitter/lib/src/clock.h146
-rw-r--r--vendor/tree-sitter/lib/src/error_costs.h11
-rw-r--r--vendor/tree-sitter/lib/src/get_changed_ranges.c501
-rw-r--r--vendor/tree-sitter/lib/src/get_changed_ranges.h36
-rw-r--r--vendor/tree-sitter/lib/src/host.h21
-rw-r--r--vendor/tree-sitter/lib/src/language.c208
-rw-r--r--vendor/tree-sitter/lib/src/language.h296
-rw-r--r--vendor/tree-sitter/lib/src/length.h52
-rw-r--r--vendor/tree-sitter/lib/src/lexer.c419
-rw-r--r--vendor/tree-sitter/lib/src/lexer.h49
-rw-r--r--vendor/tree-sitter/lib/src/lib.c18
-rw-r--r--vendor/tree-sitter/lib/src/node.c767
-rw-r--r--vendor/tree-sitter/lib/src/parser.c2011
-rw-r--r--vendor/tree-sitter/lib/src/point.h62
-rw-r--r--vendor/tree-sitter/lib/src/query.c4130
-rw-r--r--vendor/tree-sitter/lib/src/reduce_action.h34
-rw-r--r--vendor/tree-sitter/lib/src/reusable_node.h95
-rw-r--r--vendor/tree-sitter/lib/src/stack.c897
-rw-r--r--vendor/tree-sitter/lib/src/stack.h133
-rw-r--r--vendor/tree-sitter/lib/src/subtree.c1039
-rw-r--r--vendor/tree-sitter/lib/src/subtree.h382
-rw-r--r--vendor/tree-sitter/lib/src/tree.c143
-rw-r--r--vendor/tree-sitter/lib/src/tree.h31
-rw-r--r--vendor/tree-sitter/lib/src/tree_cursor.c712
-rw-r--r--vendor/tree-sitter/lib/src/tree_cursor.h47
-rw-r--r--vendor/tree-sitter/lib/src/unicode.h50
-rw-r--r--vendor/tree-sitter/lib/src/unicode/ICU_SHA1
-rw-r--r--vendor/tree-sitter/lib/src/unicode/LICENSE414
-rw-r--r--vendor/tree-sitter/lib/src/unicode/README.md29
-rw-r--r--vendor/tree-sitter/lib/src/unicode/ptypes.h1
-rw-r--r--vendor/tree-sitter/lib/src/unicode/umachine.h448
-rw-r--r--vendor/tree-sitter/lib/src/unicode/urename.h1
-rw-r--r--vendor/tree-sitter/lib/src/unicode/utf.h1
-rw-r--r--vendor/tree-sitter/lib/src/unicode/utf16.h733
-rw-r--r--vendor/tree-sitter/lib/src/unicode/utf8.h881
43 files changed, 16703 insertions, 0 deletions
diff --git a/vendor/tree-sitter/LICENSE b/vendor/tree-sitter/LICENSE
new file mode 100644
index 0000000..3f67411
--- /dev/null
+++ b/vendor/tree-sitter/LICENSE
@@ -0,0 +1,21 @@
1The MIT License (MIT)
2
3Copyright (c) 2018-2023 Max Brunsfeld
4
5Permission is hereby granted, free of charge, to any person obtaining a copy
6of this software and associated documentation files (the "Software"), to deal
7in the Software without restriction, including without limitation the rights
8to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9copies of the Software, and to permit persons to whom the Software is
10furnished to do so, subject to the following conditions:
11
12The above copyright notice and this permission notice shall be included in all
13copies or substantial portions of the Software.
14
15THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21SOFTWARE.
diff --git a/vendor/tree-sitter/Makefile b/vendor/tree-sitter/Makefile
new file mode 100644
index 0000000..eb4075f
--- /dev/null
+++ b/vendor/tree-sitter/Makefile
@@ -0,0 +1,78 @@
# Build and install the tree-sitter runtime library (static + shared).
#
# User-tunable knobs (override on the command line or via the environment):
#   PREFIX, INCLUDEDIR, LIBDIR, PCLIBDIR  - install layout
#   DESTDIR                               - staging root prepended at install
#   AMALGAMATED=1                         - build from the single lib/src/lib.c
#   CFLAGS, LDFLAGS, LDLIBS, STRIP        - toolchain settings
VERSION := 0.20.10

# install directory layout
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig

# collect sources
ifneq ($(AMALGAMATED),1)
  SRC := $(wildcard lib/src/*.c)
  # do not double-include amalgamation
  SRC := $(filter-out lib/src/lib.c,$(SRC))
else
  # use amalgamated build
  SRC := lib/src/lib.c
endif
OBJ := $(SRC:.c=.o)

# define default flags, and override to append mandatory flags
# (user-supplied CFLAGS come last so they can refine the defaults, while the
# include paths are appended unconditionally)
override CFLAGS := -O3 -std=gnu99 -fPIC -fvisibility=hidden -Wall -Wextra -Wshadow $(CFLAGS)
override CFLAGS += -Ilib/src -Ilib/include

# ABI versioning
SONAME_MAJOR := 0
SONAME_MINOR := 0

# OS-specific bits
# Query the platform once; every conditional below reuses the result instead
# of forking `uname` again.
UNAME_S := $(shell uname)

ifeq ($(UNAME_S),Darwin)
  SOEXT := dylib
  SOEXTVER_MAJOR := $(SONAME_MAJOR).dylib
  SOEXTVER := $(SONAME_MAJOR).$(SONAME_MINOR).dylib
  LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib
else
  SOEXT := so
  SOEXTVER_MAJOR := so.$(SONAME_MAJOR)
  SOEXTVER := so.$(SONAME_MAJOR).$(SONAME_MINOR)
  LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR)
endif
# These BSDs keep pkg-config files under libdata/ rather than lib/.
ifneq (,$(filter $(UNAME_S),FreeBSD NetBSD DragonFly))
  PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif

all: libtree-sitter.a libtree-sitter.$(SOEXTVER)

libtree-sitter.a: $(OBJ)
	$(AR) rcs $@ $^

# Link the fully versioned shared library, then create the unversioned and
# major-version symlinks next to it. Stripping is opt-in via STRIP.
libtree-sitter.$(SOEXTVER): $(OBJ)
	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
	ln -sf $@ libtree-sitter.$(SOEXT)
	ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR)
ifneq ($(STRIP),)
	$(STRIP) $@
endif

# Generate tree-sitter.pc from its template, then install the libraries, the
# public header and the pkg-config file under $(DESTDIR).
#
# The order of the sed expressions matters: the directory placeholders are
# expanded first, then any "=$(PREFIX)" they produced is rewritten to
# "=${prefix}", and only afterwards is @PREFIX@ itself filled in, so the
# "prefix=" line keeps the literal path.
install: all
	sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
		-e 's|=$(PREFIX)|=$${prefix}|' \
		-e 's|@PREFIX@|$(PREFIX)|' \
		tree-sitter.pc.in > tree-sitter.pc

	install -d '$(DESTDIR)$(LIBDIR)'
	install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/
	install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/
	ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR)
	ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)

	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
	install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/

	install -d '$(DESTDIR)$(PCLIBDIR)'
	install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/

# Remove everything this Makefile creates in the source tree, including the
# tree-sitter.pc file generated by `install`.
clean:
	rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) tree-sitter.pc

.PHONY: all install clean
diff --git a/vendor/tree-sitter/lib/include/tree_sitter/api.h b/vendor/tree-sitter/lib/include/tree_sitter/api.h
new file mode 100644
index 0000000..56093d9
--- /dev/null
+++ b/vendor/tree-sitter/lib/include/tree_sitter/api.h
@@ -0,0 +1,1180 @@
1#ifndef TREE_SITTER_API_H_
2#define TREE_SITTER_API_H_
3
4#if defined(__GNUC__) || defined(__clang__)
5#pragma GCC visibility push(default)
6#endif
7
8#ifdef __cplusplus
9extern "C" {
10#endif
11
12#include <stdlib.h>
13#include <stdint.h>
14#include <stdbool.h>
15
16/****************************/
17/* Section - ABI Versioning */
18/****************************/
19
20/**
21 * The latest ABI version that is supported by the current version of the
22 * library. When Languages are generated by the Tree-sitter CLI, they are
23 * assigned an ABI version number that corresponds to the current CLI version.
24 * The Tree-sitter library is generally backwards-compatible with languages
25 * generated using older CLI versions, but is not forwards-compatible.
26 */
27#define TREE_SITTER_LANGUAGE_VERSION 14
28
29/**
30 * The earliest ABI version that is supported by the current version of the
31 * library.
32 */
33#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13
34
35/*******************/
36/* Section - Types */
37/*******************/
38
/**
 * Numeric identifiers, all 16 bits wide: a parse state (see
 * `ts_node_parse_state`), a node type (see `ts_node_symbol`), and a field
 * (see `ts_node_child_by_field_id`).
 */
39typedef uint16_t TSStateId;
40typedef uint16_t TSSymbol;
41typedef uint16_t TSFieldId;
/**
 * Forward-declared handle types. Their layouts are not defined in this part
 * of the header; the API below only ever passes them by pointer.
 */
42typedef struct TSLanguage TSLanguage;
43typedef struct TSParser TSParser;
44typedef struct TSTree TSTree;
45typedef struct TSQuery TSQuery;
46typedef struct TSQueryCursor TSQueryCursor;
47typedef struct TSLookaheadIterator TSLookaheadIterator;
48
/**
 * How input text is encoded; stored in `TSInput::encoding` and accepted by
 * `ts_parser_parse_string_encoding`.
 */
49typedef enum {
50 TSInputEncodingUTF8,
51 TSInputEncodingUTF16,
52} TSInputEncoding;
53
/**
 * Kind of a grammar symbol.
 *
 * NOTE(review): no declaration visible in this part of the header consumes
 * this enum; the exact meaning of each value should be confirmed against the
 * language API.
 */
54typedef enum {
55 TSSymbolTypeRegular,
56 TSSymbolTypeAnonymous,
57 TSSymbolTypeAuxiliary,
58} TSSymbolType;
59
/**
 * A (row, column) position in a document.
 */
60typedef struct {
61 uint32_t row;
62 uint32_t column;
63} TSPoint;
64
/**
 * A span of a document, described both as (row, column) points and as byte
 * offsets. Used, for example, by `ts_parser_set_included_ranges`.
 */
65typedef struct {
66 TSPoint start_point;
67 TSPoint end_point;
68 uint32_t start_byte;
69 uint32_t end_byte;
70} TSRange;
71
/**
 * Describes how the parser reads text; see `ts_parser_parse`.
 *
 * - `payload`: arbitrary pointer handed to every invocation of `read`.
 * - `read`: returns a pointer to a chunk of text at the given byte offset and
 *   position and writes the chunk length to `bytes_read`; writing zero
 *   signals the end of the document.
 * - `encoding`: how the text is encoded.
 */
72typedef struct {
73 void *payload;
74 const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
75 TSInputEncoding encoding;
76} TSInput;
77
/**
 * Category of a log message, passed as `log_type` to `TSLogger::log`.
 */
78typedef enum {
79 TSLogTypeParse,
80 TSLogTypeLex,
81} TSLogType;
82
/**
 * A logging callback together with the `payload` pointer that is passed as
 * its first argument. See `ts_parser_set_logger`: the parser does not take
 * ownership of the payload.
 */
83typedef struct {
84 void *payload;
85 void (*log)(void *payload, TSLogType log_type, const char *buffer);
86} TSLogger;
87
/**
 * Description of a source-code edit, expressed both in byte offsets and in
 * (row, column) points, as required by `ts_tree_edit` and `ts_node_edit`.
 */
88typedef struct {
89 uint32_t start_byte;
90 uint32_t old_end_byte;
91 uint32_t new_end_byte;
92 TSPoint start_point;
93 TSPoint old_end_point;
94 TSPoint new_end_point;
95} TSInputEdit;
96
/**
 * A syntax node. Passed by value to the `ts_node_*` functions.
 *
 * NOTE(review): the meaning of `context` and `id` is not shown in this part
 * of the header; presumably they are internal and should be treated as
 * opaque by callers.
 */
97typedef struct {
98 uint32_t context[4];
99 const void *id;
100 const TSTree *tree;
101} TSNode;
102
/**
 * A cursor for walking a syntax tree. Created with `ts_tree_cursor_new` and
 * released with `ts_tree_cursor_delete`.
 *
 * NOTE(review): the fields look like internal state; confirm that callers
 * are expected to treat them as opaque.
 */
103typedef struct {
104 const void *tree;
105 const void *id;
106 uint32_t context[2];
107} TSTreeCursor;
108
/**
 * A captured node paired with an index.
 *
 * NOTE(review): `index` presumably identifies the capture within its query;
 * verify against the query API.
 */
109typedef struct {
110 TSNode node;
111 uint32_t index;
112} TSQueryCapture;
113
/**
 * Quantifier kinds. `TSQuantifierZero` is pinned to 0 so that it equals the
 * value arrays are initialized with (see the inline note below).
 */
114typedef enum {
115 TSQuantifierZero = 0, // must match the array initialization value
116 TSQuantifierZeroOrOne,
117 TSQuantifierZeroOrMore,
118 TSQuantifierOne,
119 TSQuantifierOneOrMore,
120} TSQuantifier;
121
/**
 * A query match: an id, the index of the matched pattern, and a list of
 * captures.
 *
 * NOTE(review): `captures` presumably points to `capture_count` entries;
 * ownership and lifetime are not visible here - confirm against the query
 * cursor API.
 */
122typedef struct {
123 uint32_t id;
124 uint16_t pattern_index;
125 uint16_t capture_count;
126 const TSQueryCapture *captures;
127} TSQueryMatch;
128
/**
 * The three kinds of step that can appear in a query's predicate step array.
 */
129typedef enum {
130 TSQueryPredicateStepTypeDone,
131 TSQueryPredicateStepTypeCapture,
132 TSQueryPredicateStepTypeString,
133} TSQueryPredicateStepType;
134
/**
 * One step of a query predicate: its kind plus an associated id.
 *
 * NOTE(review): how `value_id` is interpreted for each step type is not
 * visible in this part of the header - confirm before relying on it.
 */
135typedef struct {
136 TSQueryPredicateStepType type;
137 uint32_t value_id;
138} TSQueryPredicateStep;
139
/**
 * Error category written to the `error_type` parameter of `ts_query_new`.
 * `TSQueryErrorNone` is explicitly 0.
 */
140typedef enum {
141 TSQueryErrorNone = 0,
142 TSQueryErrorSyntax,
143 TSQueryErrorNodeType,
144 TSQueryErrorField,
145 TSQueryErrorCapture,
146 TSQueryErrorStructure,
147 TSQueryErrorLanguage,
148} TSQueryError;
149
150/********************/
151/* Section - Parser */
152/********************/
153
154/**
155 * Create a new parser.
156 */
157TSParser *ts_parser_new(void);
158
159/**
160 * Delete the parser, freeing all of the memory that it used.
161 */
162void ts_parser_delete(TSParser *self);
163
164/**
165 * Get the parser's current language.
166 */
167const TSLanguage *ts_parser_language(const TSParser *self);
168
169/**
170 * Set the language that the parser should use for parsing.
171 *
172 * Returns a boolean indicating whether or not the language was successfully
173 * assigned. True means assignment succeeded. False means there was a version
174 * mismatch: the language was generated with an incompatible version of the
175 * Tree-sitter CLI. Check the language's version using [`ts_language_version`]
176 * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and
177 * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
178 */
179bool ts_parser_set_language(TSParser *self, const TSLanguage *language);
180
181/**
182 * Set the ranges of text that the parser should include when parsing.
183 *
184 * By default, the parser will always include entire documents. This function
185 * allows you to parse only a *portion* of a document but still return a syntax
186 * tree whose ranges match up with the document as a whole. You can also pass
187 * multiple disjoint ranges.
188 *
189 * The second and third parameters specify the location and length of an array
190 * of ranges. The parser does *not* take ownership of these ranges; it copies
191 * the data, so it doesn't matter how these ranges are allocated.
192 *
193 * If `count` is zero, then the entire document will be parsed. Otherwise,
194 * the given ranges must be ordered from earliest to latest in the document,
195 * and they must not overlap. That is, the following must hold for all:
196 *
197 * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte`
198 *
199 * If this requirement is not satisfied, the operation will fail, the ranges
200 * will not be assigned, and this function will return `false`. On success,
201 * this function returns `true`.
202 */
203bool ts_parser_set_included_ranges(
204 TSParser *self,
205 const TSRange *ranges,
206 uint32_t count
207);
208
209/**
210 * Get the ranges of text that the parser will include when parsing.
211 *
212 * The returned pointer is owned by the parser. The caller should not free it
213 * or write to it. The length of the array will be written to the given
214 * `count` pointer.
215 */
216const TSRange *ts_parser_included_ranges(
217 const TSParser *self,
218 uint32_t *count
219);
220
221/**
222 * Use the parser to parse some source code and create a syntax tree.
223 *
224 * If you are parsing this document for the first time, pass `NULL` for the
225 * `old_tree` parameter. Otherwise, if you have already parsed an earlier
226 * version of this document and the document has since been edited, pass the
227 * previous syntax tree so that the unchanged parts of it can be reused.
228 * This will save time and memory. For this to work correctly, you must have
229 * already edited the old syntax tree using the [`ts_tree_edit`] function in a
230 * way that exactly matches the source code changes.
231 *
232 * The [`TSInput`] parameter lets you specify how to read the text. It has the
233 * following three fields:
234 * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset
235 * and (row, column) position. The function should return a pointer to the
236 * text and write its length to the [`bytes_read`] pointer. The parser does
237 * not take ownership of this buffer; it just borrows it until it has
238 * finished reading it. The function should write a zero value to the
239 * [`bytes_read`] pointer to indicate the end of the document.
240 * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation
241 * of the [`read`] function.
242 * 3. [`encoding`]: An indication of how the text is encoded. Either
243 * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.
244 *
245 * This function returns a syntax tree on success, and `NULL` on failure. There
246 * are three possible reasons for failure:
247 * 1. The parser does not have a language assigned. Check for this using the
248 * [`ts_parser_language`] function.
249 * 2. Parsing was cancelled due to a timeout that was set by an earlier call to
250 * the [`ts_parser_set_timeout_micros`] function. You can resume parsing from
251 * where the parser left off by calling [`ts_parser_parse`] again with the
252 * same arguments. Or you can start parsing from scratch by first calling
253 * [`ts_parser_reset`].
254 * 3. Parsing was cancelled using a cancellation flag that was set by an
255 * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing
256 * from where the parser left off by calling [`ts_parser_parse`] again with
257 * the same arguments.
258 *
259 * [`read`]: TSInput::read
260 * [`payload`]: TSInput::payload
261 * [`encoding`]: TSInput::encoding
262 * [`bytes_read`]: TSInput::read
263 */
264TSTree *ts_parser_parse(
265 TSParser *self,
266 const TSTree *old_tree,
267 TSInput input
268);
269
270/**
271 * Use the parser to parse some source code stored in one contiguous buffer.
272 * The first two parameters are the same as in the [`ts_parser_parse`] function
273 * above. The second two parameters indicate the location of the buffer and its
274 * length in bytes.
275 */
276TSTree *ts_parser_parse_string(
277 TSParser *self,
278 const TSTree *old_tree,
279 const char *string,
280 uint32_t length
281);
282
283/**
284 * Use the parser to parse some source code stored in one contiguous buffer with
285 * a given encoding. The first four parameters work the same as in the
286 * [`ts_parser_parse_string`] method above. The final parameter indicates whether
287 * the text is encoded as UTF8 or UTF16.
288 */
289TSTree *ts_parser_parse_string_encoding(
290 TSParser *self,
291 const TSTree *old_tree,
292 const char *string,
293 uint32_t length,
294 TSInputEncoding encoding
295);
296
297/**
298 * Instruct the parser to start the next parse from the beginning.
299 *
300 * If the parser previously failed because of a timeout or a cancellation, then
301 * by default, it will resume where it left off on the next call to
302 * [`ts_parser_parse`] or other parsing functions. If you don't want to resume,
303 * and instead intend to use this parser to parse some other document, you must
304 * call [`ts_parser_reset`] first.
305 */
306void ts_parser_reset(TSParser *self);
307
308/**
309 * Set the maximum duration in microseconds that parsing should be allowed to
310 * take before halting.
311 *
312 * If parsing takes longer than this, it will halt early, returning NULL.
313 * See [`ts_parser_parse`] for more information.
314 */
315void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros);
316
317/**
318 * Get the duration in microseconds that parsing is allowed to take.
319 */
320uint64_t ts_parser_timeout_micros(const TSParser *self);
321
322/**
323 * Set the parser's current cancellation flag pointer.
324 *
325 * If a non-null pointer is assigned, then the parser will periodically read
326 * from this pointer during parsing. If it reads a non-zero value, it will
327 * halt early, returning NULL. See [`ts_parser_parse`] for more information.
328 */
329void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);
330
331/**
332 * Get the parser's current cancellation flag pointer.
333 */
334const size_t *ts_parser_cancellation_flag(const TSParser *self);
335
336/**
337 * Set the logger that a parser should use during parsing.
338 *
339 * The parser does not take ownership over the logger payload. If a logger was
340 * previously assigned, the caller is responsible for releasing any memory
341 * owned by the previous logger.
342 */
343void ts_parser_set_logger(TSParser *self, TSLogger logger);
344
345/**
346 * Get the parser's current logger.
347 */
348TSLogger ts_parser_logger(const TSParser *self);
349
350/**
351 * Set the file descriptor to which the parser should write debugging graphs
352 * during parsing. The graphs are formatted in the DOT language. You may want
353 * to pipe these graphs directly to a `dot(1)` process in order to generate
354 * SVG output. You can turn off this logging by passing a negative number.
355 */
356void ts_parser_print_dot_graphs(TSParser *self, int fd);
357
358/******************/
359/* Section - Tree */
360/******************/
361
362/**
363 * Create a shallow copy of the syntax tree. This is very fast.
364 *
365 * You need to copy a syntax tree in order to use it on more than one thread at
366 * a time, as syntax trees are not thread safe.
367 */
368TSTree *ts_tree_copy(const TSTree *self);
369
370/**
371 * Delete the syntax tree, freeing all of the memory that it used.
372 */
373void ts_tree_delete(TSTree *self);
374
375/**
376 * Get the root node of the syntax tree.
377 */
378TSNode ts_tree_root_node(const TSTree *self);
379
380/**
381 * Get the root node of the syntax tree, but with its position
382 * shifted forward by the given offset.
383 */
384TSNode ts_tree_root_node_with_offset(
385 const TSTree *self,
386 uint32_t offset_bytes,
387 TSPoint offset_extent
388);
389
390/**
391 * Get the language that was used to parse the syntax tree.
392 */
393const TSLanguage *ts_tree_language(const TSTree *self);
394
395/**
396 * Get the array of included ranges that was used to parse the syntax tree.
397 *
398 * The returned pointer must be freed by the caller.
399 */
400TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length);
401
402/**
403 * Edit the syntax tree to keep it in sync with source code that has been
404 * edited.
405 *
406 * You must describe the edit both in terms of byte offsets and in terms of
407 * (row, column) coordinates.
408 */
409void ts_tree_edit(TSTree *self, const TSInputEdit *edit);
410
411/**
412 * Compare an old edited syntax tree to a new syntax tree representing the same
413 * document, returning an array of ranges whose syntactic structure has changed.
414 *
415 * For this to work correctly, the old syntax tree must have been edited such
416 * that its ranges match up to the new tree. Generally, you'll want to call
417 * this function right after calling one of the [`ts_parser_parse`] functions.
418 * You need to pass the old tree that was passed to parse, as well as the new
419 * tree that was returned from that function.
420 *
421 * The returned array is allocated using `malloc` and the caller is responsible
422 * for freeing it using `free`. The length of the array will be written to the
423 * given `length` pointer.
424 */
425TSRange *ts_tree_get_changed_ranges(
426 const TSTree *old_tree,
427 const TSTree *new_tree,
428 uint32_t *length
429);
430
431/**
432 * Write a DOT graph describing the syntax tree to the given file.
433 */
434void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor);
435
436/******************/
437/* Section - Node */
438/******************/
439
440/**
441 * Get the node's type as a null-terminated string.
442 */
443const char *ts_node_type(TSNode self);
444
445/**
446 * Get the node's type as a numerical id.
447 */
448TSSymbol ts_node_symbol(TSNode self);
449
450/**
451 * Get the node's language.
452 */
453const TSLanguage *ts_node_language(TSNode self);
454
455/**
456 * Get the node's type as it appears in the grammar ignoring aliases as a
457 * null-terminated string.
458 */
459const char *ts_node_grammar_type(TSNode self);
460
461/**
462 * Get the node's type as a numerical id as it appears in the grammar ignoring
463 * aliases. This should be used in [`ts_language_next_state`] instead of
464 * [`ts_node_symbol`].
465 */
466TSSymbol ts_node_grammar_symbol(TSNode self);
467
468/**
469 * Get the node's start byte.
470 */
471uint32_t ts_node_start_byte(TSNode self);
472
473/**
474 * Get the node's start position in terms of rows and columns.
475 */
476TSPoint ts_node_start_point(TSNode self);
477
478/**
479 * Get the node's end byte.
480 */
481uint32_t ts_node_end_byte(TSNode self);
482
483/**
484 * Get the node's end position in terms of rows and columns.
485 */
486TSPoint ts_node_end_point(TSNode self);
487
488/**
489 * Get an S-expression representing the node as a string.
490 *
491 * This string is allocated with `malloc` and the caller is responsible for
492 * freeing it using `free`.
493 */
494char *ts_node_string(TSNode self);
495
496/**
497 * Check if the node is null. Functions like [`ts_node_child`] and
498 * [`ts_node_next_sibling`] will return a null node to indicate that no such node
499 * was found.
500 */
501bool ts_node_is_null(TSNode self);
502
503/**
504 * Check if the node is *named*. Named nodes correspond to named rules in the
505 * grammar, whereas *anonymous* nodes correspond to string literals in the
506 * grammar.
507 */
508bool ts_node_is_named(TSNode self);
509
510/**
511 * Check if the node is *missing*. Missing nodes are inserted by the parser in
512 * order to recover from certain kinds of syntax errors.
513 */
514bool ts_node_is_missing(TSNode self);
515
516/**
517 * Check if the node is *extra*. Extra nodes represent things like comments,
518 * which are not required by the grammar, but can appear anywhere.
519 */
520bool ts_node_is_extra(TSNode self);
521
522/**
523 * Check if a syntax node has been edited.
524 */
525bool ts_node_has_changes(TSNode self);
526
527/**
528 * Check if the node is a syntax error or contains any syntax errors.
529 */
530bool ts_node_has_error(TSNode self);
531
532/**
533 * Check if the node is a syntax error.
534*/
535bool ts_node_is_error(TSNode self);
536
537/**
538 * Get this node's parse state.
539*/
540TSStateId ts_node_parse_state(TSNode self);
541
542/**
543 * Get the parse state after this node.
544*/
545TSStateId ts_node_next_parse_state(TSNode self);
546
547/**
548 * Get the node's immediate parent.
549 */
550TSNode ts_node_parent(TSNode self);
551
552/**
553 * Get the node's child at the given index, where zero represents the first
554 * child.
555 */
556TSNode ts_node_child(TSNode self, uint32_t child_index);
557
558/**
559 * Get the field name for node's child at the given index, where zero represents
560 * the first child. Returns NULL if no field is found.
561 */
562const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index);
563
564/**
565 * Get the node's number of children.
566 */
567uint32_t ts_node_child_count(TSNode self);
568
569/**
570 * Get the node's *named* child at the given index.
571 *
572 * See also [`ts_node_is_named`].
573 */
574TSNode ts_node_named_child(TSNode self, uint32_t child_index);
575
576/**
577 * Get the node's number of *named* children.
578 *
579 * See also [`ts_node_is_named`].
580 */
581uint32_t ts_node_named_child_count(TSNode self);
582
583/**
584 * Get the node's child with the given field name.
585 */
586TSNode ts_node_child_by_field_name(
587 TSNode self,
588 const char *name,
589 uint32_t name_length
590);
591
592/**
593 * Get the node's child with the given numerical field id.
594 *
595 * You can convert a field name to an id using the
596 * [`ts_language_field_id_for_name`] function.
597 */
598TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id);
599
600/**
601 * Get the node's next / previous sibling.
602 */
603TSNode ts_node_next_sibling(TSNode self);
604TSNode ts_node_prev_sibling(TSNode self);
605
606/**
607 * Get the node's next / previous *named* sibling.
608 */
609TSNode ts_node_next_named_sibling(TSNode self);
610TSNode ts_node_prev_named_sibling(TSNode self);
611
612/**
613 * Get the node's first child that extends beyond the given byte offset.
614 */
615TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte);
616
617/**
618 * Get the node's first named child that extends beyond the given byte offset.
619 */
620TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte);
621
622/**
623 * Get the node's number of descendants, including one for the node itself.
624 */
625uint32_t ts_node_descendant_count(TSNode self);
626
627/**
628 * Get the smallest node within this node that spans the given range of bytes
629 * or (row, column) positions.
630 */
631TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end);
632TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end);
633
634/**
635 * Get the smallest named node within this node that spans the given range of
636 * bytes or (row, column) positions.
637 */
638TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end);
639TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end);
640
641/**
642 * Edit the node to keep it in-sync with source code that has been edited.
643 *
644 * This function is only rarely needed. When you edit a syntax tree with the
645 * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree
646 * afterward will already reflect the edit. You only need to use [`ts_node_edit`]
647 * when you have a [`TSNode`] instance that you want to keep and continue to use
648 * after an edit.
649 */
650void ts_node_edit(TSNode *self, const TSInputEdit *edit);
651
652/**
653 * Check if two nodes are identical.
654 */
655bool ts_node_eq(TSNode self, TSNode other);
656
657/************************/
658/* Section - TreeCursor */
659/************************/
660
661/**
662 * Create a new tree cursor starting from the given node.
663 *
664 * A tree cursor allows you to walk a syntax tree more efficiently than is
665 * possible using the [`TSNode`] functions. It is a mutable object that is always
666 * on a certain syntax node, and can be moved imperatively to different nodes.
667 */
668TSTreeCursor ts_tree_cursor_new(TSNode node);
669
670/**
671 * Delete a tree cursor, freeing all of the memory that it used.
672 */
673void ts_tree_cursor_delete(TSTreeCursor *self);
674
675/**
676 * Re-initialize a tree cursor to start at a different node.
677 */
678void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node);
679
680/**
681 * Re-initialize a tree cursor to the same position as another cursor.
682 *
683 * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and
684 * allows reusing already created cursors.
685*/
686void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src);
687
688/**
689 * Get the tree cursor's current node.
690 */
691TSNode ts_tree_cursor_current_node(const TSTreeCursor *self);
692
693/**
694 * Get the field name of the tree cursor's current node.
695 *
696 * This returns `NULL` if the current node doesn't have a field.
697 * See also [`ts_node_child_by_field_name`].
698 */
699const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self);
700
701/**
702 * Get the field id of the tree cursor's current node.
703 *
704 * This returns zero if the current node doesn't have a field.
705 * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`].
706 */
707TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self);
708
709/**
710 * Move the cursor to the parent of its current node.
711 *
712 * This returns `true` if the cursor successfully moved, and returns `false`
713 * if there was no parent node (the cursor was already on the root node).
714 */
715bool ts_tree_cursor_goto_parent(TSTreeCursor *self);
716
717/**
718 * Move the cursor to the next sibling of its current node.
719 *
720 * This returns `true` if the cursor successfully moved, and returns `false`
721 * if there was no next sibling node.
722 */
723bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self);
724
725/**
726 * Move the cursor to the previous sibling of its current node.
727 *
728 * This returns `true` if the cursor successfully moved, and returns `false` if
729 * there was no previous sibling node.
730 *
731 * Note, that this function may be slower than
732 * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In
733 * the worst case, this will need to iterate through all the children up to the
734 * previous sibling node to recalculate its position.
735 */
736bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self);
737
738/**
739 * Move the cursor to the first child of its current node.
740 *
741 * This returns `true` if the cursor successfully moved, and returns `false`
742 * if there were no children.
743 */
744bool ts_tree_cursor_goto_first_child(TSTreeCursor *self);
745
746/**
747 * Move the cursor to the last child of its current node.
748 *
749 * This returns `true` if the cursor successfully moved, and returns `false` if
750 * there were no children.
751 *
752 * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`]
753 * because it needs to iterate through all the children to compute the child's
754 * position.
755 */
756bool ts_tree_cursor_goto_last_child(TSTreeCursor *self);
757
758/**
759 * Move the cursor to the node that is the nth descendant of
760 * the original node that the cursor was constructed with, where
761 * zero represents the original node itself.
762 */
763void ts_tree_cursor_goto_descendant(TSTreeCursor *self, uint32_t goal_descendant_index);
764
765/**
766 * Get the index of the cursor's current node out of all of the
767 * descendants of the original node that the cursor was constructed with.
768 */
769uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *self);
770
771/**
772 * Get the depth of the cursor's current node relative to the original
773 * node that the cursor was constructed with.
774 */
775uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self);
776
777/**
778 * Move the cursor to the first child of its current node that extends beyond
779 * the given byte offset or point.
780 *
781 * This returns the index of the child node if one was found, and returns -1
782 * if no such child was found.
783 */
784int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte);
785int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point);
786
787TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor);
788
789/*******************/
790/* Section - Query */
791/*******************/
792
793/**
794 * Create a new query from a string containing one or more S-expression
795 * patterns. The query is associated with a particular language, and can
796 * only be run on syntax nodes parsed with that language.
797 *
798 * If all of the given patterns are valid, this returns a [`TSQuery`].
799 * If a pattern is invalid, this returns `NULL`, and provides two pieces
800 * of information about the problem:
801 * 1. The byte offset of the error is written to the `error_offset` parameter.
802 * 2. The type of error is written to the `error_type` parameter.
803 */
804TSQuery *ts_query_new(
805 const TSLanguage *language,
806 const char *source,
807 uint32_t source_len,
808 uint32_t *error_offset,
809 TSQueryError *error_type
810);
811
812/**
813 * Delete a query, freeing all of the memory that it used.
814 */
815void ts_query_delete(TSQuery *self);
816
817/**
818 * Get the number of patterns, captures, or string literals in the query.
819 */
820uint32_t ts_query_pattern_count(const TSQuery *self);
821uint32_t ts_query_capture_count(const TSQuery *self);
822uint32_t ts_query_string_count(const TSQuery *self);
823
824/**
825 * Get the byte offset where the given pattern starts in the query's source.
826 *
827 * This can be useful when combining queries by concatenating their source
828 * code strings.
829 */
830uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index);
831
832/**
833 * Get all of the predicates for the given pattern in the query.
834 *
835 * The predicates are represented as a single array of steps. There are three
836 * types of steps in this array, which correspond to the three legal values for
837 * the `type` field:
838 * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names
839 * of captures. Their `value_id` can be used with the
840 * [`ts_query_capture_name_for_id`] function to obtain the name of the capture.
841 * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal
842 * strings. Their `value_id` can be used with the
843 * [`ts_query_string_value_for_id`] function to obtain their string value.
844 * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*
845 * that represent the end of an individual predicate. If a pattern has two
846 * predicates, then there will be two steps with this `type` in the array.
847 */
848const TSQueryPredicateStep *ts_query_predicates_for_pattern(
849 const TSQuery *self,
850 uint32_t pattern_index,
851 uint32_t *step_count
852);
853
854/**
855 * Check if the given pattern in the query has a single root node.
856 */
857bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index);
858
859/**
860 * Check if the given pattern in the query is 'non local'.
861 *
862 * A non-local pattern has multiple root nodes and can match within a
863 * repeating sequence of nodes, as specified by the grammar. Non-local
864 * patterns disable certain optimizations that would otherwise be possible
865 * when executing a query on a specific range of a syntax tree.
866 */
867bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index);
868
869/**
870 * Check if a given pattern is guaranteed to match once a given step is reached.
871 * The step is specified by its byte offset in the query's source code.
872 */
873bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset);
874
875/**
876 * Get the name and length of one of the query's captures, or one of the
877 * query's string literals. Each capture and string is associated with a
878 * numeric id based on the order that it appeared in the query's source.
879 */
880const char *ts_query_capture_name_for_id(
881 const TSQuery *self,
882 uint32_t index,
883 uint32_t *length
884);
885
886/**
887 * Get the quantifier of the query's captures. Each capture is associated
888 * with a numeric id based on the order that it appeared in the query's source.
889 */
890TSQuantifier ts_query_capture_quantifier_for_id(
891 const TSQuery *self,
892 uint32_t pattern_index,
893 uint32_t capture_index
894);
895
896const char *ts_query_string_value_for_id(
897 const TSQuery *self,
898 uint32_t index,
899 uint32_t *length
900);
901
902/**
903 * Disable a certain capture within a query.
904 *
905 * This prevents the capture from being returned in matches, and also avoids
906 * any resource usage associated with recording the capture. Currently, there
907 * is no way to undo this.
908 */
909void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length);
910
911/**
912 * Disable a certain pattern within a query.
913 *
914 * This prevents the pattern from matching and removes most of the overhead
915 * associated with the pattern. Currently, there is no way to undo this.
916 */
917void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index);
918
919/**
920 * Create a new cursor for executing a given query.
921 *
922 * The cursor stores the state that is needed to iteratively search
923 * for matches. To use the query cursor, first call [`ts_query_cursor_exec`]
924 * to start running a given query on a given syntax node. Then, there are
925 * two options for consuming the results of the query:
926 * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the
927 * *matches* in the order that they were found. Each match contains the
928 * index of the pattern that matched, and an array of captures. Because
929 * multiple patterns can match the same set of nodes, one match may contain
930 * captures that appear *before* some of the captures from a previous match.
931 * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the
932 * individual *captures* in the order that they appear. This is useful if
933 * you don't care about which pattern matched, and just want a single ordered
934 * sequence of captures.
935 *
936 * If you don't care about consuming all of the results, you can stop calling
937 * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.
938 * You can then start executing another query on another node by calling
939 * [`ts_query_cursor_exec`] again.
940 */
941TSQueryCursor *ts_query_cursor_new(void);
942
943/**
944 * Delete a query cursor, freeing all of the memory that it used.
945 */
946void ts_query_cursor_delete(TSQueryCursor *self);
947
948/**
949 * Start running a given query on a given node.
950 */
951void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node);
952
953/**
954 * Manage the maximum number of in-progress matches allowed by this query
955 * cursor.
956 *
957 * Query cursors have an optional maximum capacity for storing lists of
958 * in-progress captures. If this capacity is exceeded, then the
959 * earliest-starting match will silently be dropped to make room for further
960 * matches. This maximum capacity is optional — by default, query cursors allow
961 * any number of pending matches, dynamically allocating new space for them as
962 * needed as the query is executed.
963 */
964bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self);
965uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self);
966void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit);
967
968/**
969 * Set the range of bytes or (row, column) positions in which the query
970 * will be executed.
971 */
972void ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte);
973void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point);
974
975/**
976 * Advance to the next match of the currently running query.
977 *
978 * If there is a match, write it to `*match` and return `true`.
979 * Otherwise, return `false`.
980 */
981bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match);
982void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id);
983
984/**
985 * Advance to the next capture of the currently running query.
986 *
987 * If there is a capture, write its match to `*match` and its index within
988 * the match's capture list to `*capture_index`. Otherwise, return `false`.
989 */
990bool ts_query_cursor_next_capture(
991 TSQueryCursor *self,
992 TSQueryMatch *match,
993 uint32_t *capture_index
994);
995
996/**
997 * Set the maximum start depth for a query cursor.
998 *
999 * This prevents cursors from exploring children nodes at a certain depth.
1000 * Note if a pattern includes many children, then they will still be checked.
1001 *
1002 * A max start depth of zero is a special case: it helps to destructure a
1003 * subtree by staying on a node and using captures for the parts of interest.
1004 * Note that a max start depth of zero only limits the search depth for a
1005 * pattern's root node; other nodes that are part of the pattern may be
1006 * searched at any depth, as defined by the pattern's structure.
1007 *
1008 * Set to `UINT32_MAX` to remove the maximum start depth.
1009 */
1010void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start_depth);
1011
1012/**********************/
1013/* Section - Language */
1014/**********************/
1015
1016/**
1017 * Get the number of distinct node types in the language.
1018 */
1019uint32_t ts_language_symbol_count(const TSLanguage *self);
1020
1021/**
1022 * Get the number of valid states in this language.
1023*/
1024uint32_t ts_language_state_count(const TSLanguage *self);
1025
1026/**
1027 * Get a node type string for the given numerical id.
1028 */
1029const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol);
1030
1031/**
1032 * Get the numerical id for the given node type string.
1033 */
1034TSSymbol ts_language_symbol_for_name(
1035 const TSLanguage *self,
1036 const char *string,
1037 uint32_t length,
1038 bool is_named
1039);
1040
1041/**
1042 * Get the number of distinct field names in the language.
1043 */
1044uint32_t ts_language_field_count(const TSLanguage *self);
1045
1046/**
1047 * Get the field name string for the given numerical id.
1048 */
1049const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id);
1050
1051/**
1052 * Get the numerical id for the given field name string.
1053 */
1054TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name, uint32_t name_length);
1055
1056/**
1057 * Check whether the given node type id belongs to named nodes, anonymous nodes,
1058 * or hidden nodes.
1059 *
1060 * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API.
1061 */
1062TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol);
1063
1064/**
1065 * Get the ABI version number for this language. This version number is used
1066 * to ensure that languages were generated by a compatible version of
1067 * Tree-sitter.
1068 *
1069 * See also [`ts_parser_set_language`].
1070 */
1071uint32_t ts_language_version(const TSLanguage *self);
1072
1073/**
1074 * Get the next parse state. Combine this with lookahead iterators to generate
1075 * completion suggestions or valid symbols in error nodes. Use
1076 * [`ts_node_grammar_symbol`] for valid symbols.
1077*/
1078TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol);
1079
1080/********************************/
1081/* Section - Lookahead Iterator */
1082/********************************/
1083
1084/**
1085 * Create a new lookahead iterator for the given language and parse state.
1086 *
1087 * This returns `NULL` if state is invalid for the language.
1088 *
1089 * Repeatedly using [`ts_lookahead_iterator_next`] and
1090 * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the
1091 * given parse state. Newly created lookahead iterators will contain the `ERROR`
1092 * symbol.
1093 *
1094 * Lookahead iterators can be useful to generate suggestions and improve syntax
1095 * error diagnostics. To get symbols valid in an ERROR node, use the lookahead
1096 * iterator on its first leaf node state. For `MISSING` nodes, a lookahead
1097 * iterator created on the previous non-extra leaf node may be appropriate.
1098*/
1099TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state);
1100
1101/**
1102 * Delete a lookahead iterator freeing all the memory used.
1103*/
1104void ts_lookahead_iterator_delete(TSLookaheadIterator *self);
1105
1106/**
1107 * Reset the lookahead iterator to another state.
1108 *
1109 * This returns `true` if the iterator was reset to the given state and `false`
1110 * otherwise.
1111*/
1112bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId state);
1113
1114/**
1115 * Reset the lookahead iterator to the given language and parse state.
1116 *
1117 * This returns `true` if the language was set successfully and `false`
1118 * otherwise.
1119*/
1120bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state);
1121
1122/**
1123 * Get the current language of the lookahead iterator.
1124*/
1125const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self);
1126
1127/**
1128 * Advance the lookahead iterator to the next symbol.
1129 *
1130 * This returns `true` if there is a new symbol and `false` otherwise.
1131*/
1132bool ts_lookahead_iterator_next(TSLookaheadIterator *self);
1133
1134/**
1135 * Get the current symbol of the lookahead iterator.
1136*/
1137TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self);
1138
1139/**
1140 * Get the current symbol type of the lookahead iterator as a null-terminated
1141 * string.
1142*/
1143const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self);
1144
1145/**********************************/
1146/* Section - Global Configuration */
1147/**********************************/
1148
1149/**
1150 * Set the allocation functions used by the library.
1151 *
1152 * By default, Tree-sitter uses the standard libc allocation functions,
1153 * but aborts the process when an allocation fails. This function lets
1154 * you supply alternative allocation functions at runtime.
1155 *
1156 * If you pass `NULL` for any parameter, Tree-sitter will switch back to
1157 * its default implementation of that function.
1158 *
1159 * If you call this function after the library has already been used, then
1160 * you must ensure that either:
1161 * 1. All the existing objects have been freed.
1162 * 2. The new allocator shares its state with the old one, so it is capable
1163 * of freeing memory that was allocated by the old allocator.
1164 */
1165void ts_set_allocator(
1166 void *(*new_malloc)(size_t),
1167 void *(*new_calloc)(size_t, size_t),
1168 void *(*new_realloc)(void *, size_t),
1169 void (*new_free)(void *)
1170);
1171
1172#ifdef __cplusplus
1173}
1174#endif
1175
1176#if defined(__GNUC__) || defined(__clang__)
1177#pragma GCC visibility pop
1178#endif
1179
1180#endif // TREE_SITTER_API_H_
diff --git a/vendor/tree-sitter/lib/include/tree_sitter/parser.h b/vendor/tree-sitter/lib/include/tree_sitter/parser.h
new file mode 100644
index 0000000..d210325
--- /dev/null
+++ b/vendor/tree-sitter/lib/include/tree_sitter/parser.h
@@ -0,0 +1,224 @@
1#ifndef TREE_SITTER_PARSER_H_
2#define TREE_SITTER_PARSER_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include <stdbool.h>
9#include <stdint.h>
10#include <stdlib.h>
11
12#define ts_builtin_sym_error ((TSSymbol)-1)
13#define ts_builtin_sym_end 0
14#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
15
16#ifndef TREE_SITTER_API_H_
17typedef uint16_t TSStateId;
18typedef uint16_t TSSymbol;
19typedef uint16_t TSFieldId;
20typedef struct TSLanguage TSLanguage;
21#endif
22
23typedef struct {
24 TSFieldId field_id;
25 uint8_t child_index;
26 bool inherited;
27} TSFieldMapEntry;
28
29typedef struct {
30 uint16_t index;
31 uint16_t length;
32} TSFieldMapSlice;
33
34typedef struct {
35 bool visible;
36 bool named;
37 bool supertype;
38} TSSymbolMetadata;
39
40typedef struct TSLexer TSLexer;
41
42struct TSLexer {
43 int32_t lookahead;
44 TSSymbol result_symbol;
45 void (*advance)(TSLexer *, bool);
46 void (*mark_end)(TSLexer *);
47 uint32_t (*get_column)(TSLexer *);
48 bool (*is_at_included_range_start)(const TSLexer *);
49 bool (*eof)(const TSLexer *);
50};
51
52typedef enum {
53 TSParseActionTypeShift,
54 TSParseActionTypeReduce,
55 TSParseActionTypeAccept,
56 TSParseActionTypeRecover,
57} TSParseActionType;
58
59typedef union {
60 struct {
61 uint8_t type;
62 TSStateId state;
63 bool extra;
64 bool repetition;
65 } shift;
66 struct {
67 uint8_t type;
68 uint8_t child_count;
69 TSSymbol symbol;
70 int16_t dynamic_precedence;
71 uint16_t production_id;
72 } reduce;
73 uint8_t type;
74} TSParseAction;
75
76typedef struct {
77 uint16_t lex_state;
78 uint16_t external_lex_state;
79} TSLexMode;
80
81typedef union {
82 TSParseAction action;
83 struct {
84 uint8_t count;
85 bool reusable;
86 } entry;
87} TSParseActionEntry;
88
89struct TSLanguage {
90 uint32_t version;
91 uint32_t symbol_count;
92 uint32_t alias_count;
93 uint32_t token_count;
94 uint32_t external_token_count;
95 uint32_t state_count;
96 uint32_t large_state_count;
97 uint32_t production_id_count;
98 uint32_t field_count;
99 uint16_t max_alias_sequence_length;
100 const uint16_t *parse_table;
101 const uint16_t *small_parse_table;
102 const uint32_t *small_parse_table_map;
103 const TSParseActionEntry *parse_actions;
104 const char * const *symbol_names;
105 const char * const *field_names;
106 const TSFieldMapSlice *field_map_slices;
107 const TSFieldMapEntry *field_map_entries;
108 const TSSymbolMetadata *symbol_metadata;
109 const TSSymbol *public_symbol_map;
110 const uint16_t *alias_map;
111 const TSSymbol *alias_sequences;
112 const TSLexMode *lex_modes;
113 bool (*lex_fn)(TSLexer *, TSStateId);
114 bool (*keyword_lex_fn)(TSLexer *, TSStateId);
115 TSSymbol keyword_capture_token;
116 struct {
117 const bool *states;
118 const TSSymbol *symbol_map;
119 void *(*create)(void);
120 void (*destroy)(void *);
121 bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
122 unsigned (*serialize)(void *, char *);
123 void (*deserialize)(void *, const char *, unsigned);
124 } external_scanner;
125 const TSStateId *primary_state_ids;
126};
127
128/*
129 * Lexer Macros
130 */
131
132#define START_LEXER() \
133 bool result = false; \
134 bool skip = false; \
135 bool eof = false; \
136 int32_t lookahead; \
137 goto start; \
138 next_state: \
139 lexer->advance(lexer, skip); \
140 start: \
141 skip = false; \
142 lookahead = lexer->lookahead; \
143 eof = lexer->eof(lexer);
144
145#define ADVANCE(state_value) \
146 { \
147 state = state_value; \
148 goto next_state; \
149 }
150
151#define SKIP(state_value) \
152 { \
153 skip = true; \
154 state = state_value; \
155 goto next_state; \
156 }
157
158#define ACCEPT_TOKEN(symbol_value) \
159 result = true; \
160 lexer->result_symbol = symbol_value; \
161 lexer->mark_end(lexer);
162
163#define END_STATE() return result;
164
165/*
166 * Parse Table Macros
167 */
168
169#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
170
171#define STATE(id) id
172
173#define ACTIONS(id) id
174
175#define SHIFT(state_value) \
176 {{ \
177 .shift = { \
178 .type = TSParseActionTypeShift, \
179 .state = (state_value) \
180 } \
181 }}
182
183#define SHIFT_REPEAT(state_value) \
184 {{ \
185 .shift = { \
186 .type = TSParseActionTypeShift, \
187 .state = (state_value), \
188 .repetition = true \
189 } \
190 }}
191
192#define SHIFT_EXTRA() \
193 {{ \
194 .shift = { \
195 .type = TSParseActionTypeShift, \
196 .extra = true \
197 } \
198 }}
199
200#define REDUCE(symbol_val, child_count_val, ...) \
201 {{ \
202 .reduce = { \
203 .type = TSParseActionTypeReduce, \
204 .symbol = symbol_val, \
205 .child_count = child_count_val, \
206 __VA_ARGS__ \
207 }, \
208 }}
209
210#define RECOVER() \
211 {{ \
212 .type = TSParseActionTypeRecover \
213 }}
214
215#define ACCEPT_INPUT() \
216 {{ \
217 .type = TSParseActionTypeAccept \
218 }}
219
220#ifdef __cplusplus
221}
222#endif
223
224#endif // TREE_SITTER_PARSER_H_
diff --git a/vendor/tree-sitter/lib/src/alloc.c b/vendor/tree-sitter/lib/src/alloc.c
new file mode 100644
index 0000000..78b8057
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/alloc.c
@@ -0,0 +1,48 @@
1#include "alloc.h"
2#include <stdlib.h>
3
// Default `malloc` hook: forwards to libc `malloc`. If a non-empty request
// fails, a diagnostic is written to stderr and the process is aborted.
// A NULL result for a zero-byte request is handed back to the caller as-is.
static void *ts_malloc_default(size_t size) {
  void *memory = malloc(size);
  if (memory == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
    abort();
  }
  return memory;
}
12
// Default `calloc` hook: forwards to libc `calloc` and aborts the process,
// after printing a diagnostic to stderr, when a non-empty request fails.
//
// A request is empty when either `count` or `size` is zero. In that case libc
// is allowed to return NULL without it being an error, so NULL is passed
// through to the caller instead of aborting (this mirrors the `size > 0`
// check used by the malloc and realloc defaults).
//
// Note: the byte total in the diagnostic is computed as `count * size` and
// can wrap around when the request itself overflowed `size_t`.
static void *ts_calloc_default(size_t count, size_t size) {
  void *result = calloc(count, size);
  if (count > 0 && size > 0 && !result) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    abort();
  }
  return result;
}
21
// Default `realloc` hook: forwards to libc `realloc`. If resizing to a
// non-zero size fails, a diagnostic is written to stderr and the process is
// aborted. A NULL result for a zero-byte request is returned unchanged.
static void *ts_realloc_default(void *buffer, size_t size) {
  void *resized = realloc(buffer, size);
  if (resized == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
    abort();
  }
  return resized;
}
30
31// Allow clients to override allocation functions dynamically
32void *(*ts_current_malloc)(size_t) = ts_malloc_default;
33void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
34void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
35void (*ts_current_free)(void *) = free;
36
37void ts_set_allocator(
38 void *(*new_malloc)(size_t size),
39 void *(*new_calloc)(size_t count, size_t size),
40 void *(*new_realloc)(void *ptr, size_t size),
41 void (*new_free)(void *ptr)
42) {
43 ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default;
44 ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default;
45 ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default;
46 ts_current_free = new_free ? new_free : free;
47}
48
diff --git a/vendor/tree-sitter/lib/src/alloc.h b/vendor/tree-sitter/lib/src/alloc.h
new file mode 100644
index 0000000..c51f84a
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/alloc.h
@@ -0,0 +1,37 @@
1#ifndef TREE_SITTER_ALLOC_H_
2#define TREE_SITTER_ALLOC_H_
3
4#include "tree_sitter/api.h"
5
6#ifdef __cplusplus
7extern "C" {
8#endif
9
10#include <stdlib.h>
11#include <stdbool.h>
12#include <stdio.h>
13
14extern void *(*ts_current_malloc)(size_t);
15extern void *(*ts_current_calloc)(size_t, size_t);
16extern void *(*ts_current_realloc)(void *, size_t);
17extern void (*ts_current_free)(void *);
18
19// Allow clients to override allocation functions
20#ifndef ts_malloc
21#define ts_malloc ts_current_malloc
22#endif
23#ifndef ts_calloc
24#define ts_calloc ts_current_calloc
25#endif
26#ifndef ts_realloc
27#define ts_realloc ts_current_realloc
28#endif
29#ifndef ts_free
30#define ts_free ts_current_free
31#endif
32
33#ifdef __cplusplus
34}
35#endif
36
37#endif // TREE_SITTER_ALLOC_H_
diff --git a/vendor/tree-sitter/lib/src/array.h b/vendor/tree-sitter/lib/src/array.h
new file mode 100644
index 0000000..e026f6b
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/array.h
@@ -0,0 +1,249 @@
1#ifndef TREE_SITTER_ARRAY_H_
2#define TREE_SITTER_ARRAY_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include <string.h>
9#include <stdlib.h>
10#include <stdint.h>
11#include <assert.h>
12#include <stdbool.h>
13#include "./alloc.h"
14
15#define Array(T) \
16 struct { \
17 T *contents; \
18 uint32_t size; \
19 uint32_t capacity; \
20 }
21
22#define array_init(self) \
23 ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
24
25#define array_new() \
26 { NULL, 0, 0 }
27
28#define array_get(self, _index) \
29 (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
30
31#define array_front(self) array_get(self, 0)
32
33#define array_back(self) array_get(self, (self)->size - 1)
34
35#define array_clear(self) ((self)->size = 0)
36
37#define array_reserve(self, new_capacity) \
38 array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity)
39
40// Free any memory allocated for this array.
41#define array_delete(self) array__delete((VoidArray *)(self))
42
43#define array_push(self, element) \
44 (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \
45 (self)->contents[(self)->size++] = (element))
46
47// Increase the array's size by a given number of elements, reallocating
48// if necessary. New elements are zero-initialized.
49#define array_grow_by(self, count) \
50 (array__grow((VoidArray *)(self), count, array__elem_size(self)), \
51 memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \
52 (self)->size += (count))
53
54#define array_push_all(self, other) \
55 array_extend((self), (other)->size, (other)->contents)
56
57// Append `count` elements to the end of the array, reading their values from the
58// `contents` pointer.
59#define array_extend(self, count, contents) \
60 array__splice( \
61 (VoidArray *)(self), array__elem_size(self), (self)->size, \
62 0, count, contents \
63 )
64
65// Remove `old_count` elements from the array starting at the given `index`. At
66// the same index, insert `new_count` new elements, reading their values from the
67// `new_contents` pointer.
68#define array_splice(self, _index, old_count, new_count, new_contents) \
69 array__splice( \
70 (VoidArray *)(self), array__elem_size(self), _index, \
71 old_count, new_count, new_contents \
72 )
73
74// Insert one `element` into the array at the given `index`.
75#define array_insert(self, _index, element) \
76 array__splice((VoidArray *)(self), array__elem_size(self), _index, 0, 1, &(element))
77
78// Remove one `element` from the array at the given `index`.
79#define array_erase(self, _index) \
80 array__erase((VoidArray *)(self), array__elem_size(self), _index)
81
82#define array_pop(self) ((self)->contents[--(self)->size])
83
84#define array_assign(self, other) \
85 array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))
86
87#define array_swap(self, other) \
88 array__swap((VoidArray *)(self), (VoidArray *)(other))
89
90// Search a sorted array for a given `needle` value, using the given `compare`
91// callback to determine the order.
92//
93// If an existing element is found to be equal to `needle`, then the `index`
94// out-parameter is set to the existing value's index, and the `exists`
95// out-parameter is set to true. Otherwise, `index` is set to an index where
96// `needle` should be inserted in order to preserve the sorting, and `exists`
97// is set to false.
98#define array_search_sorted_with(self, compare, needle, _index, _exists) \
99 array__search_sorted(self, 0, compare, , needle, _index, _exists)
100
101// Search a sorted array for a given `needle` value, using integer comparisons
102// of a given struct field (specified with a leading dot) to determine the order.
103//
104// See also `array_search_sorted_with`.
105#define array_search_sorted_by(self, field, needle, _index, _exists) \
106 array__search_sorted(self, 0, compare_int, field, needle, _index, _exists)
107
108// Insert a given `value` into a sorted array, using the given `compare`
109// callback to determine the order.
110#define array_insert_sorted_with(self, compare, value) \
111 do { \
112 unsigned _index, _exists; \
113 array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
114 if (!_exists) array_insert(self, _index, value); \
115 } while (0)
116
117// Insert a given `value` into a sorted array, using integer comparisons of
118// a given struct field (specified with a leading dot) to determine the order.
119//
120// See also `array_search_sorted_by`.
121#define array_insert_sorted_by(self, field, value) \
122 do { \
123 unsigned _index, _exists; \
124 array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
125 if (!_exists) array_insert(self, _index, value); \
126 } while (0)
127
128// Private
129
130typedef Array(void) VoidArray;
131
132#define array__elem_size(self) sizeof(*(self)->contents)
133
134static inline void array__delete(VoidArray *self) {
135 if (self->contents) {
136 ts_free(self->contents);
137 self->contents = NULL;
138 self->size = 0;
139 self->capacity = 0;
140 }
141}
142
143static inline void array__erase(VoidArray *self, size_t element_size,
144 uint32_t index) {
145 assert(index < self->size);
146 char *contents = (char *)self->contents;
147 memmove(contents + index * element_size, contents + (index + 1) * element_size,
148 (self->size - index - 1) * element_size);
149 self->size--;
150}
151
152static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) {
153 if (new_capacity > self->capacity) {
154 if (self->contents) {
155 self->contents = ts_realloc(self->contents, new_capacity * element_size);
156 } else {
157 self->contents = ts_malloc(new_capacity * element_size);
158 }
159 self->capacity = new_capacity;
160 }
161}
162
163static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) {
164 array__reserve(self, element_size, other->size);
165 self->size = other->size;
166 memcpy(self->contents, other->contents, self->size * element_size);
167}
168
169static inline void array__swap(VoidArray *self, VoidArray *other) {
170 VoidArray swap = *other;
171 *other = *self;
172 *self = swap;
173}
174
175static inline void array__grow(VoidArray *self, uint32_t count, size_t element_size) {
176 uint32_t new_size = self->size + count;
177 if (new_size > self->capacity) {
178 uint32_t new_capacity = self->capacity * 2;
179 if (new_capacity < 8) new_capacity = 8;
180 if (new_capacity < new_size) new_capacity = new_size;
181 array__reserve(self, element_size, new_capacity);
182 }
183}
184
185static inline void array__splice(VoidArray *self, size_t element_size,
186 uint32_t index, uint32_t old_count,
187 uint32_t new_count, const void *elements) {
188 uint32_t new_size = self->size + new_count - old_count;
189 uint32_t old_end = index + old_count;
190 uint32_t new_end = index + new_count;
191 assert(old_end <= self->size);
192
193 array__reserve(self, element_size, new_size);
194
195 char *contents = (char *)self->contents;
196 if (self->size > old_end) {
197 memmove(
198 contents + new_end * element_size,
199 contents + old_end * element_size,
200 (self->size - old_end) * element_size
201 );
202 }
203 if (new_count > 0) {
204 if (elements) {
205 memcpy(
206 (contents + index * element_size),
207 elements,
208 new_count * element_size
209 );
210 } else {
211 memset(
212 (contents + index * element_size),
213 0,
214 new_count * element_size
215 );
216 }
217 }
218 self->size += new_count - old_count;
219}
220
221// A binary search routine, based on Rust's `std::slice::binary_search_by`.
222#define array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
223 do { \
224 *(_index) = start; \
225 *(_exists) = false; \
226 uint32_t size = (self)->size - *(_index); \
227 if (size == 0) break; \
228 int comparison; \
229 while (size > 1) { \
230 uint32_t half_size = size / 2; \
231 uint32_t mid_index = *(_index) + half_size; \
232 comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
233 if (comparison <= 0) *(_index) = mid_index; \
234 size -= half_size; \
235 } \
236 comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
237 if (comparison == 0) *(_exists) = true; \
238 else if (comparison < 0) *(_index) += 1; \
239 } while (0)
240
241// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
242// parameter by reference in order to work with the generic sorting function above.
243#define compare_int(a, b) ((int)*(a) - (int)(b))
244
245#ifdef __cplusplus
246}
247#endif
248
249#endif // TREE_SITTER_ARRAY_H_
diff --git a/vendor/tree-sitter/lib/src/atomic.h b/vendor/tree-sitter/lib/src/atomic.h
new file mode 100644
index 0000000..9e9269c
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/atomic.h
@@ -0,0 +1,67 @@
1#ifndef TREE_SITTER_ATOMIC_H_
2#define TREE_SITTER_ATOMIC_H_
3
4#include <stddef.h>
5#include <stdint.h>
6
7#ifdef __TINYC__
8
// TinyC build: a plain volatile read stands in for an atomic load.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t value = *p;
  return value;
}
12
// TinyC build: plain (non-atomic) increment; returns the value stored in
// `*p` after the increment.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  *p = *p + 1;
  return *p;
}
17
// TinyC build: plain (non-atomic) decrement; returns the value stored in
// `*p` after the decrement.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  *p = *p - 1;
  return *p;
}
22
23#elif defined(_WIN32)
24
25#include <windows.h>
26
// Windows build: the load is an ordinary volatile read.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t value = *p;
  return value;
}
30
31static inline uint32_t atomic_inc(volatile uint32_t *p) {
32 return InterlockedIncrement((long volatile *)p);
33}
34
35static inline uint32_t atomic_dec(volatile uint32_t *p) {
36 return InterlockedDecrement((long volatile *)p);
37}
38
39#else
40
// GCC/Clang build: relaxed atomic load when the `__atomic` builtins are
// available, otherwise a `__sync` fetch-and-add of zero.
static inline size_t atomic_load(const volatile size_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_fetch_and_add((volatile size_t *)p, 0);
#else
  return __atomic_load_n(p, __ATOMIC_RELAXED);
#endif
}
48
// GCC/Clang build: atomically adds one to `*p` and returns the new value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_fetch_and_add(p, 1U) + 1U;
#else
  return __atomic_fetch_add(p, 1U, __ATOMIC_SEQ_CST) + 1U;
#endif
}
56
// GCC/Clang build: atomically subtracts one from `*p` and returns the new
// value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_fetch_and_sub(p, 1U) - 1U;
#else
  return __atomic_fetch_sub(p, 1U, __ATOMIC_SEQ_CST) - 1U;
#endif
}
64
65#endif
66
67#endif // TREE_SITTER_ATOMIC_H_
diff --git a/vendor/tree-sitter/lib/src/clock.h b/vendor/tree-sitter/lib/src/clock.h
new file mode 100644
index 0000000..6e75729
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/clock.h
@@ -0,0 +1,146 @@
1#ifndef TREE_SITTER_CLOCK_H_
2#define TREE_SITTER_CLOCK_H_
3
4#include <stdbool.h>
5#include <stdint.h>
6
7typedef uint64_t TSDuration;
8
9#ifdef _WIN32
10
11// Windows:
12// * Represent a time as a performance counter value.
13// * Represent a duration as a number of performance counter ticks.
14
15#include <windows.h>
16typedef uint64_t TSClock;
17
18static inline TSDuration duration_from_micros(uint64_t micros) {
19 LARGE_INTEGER frequency;
20 QueryPerformanceFrequency(&frequency);
21 return micros * (uint64_t)frequency.QuadPart / 1000000;
22}
23
24static inline uint64_t duration_to_micros(TSDuration self) {
25 LARGE_INTEGER frequency;
26 QueryPerformanceFrequency(&frequency);
27 return self * 1000000 / (uint64_t)frequency.QuadPart;
28}
29
30static inline TSClock clock_null(void) {
31 return 0;
32}
33
34static inline TSClock clock_now(void) {
35 LARGE_INTEGER result;
36 QueryPerformanceCounter(&result);
37 return (uint64_t)result.QuadPart;
38}
39
40static inline TSClock clock_after(TSClock base, TSDuration duration) {
41 return base + duration;
42}
43
44static inline bool clock_is_null(TSClock self) {
45 return !self;
46}
47
48static inline bool clock_is_gt(TSClock self, TSClock other) {
49 return self > other;
50}
51
52#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
53
54// POSIX with monotonic clock support (Linux)
55// * Represent a time as a monotonic (seconds, nanoseconds) pair.
56// * Represent a duration as a number of microseconds.
57//
58// On these platforms, parse timeouts will correspond accurately to
59// real time, regardless of what other processes are running.
60
61#include <time.h>
62typedef struct timespec TSClock;
63
64static inline TSDuration duration_from_micros(uint64_t micros) {
65 return micros;
66}
67
68static inline uint64_t duration_to_micros(TSDuration self) {
69 return self;
70}
71
72static inline TSClock clock_now(void) {
73 TSClock result;
74 clock_gettime(CLOCK_MONOTONIC, &result);
75 return result;
76}
77
78static inline TSClock clock_null(void) {
79 return (TSClock) {0, 0};
80}
81
82static inline TSClock clock_after(TSClock base, TSDuration duration) {
83 TSClock result = base;
84 result.tv_sec += duration / 1000000;
85 result.tv_nsec += (duration % 1000000) * 1000;
86 if (result.tv_nsec >= 1000000000) {
87 result.tv_nsec -= 1000000000;
88 ++(result.tv_sec);
89 }
90 return result;
91}
92
93static inline bool clock_is_null(TSClock self) {
94 return !self.tv_sec;
95}
96
97static inline bool clock_is_gt(TSClock self, TSClock other) {
98 if (self.tv_sec > other.tv_sec) return true;
99 if (self.tv_sec < other.tv_sec) return false;
100 return self.tv_nsec > other.tv_nsec;
101}
102
103#else
104
105// macOS or POSIX without monotonic clock support
106// * Represent a time as a process clock value.
107// * Represent a duration as a number of process clock ticks.
108//
109// On these platforms, parse timeouts may be affected by other processes,
110// which is not ideal, but is better than using a non-monotonic time API
111// like `gettimeofday`.
112
113#include <time.h>
114typedef uint64_t TSClock;
115
116static inline TSDuration duration_from_micros(uint64_t micros) {
117 return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
118}
119
120static inline uint64_t duration_to_micros(TSDuration self) {
121 return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
122}
123
124static inline TSClock clock_null(void) {
125 return 0;
126}
127
128static inline TSClock clock_now(void) {
129 return (uint64_t)clock();
130}
131
132static inline TSClock clock_after(TSClock base, TSDuration duration) {
133 return base + duration;
134}
135
136static inline bool clock_is_null(TSClock self) {
137 return !self;
138}
139
140static inline bool clock_is_gt(TSClock self, TSClock other) {
141 return self > other;
142}
143
144#endif
145
146#endif // TREE_SITTER_CLOCK_H_
diff --git a/vendor/tree-sitter/lib/src/error_costs.h b/vendor/tree-sitter/lib/src/error_costs.h
new file mode 100644
index 0000000..32d3666
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/error_costs.h
@@ -0,0 +1,11 @@
1#ifndef TREE_SITTER_ERROR_COSTS_H_
2#define TREE_SITTER_ERROR_COSTS_H_
3
4#define ERROR_STATE 0
5#define ERROR_COST_PER_RECOVERY 500
6#define ERROR_COST_PER_MISSING_TREE 110
7#define ERROR_COST_PER_SKIPPED_TREE 100
8#define ERROR_COST_PER_SKIPPED_LINE 30
9#define ERROR_COST_PER_SKIPPED_CHAR 1
10
11#endif
diff --git a/vendor/tree-sitter/lib/src/get_changed_ranges.c b/vendor/tree-sitter/lib/src/get_changed_ranges.c
new file mode 100644
index 0000000..bcf8da9
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/get_changed_ranges.c
@@ -0,0 +1,501 @@
1#include "./get_changed_ranges.h"
2#include "./subtree.h"
3#include "./language.h"
4#include "./error_costs.h"
5#include "./tree_cursor.h"
6#include <assert.h>
7
8// #define DEBUG_GET_CHANGED_RANGES
9
10static void ts_range_array_add(
11 TSRangeArray *self,
12 Length start,
13 Length end
14) {
15 if (self->size > 0) {
16 TSRange *last_range = array_back(self);
17 if (start.bytes <= last_range->end_byte) {
18 last_range->end_byte = end.bytes;
19 last_range->end_point = end.extent;
20 return;
21 }
22 }
23
24 if (start.bytes < end.bytes) {
25 TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
26 array_push(self, range);
27 }
28}
29
30bool ts_range_array_intersects(
31 const TSRangeArray *self,
32 unsigned start_index,
33 uint32_t start_byte,
34 uint32_t end_byte
35) {
36 for (unsigned i = start_index; i < self->size; i++) {
37 TSRange *range = &self->contents[i];
38 if (range->end_byte > start_byte) {
39 if (range->start_byte >= end_byte) break;
40 return true;
41 }
42 }
43 return false;
44}
45
46void ts_range_array_get_changed_ranges(
47 const TSRange *old_ranges, unsigned old_range_count,
48 const TSRange *new_ranges, unsigned new_range_count,
49 TSRangeArray *differences
50) {
51 unsigned new_index = 0;
52 unsigned old_index = 0;
53 Length current_position = length_zero();
54 bool in_old_range = false;
55 bool in_new_range = false;
56
57 while (old_index < old_range_count || new_index < new_range_count) {
58 const TSRange *old_range = &old_ranges[old_index];
59 const TSRange *new_range = &new_ranges[new_index];
60
61 Length next_old_position;
62 if (in_old_range) {
63 next_old_position = (Length) {old_range->end_byte, old_range->end_point};
64 } else if (old_index < old_range_count) {
65 next_old_position = (Length) {old_range->start_byte, old_range->start_point};
66 } else {
67 next_old_position = LENGTH_MAX;
68 }
69
70 Length next_new_position;
71 if (in_new_range) {
72 next_new_position = (Length) {new_range->end_byte, new_range->end_point};
73 } else if (new_index < new_range_count) {
74 next_new_position = (Length) {new_range->start_byte, new_range->start_point};
75 } else {
76 next_new_position = LENGTH_MAX;
77 }
78
79 if (next_old_position.bytes < next_new_position.bytes) {
80 if (in_old_range != in_new_range) {
81 ts_range_array_add(differences, current_position, next_old_position);
82 }
83 if (in_old_range) old_index++;
84 current_position = next_old_position;
85 in_old_range = !in_old_range;
86 } else if (next_new_position.bytes < next_old_position.bytes) {
87 if (in_old_range != in_new_range) {
88 ts_range_array_add(differences, current_position, next_new_position);
89 }
90 if (in_new_range) new_index++;
91 current_position = next_new_position;
92 in_new_range = !in_new_range;
93 } else {
94 if (in_old_range != in_new_range) {
95 ts_range_array_add(differences, current_position, next_new_position);
96 }
97 if (in_old_range) old_index++;
98 if (in_new_range) new_index++;
99 in_old_range = !in_old_range;
100 in_new_range = !in_new_range;
101 current_position = next_new_position;
102 }
103 }
104}
105
106typedef struct {
107 TreeCursor cursor;
108 const TSLanguage *language;
109 unsigned visible_depth;
110 bool in_padding;
111} Iterator;
112
113static Iterator iterator_new(
114 TreeCursor *cursor,
115 const Subtree *tree,
116 const TSLanguage *language
117) {
118 array_clear(&cursor->stack);
119 array_push(&cursor->stack, ((TreeCursorEntry) {
120 .subtree = tree,
121 .position = length_zero(),
122 .child_index = 0,
123 .structural_child_index = 0,
124 }));
125 return (Iterator) {
126 .cursor = *cursor,
127 .language = language,
128 .visible_depth = 1,
129 .in_padding = false,
130 };
131}
132
133static bool iterator_done(Iterator *self) {
134 return self->cursor.stack.size == 0;
135}
136
137static Length iterator_start_position(Iterator *self) {
138 TreeCursorEntry entry = *array_back(&self->cursor.stack);
139 if (self->in_padding) {
140 return entry.position;
141 } else {
142 return length_add(entry.position, ts_subtree_padding(*entry.subtree));
143 }
144}
145
146static Length iterator_end_position(Iterator *self) {
147 TreeCursorEntry entry = *array_back(&self->cursor.stack);
148 Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree));
149 if (self->in_padding) {
150 return result;
151 } else {
152 return length_add(result, ts_subtree_size(*entry.subtree));
153 }
154}
155
156static bool iterator_tree_is_visible(const Iterator *self) {
157 TreeCursorEntry entry = *array_back(&self->cursor.stack);
158 if (ts_subtree_visible(*entry.subtree)) return true;
159 if (self->cursor.stack.size > 1) {
160 Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
161 return ts_language_alias_at(
162 self->language,
163 parent.ptr->production_id,
164 entry.structural_child_index
165 ) != 0;
166 }
167 return false;
168}
169
170static void iterator_get_visible_state(
171 const Iterator *self,
172 Subtree *tree,
173 TSSymbol *alias_symbol,
174 uint32_t *start_byte
175) {
176 uint32_t i = self->cursor.stack.size - 1;
177
178 if (self->in_padding) {
179 if (i == 0) return;
180 i--;
181 }
182
183 for (; i + 1 > 0; i--) {
184 TreeCursorEntry entry = self->cursor.stack.contents[i];
185
186 if (i > 0) {
187 const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
188 *alias_symbol = ts_language_alias_at(
189 self->language,
190 parent->ptr->production_id,
191 entry.structural_child_index
192 );
193 }
194
195 if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
196 *tree = *entry.subtree;
197 *start_byte = entry.position.bytes;
198 break;
199 }
200 }
201}
202
203static void iterator_ascend(Iterator *self) {
204 if (iterator_done(self)) return;
205 if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
206 if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
207 self->cursor.stack.size--;
208}
209
210static bool iterator_descend(Iterator *self, uint32_t goal_position) {
211 if (self->in_padding) return false;
212
213 bool did_descend = false;
214 do {
215 did_descend = false;
216 TreeCursorEntry entry = *array_back(&self->cursor.stack);
217 Length position = entry.position;
218 uint32_t structural_child_index = 0;
219 for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
220 const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
221 Length child_left = length_add(position, ts_subtree_padding(*child));
222 Length child_right = length_add(child_left, ts_subtree_size(*child));
223
224 if (child_right.bytes > goal_position) {
225 array_push(&self->cursor.stack, ((TreeCursorEntry) {
226 .subtree = child,
227 .position = position,
228 .child_index = i,
229 .structural_child_index = structural_child_index,
230 }));
231
232 if (iterator_tree_is_visible(self)) {
233 if (child_left.bytes > goal_position) {
234 self->in_padding = true;
235 } else {
236 self->visible_depth++;
237 }
238 return true;
239 }
240
241 did_descend = true;
242 break;
243 }
244
245 position = child_right;
246 if (!ts_subtree_extra(*child)) structural_child_index++;
247 }
248 } while (did_descend);
249
250 return false;
251}
252
253static void iterator_advance(Iterator *self) {
254 if (self->in_padding) {
255 self->in_padding = false;
256 if (iterator_tree_is_visible(self)) {
257 self->visible_depth++;
258 } else {
259 iterator_descend(self, 0);
260 }
261 return;
262 }
263
264 for (;;) {
265 if (iterator_tree_is_visible(self)) self->visible_depth--;
266 TreeCursorEntry entry = array_pop(&self->cursor.stack);
267 if (iterator_done(self)) return;
268
269 const Subtree *parent = array_back(&self->cursor.stack)->subtree;
270 uint32_t child_index = entry.child_index + 1;
271 if (ts_subtree_child_count(*parent) > child_index) {
272 Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
273 uint32_t structural_child_index = entry.structural_child_index;
274 if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
275 const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
276
277 array_push(&self->cursor.stack, ((TreeCursorEntry) {
278 .subtree = next_child,
279 .position = position,
280 .child_index = child_index,
281 .structural_child_index = structural_child_index,
282 }));
283
284 if (iterator_tree_is_visible(self)) {
285 if (ts_subtree_padding(*next_child).bytes > 0) {
286 self->in_padding = true;
287 } else {
288 self->visible_depth++;
289 }
290 } else {
291 iterator_descend(self, 0);
292 }
293 break;
294 }
295 }
296}
297
// Outcome of comparing the current nodes of the old and the new iterator.
typedef enum {
  IteratorDiffers = 0,    // the nodes are different
  IteratorMayDiffer = 1,  // same kind of node, but the contents may differ
  IteratorMatches = 2,    // the nodes are treated as identical
} IteratorComparison;
303
304static IteratorComparison iterator_compare(
305 const Iterator *old_iter,
306 const Iterator *new_iter
307) {
308 Subtree old_tree = NULL_SUBTREE;
309 Subtree new_tree = NULL_SUBTREE;
310 uint32_t old_start = 0;
311 uint32_t new_start = 0;
312 TSSymbol old_alias_symbol = 0;
313 TSSymbol new_alias_symbol = 0;
314 iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
315 iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
316
317 if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
318 if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
319
320 if (
321 old_alias_symbol == new_alias_symbol &&
322 ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
323 ) {
324 if (old_start == new_start &&
325 !ts_subtree_has_changes(old_tree) &&
326 ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
327 ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
328 ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
329 ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
330 (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
331 (ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
332 return IteratorMatches;
333 } else {
334 return IteratorMayDiffer;
335 }
336 }
337
338 return IteratorDiffers;
339}
340
#ifdef DEBUG_GET_CHANGED_RANGES
// Debug helper: print the current node's symbol name, a padding marker, the
// visible depth and the start/end points (rows printed 1-based).
static inline void iterator_print_state(Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
  const char *padding_marker = self->in_padding ? "(p)" : " ";
  TSPoint start = iterator_start_position(self).extent;
  TSPoint end = iterator_end_position(self).extent;
  printf(
    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
    name, padding_marker,
    self->visible_depth,
    start.row + 1, start.column,
    end.row + 1, end.column
  );
}
#endif
356
357unsigned ts_subtree_get_changed_ranges(
358 const Subtree *old_tree, const Subtree *new_tree,
359 TreeCursor *cursor1, TreeCursor *cursor2,
360 const TSLanguage *language,
361 const TSRangeArray *included_range_differences,
362 TSRange **ranges
363) {
364 TSRangeArray results = array_new();
365
366 Iterator old_iter = iterator_new(cursor1, old_tree, language);
367 Iterator new_iter = iterator_new(cursor2, new_tree, language);
368
369 unsigned included_range_difference_index = 0;
370
371 Length position = iterator_start_position(&old_iter);
372 Length next_position = iterator_start_position(&new_iter);
373 if (position.bytes < next_position.bytes) {
374 ts_range_array_add(&results, position, next_position);
375 position = next_position;
376 } else if (position.bytes > next_position.bytes) {
377 ts_range_array_add(&results, next_position, position);
378 next_position = position;
379 }
380
381 do {
382 #ifdef DEBUG_GET_CHANGED_RANGES
383 printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
384 iterator_print_state(&old_iter);
385 printf("\tvs\t");
386 iterator_print_state(&new_iter);
387 puts("");
388 #endif
389
390 // Compare the old and new subtrees.
391 IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
392
393 // Even if the two subtrees appear to be identical, they could differ
394 // internally if they contain a range of text that was previously
395 // excluded from the parse, and is now included, or vice-versa.
396 if (comparison == IteratorMatches && ts_range_array_intersects(
397 included_range_differences,
398 included_range_difference_index,
399 position.bytes,
400 iterator_end_position(&old_iter).bytes
401 )) {
402 comparison = IteratorMayDiffer;
403 }
404
405 bool is_changed = false;
406 switch (comparison) {
407 // If the subtrees are definitely identical, move to the end
408 // of both subtrees.
409 case IteratorMatches:
410 next_position = iterator_end_position(&old_iter);
411 break;
412
413 // If the subtrees might differ internally, descend into both
414 // subtrees, finding the first child that spans the current position.
415 case IteratorMayDiffer:
416 if (iterator_descend(&old_iter, position.bytes)) {
417 if (!iterator_descend(&new_iter, position.bytes)) {
418 is_changed = true;
419 next_position = iterator_end_position(&old_iter);
420 }
421 } else if (iterator_descend(&new_iter, position.bytes)) {
422 is_changed = true;
423 next_position = iterator_end_position(&new_iter);
424 } else {
425 next_position = length_min(
426 iterator_end_position(&old_iter),
427 iterator_end_position(&new_iter)
428 );
429 }
430 break;
431
432 // If the subtrees are different, record a change and then move
433 // to the end of both subtrees.
434 case IteratorDiffers:
435 is_changed = true;
436 next_position = length_min(
437 iterator_end_position(&old_iter),
438 iterator_end_position(&new_iter)
439 );
440 break;
441 }
442
443 // Ensure that both iterators are caught up to the current position.
444 while (
445 !iterator_done(&old_iter) &&
446 iterator_end_position(&old_iter).bytes <= next_position.bytes
447 ) iterator_advance(&old_iter);
448 while (
449 !iterator_done(&new_iter) &&
450 iterator_end_position(&new_iter).bytes <= next_position.bytes
451 ) iterator_advance(&new_iter);
452
453 // Ensure that both iterators are at the same depth in the tree.
454 while (old_iter.visible_depth > new_iter.visible_depth) {
455 iterator_ascend(&old_iter);
456 }
457 while (new_iter.visible_depth > old_iter.visible_depth) {
458 iterator_ascend(&new_iter);
459 }
460
461 if (is_changed) {
462 #ifdef DEBUG_GET_CHANGED_RANGES
463 printf(
464 " change: [[%u, %u] - [%u, %u]]\n",
465 position.extent.row + 1, position.extent.column,
466 next_position.extent.row + 1, next_position.extent.column
467 );
468 #endif
469
470 ts_range_array_add(&results, position, next_position);
471 }
472
473 position = next_position;
474
475 // Keep track of the current position in the included range differences
476 // array in order to avoid scanning the entire array on each iteration.
477 while (included_range_difference_index < included_range_differences->size) {
478 const TSRange *range = &included_range_differences->contents[
479 included_range_difference_index
480 ];
481 if (range->end_byte <= position.bytes) {
482 included_range_difference_index++;
483 } else {
484 break;
485 }
486 }
487 } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
488
489 Length old_size = ts_subtree_total_size(*old_tree);
490 Length new_size = ts_subtree_total_size(*new_tree);
491 if (old_size.bytes < new_size.bytes) {
492 ts_range_array_add(&results, old_size, new_size);
493 } else if (new_size.bytes < old_size.bytes) {
494 ts_range_array_add(&results, new_size, old_size);
495 }
496
497 *cursor1 = old_iter.cursor;
498 *cursor2 = new_iter.cursor;
499 *ranges = results.contents;
500 return results.size;
501}
diff --git a/vendor/tree-sitter/lib/src/get_changed_ranges.h b/vendor/tree-sitter/lib/src/get_changed_ranges.h
new file mode 100644
index 0000000..a1f1dbb
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/get_changed_ranges.h
@@ -0,0 +1,36 @@
1#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
2#define TREE_SITTER_GET_CHANGED_RANGES_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include "./tree_cursor.h"
9#include "./subtree.h"
10
11typedef Array(TSRange) TSRangeArray;
12
13void ts_range_array_get_changed_ranges(
14 const TSRange *old_ranges, unsigned old_range_count,
15 const TSRange *new_ranges, unsigned new_range_count,
16 TSRangeArray *differences
17);
18
19bool ts_range_array_intersects(
20 const TSRangeArray *self, unsigned start_index,
21 uint32_t start_byte, uint32_t end_byte
22);
23
24unsigned ts_subtree_get_changed_ranges(
25 const Subtree *old_tree, const Subtree *new_tree,
26 TreeCursor *cursor1, TreeCursor *cursor2,
27 const TSLanguage *language,
28 const TSRangeArray *included_range_differences,
29 TSRange **ranges
30);
31
32#ifdef __cplusplus
33}
34#endif
35
36#endif // TREE_SITTER_GET_CHANGED_RANGES_H_
diff --git a/vendor/tree-sitter/lib/src/host.h b/vendor/tree-sitter/lib/src/host.h
new file mode 100644
index 0000000..a07e9f8
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/host.h
@@ -0,0 +1,21 @@
1
2// Determine endian and pointer size based on known defines.
3// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments
4// to override this.
5
// TS_BIG_ENDIAN: 1 on big-endian targets, 0 otherwise. Detected from the
// compiler's byte-order macros, or from PowerPC builds with Apple's
// compiler, unless it was already defined (e.g. with -D).
#ifndef TS_BIG_ENDIAN
#  if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#    define TS_BIG_ENDIAN 1
#  elif defined(__APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))
#    define TS_BIG_ENDIAN 1
#  else
#    define TS_BIG_ENDIAN 0
#  endif
#endif
14
// TS_PTR_SIZE: pointer width in bits (32 or 64), unless already defined
// (e.g. with -D).
#if !defined(TS_PTR_SIZE)
// UINTPTR_MAX is provided by <stdint.h>. Include it here: an undefined macro
// evaluates to 0 inside #if, which would silently select 64 on a 32-bit
// target whenever this header is included before <stdint.h>.
#include <stdint.h>
#if UINTPTR_MAX == 0xFFFFFFFF
#define TS_PTR_SIZE 32
#else
#define TS_PTR_SIZE 64
#endif
#endif
diff --git a/vendor/tree-sitter/lib/src/language.c b/vendor/tree-sitter/lib/src/language.c
new file mode 100644
index 0000000..f30329d
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/language.c
@@ -0,0 +1,208 @@
1#include "./language.h"
2#include "./subtree.h"
3#include "./error_costs.h"
4#include <string.h>
5
6uint32_t ts_language_symbol_count(const TSLanguage *self) {
7 return self->symbol_count + self->alias_count;
8}
9
10uint32_t ts_language_state_count(const TSLanguage *self) {
11 return self->state_count;
12}
13
14uint32_t ts_language_version(const TSLanguage *self) {
15 return self->version;
16}
17
18uint32_t ts_language_field_count(const TSLanguage *self) {
19 return self->field_count;
20}
21
22void ts_language_table_entry(
23 const TSLanguage *self,
24 TSStateId state,
25 TSSymbol symbol,
26 TableEntry *result
27) {
28 if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
29 result->action_count = 0;
30 result->is_reusable = false;
31 result->actions = NULL;
32 } else {
33 assert(symbol < self->token_count);
34 uint32_t action_index = ts_language_lookup(self, state, symbol);
35 const TSParseActionEntry *entry = &self->parse_actions[action_index];
36 result->action_count = entry->entry.count;
37 result->is_reusable = entry->entry.reusable;
38 result->actions = (const TSParseAction *)(entry + 1);
39 }
40}
41
42TSSymbolMetadata ts_language_symbol_metadata(
43 const TSLanguage *self,
44 TSSymbol symbol
45) {
46 if (symbol == ts_builtin_sym_error) {
47 return (TSSymbolMetadata) {.visible = true, .named = true};
48 } else if (symbol == ts_builtin_sym_error_repeat) {
49 return (TSSymbolMetadata) {.visible = false, .named = false};
50 } else {
51 return self->symbol_metadata[symbol];
52 }
53}
54
55TSSymbol ts_language_public_symbol(
56 const TSLanguage *self,
57 TSSymbol symbol
58) {
59 if (symbol == ts_builtin_sym_error) return symbol;
60 return self->public_symbol_map[symbol];
61}
62
63TSStateId ts_language_next_state(
64 const TSLanguage *self,
65 TSStateId state,
66 TSSymbol symbol
67) {
68 if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
69 return 0;
70 } else if (symbol < self->token_count) {
71 uint32_t count;
72 const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
73 if (count > 0) {
74 TSParseAction action = actions[count - 1];
75 if (action.type == TSParseActionTypeShift) {
76 return action.shift.extra ? state : action.shift.state;
77 }
78 }
79 return 0;
80 } else {
81 return ts_language_lookup(self, state, symbol);
82 }
83}
84
85const char *ts_language_symbol_name(
86 const TSLanguage *self,
87 TSSymbol symbol
88) {
89 if (symbol == ts_builtin_sym_error) {
90 return "ERROR";
91 } else if (symbol == ts_builtin_sym_error_repeat) {
92 return "_ERROR";
93 } else if (symbol < ts_language_symbol_count(self)) {
94 return self->symbol_names[symbol];
95 } else {
96 return NULL;
97 }
98}
99
100TSSymbol ts_language_symbol_for_name(
101 const TSLanguage *self,
102 const char *string,
103 uint32_t length,
104 bool is_named
105) {
106 if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error;
107 uint16_t count = (uint16_t)ts_language_symbol_count(self);
108 for (TSSymbol i = 0; i < count; i++) {
109 TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
110 if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
111 const char *symbol_name = self->symbol_names[i];
112 if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
113 return self->public_symbol_map[i];
114 }
115 }
116 return 0;
117}
118
119TSSymbolType ts_language_symbol_type(
120 const TSLanguage *self,
121 TSSymbol symbol
122) {
123 TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol);
124 if (metadata.named && metadata.visible) {
125 return TSSymbolTypeRegular;
126 } else if (metadata.visible) {
127 return TSSymbolTypeAnonymous;
128 } else {
129 return TSSymbolTypeAuxiliary;
130 }
131}
132
133const char *ts_language_field_name_for_id(
134 const TSLanguage *self,
135 TSFieldId id
136) {
137 uint32_t count = ts_language_field_count(self);
138 if (count && id <= count) {
139 return self->field_names[id];
140 } else {
141 return NULL;
142 }
143}
144
145TSFieldId ts_language_field_id_for_name(
146 const TSLanguage *self,
147 const char *name,
148 uint32_t name_length
149) {
150 uint16_t count = (uint16_t)ts_language_field_count(self);
151 for (TSSymbol i = 1; i < count + 1; i++) {
152 switch (strncmp(name, self->field_names[i], name_length)) {
153 case 0:
154 if (self->field_names[i][name_length] == 0) return i;
155 break;
156 case -1:
157 return 0;
158 default:
159 break;
160 }
161 }
162 return 0;
163}
164
165TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
166 if (state >= self->state_count) return NULL;
167 LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator));
168 *iterator = ts_language_lookaheads(self, state);
169 return (TSLookaheadIterator *)iterator;
170}
171
172void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {
173 ts_free(self);
174}
175
176bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
177 LookaheadIterator *iterator = (LookaheadIterator *)self;
178 if (state >= iterator->language->state_count) return false;
179 *iterator = ts_language_lookaheads(iterator->language, state);
180 return true;
181}
182
183const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {
184 const LookaheadIterator *iterator = (const LookaheadIterator *)self;
185 return iterator->language;
186}
187
188bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
189 if (state >= language->state_count) return false;
190 LookaheadIterator *iterator = (LookaheadIterator *)self;
191 *iterator = ts_language_lookaheads(language, state);
192 return true;
193}
194
195bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {
196 LookaheadIterator *iterator = (LookaheadIterator *)self;
197 return ts_lookahead_iterator__next(iterator);
198}
199
200TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {
201 const LookaheadIterator *iterator = (const LookaheadIterator *)self;
202 return iterator->symbol;
203}
204
205const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {
206 const LookaheadIterator *iterator = (const LookaheadIterator *)self;
207 return ts_language_symbol_name(iterator->language, iterator->symbol);
208}
diff --git a/vendor/tree-sitter/lib/src/language.h b/vendor/tree-sitter/lib/src/language.h
new file mode 100644
index 0000000..55b5d89
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/language.h
@@ -0,0 +1,296 @@
1#ifndef TREE_SITTER_LANGUAGE_H_
2#define TREE_SITTER_LANGUAGE_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include "./subtree.h"
9#include "tree_sitter/parser.h"
10
11#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
12
13typedef struct {
14 const TSParseAction *actions;
15 uint32_t action_count;
16 bool is_reusable;
17} TableEntry;
18
19typedef struct {
20 const TSLanguage *language;
21 const uint16_t *data;
22 const uint16_t *group_end;
23 TSStateId state;
24 uint16_t table_value;
25 uint16_t section_index;
26 uint16_t group_count;
27 bool is_small_state;
28
29 const TSParseAction *actions;
30 TSSymbol symbol;
31 TSStateId next_state;
32 uint16_t action_count;
33} LookaheadIterator;
34
35void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
36
37TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
38
39TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
40
41TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol);
42
43static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
44 return 0 < symbol && symbol < self->external_token_count + 1;
45}
46
47static inline const TSParseAction *ts_language_actions(
48 const TSLanguage *self,
49 TSStateId state,
50 TSSymbol symbol,
51 uint32_t *count
52) {
53 TableEntry entry;
54 ts_language_table_entry(self, state, symbol, &entry);
55 *count = entry.action_count;
56 return entry.actions;
57}
58
59static inline bool ts_language_has_reduce_action(
60 const TSLanguage *self,
61 TSStateId state,
62 TSSymbol symbol
63) {
64 TableEntry entry;
65 ts_language_table_entry(self, state, symbol, &entry);
66 return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce;
67}
68
69// Lookup the table value for a given symbol and state.
70//
71// For non-terminal symbols, the table value represents a successor state.
72// For terminal symbols, it represents an index in the actions table.
73// For 'large' parse states, this is a direct lookup. For 'small' parse
74// states, this requires searching through the symbol groups to find
75// the given symbol.
76static inline uint16_t ts_language_lookup(
77 const TSLanguage *self,
78 TSStateId state,
79 TSSymbol symbol
80) {
81 if (state >= self->large_state_count) {
82 uint32_t index = self->small_parse_table_map[state - self->large_state_count];
83 const uint16_t *data = &self->small_parse_table[index];
84 uint16_t group_count = *(data++);
85 for (unsigned i = 0; i < group_count; i++) {
86 uint16_t section_value = *(data++);
87 uint16_t symbol_count = *(data++);
88 for (unsigned j = 0; j < symbol_count; j++) {
89 if (*(data++) == symbol) return section_value;
90 }
91 }
92 return 0;
93 } else {
94 return self->parse_table[state * self->symbol_count + symbol];
95 }
96}
97
98static inline bool ts_language_has_actions(
99 const TSLanguage *self,
100 TSStateId state,
101 TSSymbol symbol
102) {
103 return ts_language_lookup(self, state, symbol) != 0;
104}
105
106// Iterate over all of the symbols that are valid in the given state.
107//
108// For 'large' parse states, this just requires iterating through
109// all possible symbols and checking the parse table for each one.
110// For 'small' parse states, this exploits the structure of the
111// table to only visit the valid symbols.
112static inline LookaheadIterator ts_language_lookaheads(
113 const TSLanguage *self,
114 TSStateId state
115) {
116 bool is_small_state = state >= self->large_state_count;
117 const uint16_t *data;
118 const uint16_t *group_end = NULL;
119 uint16_t group_count = 0;
120 if (is_small_state) {
121 uint32_t index = self->small_parse_table_map[state - self->large_state_count];
122 data = &self->small_parse_table[index];
123 group_end = data + 1;
124 group_count = *data;
125 } else {
126 data = &self->parse_table[state * self->symbol_count] - 1;
127 }
128 return (LookaheadIterator) {
129 .language = self,
130 .data = data,
131 .group_end = group_end,
132 .group_count = group_count,
133 .is_small_state = is_small_state,
134 .symbol = UINT16_MAX,
135 .next_state = 0,
136 };
137}
138
139static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {
140 // For small parse states, valid symbols are listed explicitly,
141 // grouped by their value. There's no need to look up the actions
142 // again until moving to the next group.
143 if (self->is_small_state) {
144 self->data++;
145 if (self->data == self->group_end) {
146 if (self->group_count == 0) return false;
147 self->group_count--;
148 self->table_value = *(self->data++);
149 unsigned symbol_count = *(self->data++);
150 self->group_end = self->data + symbol_count;
151 self->symbol = *self->data;
152 } else {
153 self->symbol = *self->data;
154 return true;
155 }
156 }
157
158 // For large parse states, iterate through every symbol until one
159 // is found that has valid actions.
160 else {
161 do {
162 self->data++;
163 self->symbol++;
164 if (self->symbol >= self->language->symbol_count) return false;
165 self->table_value = *self->data;
166 } while (!self->table_value);
167 }
168
169 // Depending on if the symbols is terminal or non-terminal, the table value either
170 // represents a list of actions or a successor state.
171 if (self->symbol < self->language->token_count) {
172 const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
173 self->action_count = entry->entry.count;
174 self->actions = (const TSParseAction *)(entry + 1);
175 self->next_state = 0;
176 } else {
177 self->action_count = 0;
178 self->next_state = self->table_value;
179 }
180 return true;
181}
182
183// Whether the state is a "primary state". If this returns false, it indicates that there exists
184// another state that behaves identically to this one with respect to query analysis.
185static inline bool ts_language_state_is_primary(
186 const TSLanguage *self,
187 TSStateId state
188) {
189 if (self->version >= 14) {
190 return state == self->primary_state_ids[state];
191 } else {
192 return true;
193 }
194}
195
196static inline const bool *ts_language_enabled_external_tokens(
197 const TSLanguage *self,
198 unsigned external_scanner_state
199) {
200 if (external_scanner_state == 0) {
201 return NULL;
202 } else {
203 return self->external_scanner.states + self->external_token_count * external_scanner_state;
204 }
205}
206
207static inline const TSSymbol *ts_language_alias_sequence(
208 const TSLanguage *self,
209 uint32_t production_id
210) {
211 return production_id ?
212 &self->alias_sequences[production_id * self->max_alias_sequence_length] :
213 NULL;
214}
215
216static inline TSSymbol ts_language_alias_at(
217 const TSLanguage *self,
218 uint32_t production_id,
219 uint32_t child_index
220) {
221 return production_id ?
222 self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] :
223 0;
224}
225
226static inline void ts_language_field_map(
227 const TSLanguage *self,
228 uint32_t production_id,
229 const TSFieldMapEntry **start,
230 const TSFieldMapEntry **end
231) {
232 if (self->field_count == 0) {
233 *start = NULL;
234 *end = NULL;
235 return;
236 }
237
238 TSFieldMapSlice slice = self->field_map_slices[production_id];
239 *start = &self->field_map_entries[slice.index];
240 *end = &self->field_map_entries[slice.index] + slice.length;
241}
242
243static inline void ts_language_aliases_for_symbol(
244 const TSLanguage *self,
245 TSSymbol original_symbol,
246 const TSSymbol **start,
247 const TSSymbol **end
248) {
249 *start = &self->public_symbol_map[original_symbol];
250 *end = *start + 1;
251
252 unsigned idx = 0;
253 for (;;) {
254 TSSymbol symbol = self->alias_map[idx++];
255 if (symbol == 0 || symbol > original_symbol) break;
256 uint16_t count = self->alias_map[idx++];
257 if (symbol == original_symbol) {
258 *start = &self->alias_map[idx];
259 *end = &self->alias_map[idx + count];
260 break;
261 }
262 idx += count;
263 }
264}
265
266static inline void ts_language_write_symbol_as_dot_string(
267 const TSLanguage *self,
268 FILE *f,
269 TSSymbol symbol
270) {
271 const char *name = ts_language_symbol_name(self, symbol);
272 for (const char *chr = name; *chr; chr++) {
273 switch (*chr) {
274 case '"':
275 case '\\':
276 fputc('\\', f);
277 fputc(*chr, f);
278 break;
279 case '\n':
280 fputs("\\n", f);
281 break;
282 case '\t':
283 fputs("\\t", f);
284 break;
285 default:
286 fputc(*chr, f);
287 break;
288 }
289 }
290}
291
292#ifdef __cplusplus
293}
294#endif
295
296#endif // TREE_SITTER_LANGUAGE_H_
diff --git a/vendor/tree-sitter/lib/src/length.h b/vendor/tree-sitter/lib/src/length.h
new file mode 100644
index 0000000..42d61ef
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/length.h
@@ -0,0 +1,52 @@
1#ifndef TREE_SITTER_LENGTH_H_
2#define TREE_SITTER_LENGTH_H_
3
4#include <stdlib.h>
5#include <stdbool.h>
6#include "./point.h"
7#include "tree_sitter/api.h"
8
9typedef struct {
10 uint32_t bytes;
11 TSPoint extent;
12} Length;
13
14static const Length LENGTH_UNDEFINED = {0, {0, 1}};
15static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
16
17static inline bool length_is_undefined(Length length) {
18 return length.bytes == 0 && length.extent.column != 0;
19}
20
21static inline Length length_min(Length len1, Length len2) {
22 return (len1.bytes < len2.bytes) ? len1 : len2;
23}
24
25static inline Length length_add(Length len1, Length len2) {
26 Length result;
27 result.bytes = len1.bytes + len2.bytes;
28 result.extent = point_add(len1.extent, len2.extent);
29 return result;
30}
31
32static inline Length length_sub(Length len1, Length len2) {
33 Length result;
34 result.bytes = len1.bytes - len2.bytes;
35 result.extent = point_sub(len1.extent, len2.extent);
36 return result;
37}
38
39static inline Length length_zero(void) {
40 Length result = {0, {0, 0}};
41 return result;
42}
43
44static inline Length length_saturating_sub(Length len1, Length len2) {
45 if (len1.bytes > len2.bytes) {
46 return length_sub(len1, len2);
47 } else {
48 return length_zero();
49 }
50}
51
52#endif
diff --git a/vendor/tree-sitter/lib/src/lexer.c b/vendor/tree-sitter/lib/src/lexer.c
new file mode 100644
index 0000000..d108c04
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/lexer.c
@@ -0,0 +1,419 @@
1#include <stdio.h>
2#include "./lexer.h"
3#include "./subtree.h"
4#include "./length.h"
5#include "./unicode.h"
6
// Emit a lexing log message through the lexer's logger, if one is installed.
//
// `message` must be a string literal (it is concatenated with the format
// suffix at compile time). `character` is printed as a quoted character when
// it is printable ASCII (32..126) and as a number otherwise. The macro
// expects a `Lexer *self` in scope.
//
// Every use of `character` is parenthesized so that an expression argument
// cannot change the meaning of the comparisons. The macro deliberately
// remains a bare `if` block: existing call sites invoke it without a
// trailing semicolon.
#define LOG(message, character)                   \
  if (self->logger.log) {                         \
    snprintf(                                     \
      self->debug_buffer,                         \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE,      \
      32 <= (character) && (character) < 127 ?    \
        message " character:'%c'" :               \
        message " character:%d",                  \
      (character)                                 \
    );                                            \
    self->logger.log(                             \
      self->logger.payload,                       \
      TSLogTypeLex,                               \
      self->debug_buffer                          \
    );                                            \
  }
23
24static const int32_t BYTE_ORDER_MARK = 0xFEFF;
25
26static const TSRange DEFAULT_RANGE = {
27 .start_point = {
28 .row = 0,
29 .column = 0,
30 },
31 .end_point = {
32 .row = UINT32_MAX,
33 .column = UINT32_MAX,
34 },
35 .start_byte = 0,
36 .end_byte = UINT32_MAX
37};
38
39// Check if the lexer has reached EOF. This state is stored
40// by setting the lexer's `current_included_range_index` such that
41// it has consumed all of its available ranges.
42static bool ts_lexer__eof(const TSLexer *_self) {
43 Lexer *self = (Lexer *)_self;
44 return self->current_included_range_index == self->included_range_count;
45}
46
47// Clear the currently stored chunk of source code, because the lexer's
48// position has changed.
49static void ts_lexer__clear_chunk(Lexer *self) {
50 self->chunk = NULL;
51 self->chunk_size = 0;
52 self->chunk_start = 0;
53}
54
55// Call the lexer's input callback to obtain a new chunk of source code
56// for the current position.
57static void ts_lexer__get_chunk(Lexer *self) {
58 self->chunk_start = self->current_position.bytes;
59 self->chunk = self->input.read(
60 self->input.payload,
61 self->current_position.bytes,
62 self->current_position.extent,
63 &self->chunk_size
64 );
65 if (!self->chunk_size) {
66 self->current_included_range_index = self->included_range_count;
67 self->chunk = NULL;
68 }
69}
70
71// Decode the next unicode character in the current chunk of source code.
72// This assumes that the lexer has already retrieved a chunk of source
73// code that spans the current position.
74static void ts_lexer__get_lookahead(Lexer *self) {
75 uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
76 uint32_t size = self->chunk_size - position_in_chunk;
77
78 if (size == 0) {
79 self->lookahead_size = 1;
80 self->data.lookahead = '\0';
81 return;
82 }
83
84 const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
85 UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
86 ? ts_decode_utf8
87 : ts_decode_utf16;
88
89 self->lookahead_size = decode(chunk, size, &self->data.lookahead);
90
91 // If this chunk ended in the middle of a multi-byte character,
92 // try again with a fresh chunk.
93 if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
94 ts_lexer__get_chunk(self);
95 chunk = (const uint8_t *)self->chunk;
96 size = self->chunk_size;
97 self->lookahead_size = decode(chunk, size, &self->data.lookahead);
98 }
99
100 if (self->data.lookahead == TS_DECODE_ERROR) {
101 self->lookahead_size = 1;
102 }
103}
104
105static void ts_lexer_goto(Lexer *self, Length position) {
106 self->current_position = position;
107
108 // Move to the first valid position at or after the given position.
109 bool found_included_range = false;
110 for (unsigned i = 0; i < self->included_range_count; i++) {
111 TSRange *included_range = &self->included_ranges[i];
112 if (
113 included_range->end_byte > self->current_position.bytes &&
114 included_range->end_byte > included_range->start_byte
115 ) {
116 if (included_range->start_byte >= self->current_position.bytes) {
117 self->current_position = (Length) {
118 .bytes = included_range->start_byte,
119 .extent = included_range->start_point,
120 };
121 }
122
123 self->current_included_range_index = i;
124 found_included_range = true;
125 break;
126 }
127 }
128
129 if (found_included_range) {
130 // If the current position is outside of the current chunk of text,
131 // then clear out the current chunk of text.
132 if (self->chunk && (
133 self->current_position.bytes < self->chunk_start ||
134 self->current_position.bytes >= self->chunk_start + self->chunk_size
135 )) {
136 ts_lexer__clear_chunk(self);
137 }
138
139 self->lookahead_size = 0;
140 self->data.lookahead = '\0';
141 }
142
143 // If the given position is beyond any of included ranges, move to the EOF
144 // state - past the end of the included ranges.
145 else {
146 self->current_included_range_index = self->included_range_count;
147 TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
148 self->current_position = (Length) {
149 .bytes = last_included_range->end_byte,
150 .extent = last_included_range->end_point,
151 };
152 ts_lexer__clear_chunk(self);
153 self->lookahead_size = 1;
154 self->data.lookahead = '\0';
155 }
156}
157
158// Intended to be called only from functions that control logging.
159static void ts_lexer__do_advance(Lexer *self, bool skip) {
160 if (self->lookahead_size) {
161 self->current_position.bytes += self->lookahead_size;
162 if (self->data.lookahead == '\n') {
163 self->current_position.extent.row++;
164 self->current_position.extent.column = 0;
165 } else {
166 self->current_position.extent.column += self->lookahead_size;
167 }
168 }
169
170 const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
171 while (
172 self->current_position.bytes >= current_range->end_byte ||
173 current_range->end_byte == current_range->start_byte
174 ) {
175 if (self->current_included_range_index < self->included_range_count) {
176 self->current_included_range_index++;
177 }
178 if (self->current_included_range_index < self->included_range_count) {
179 current_range++;
180 self->current_position = (Length) {
181 current_range->start_byte,
182 current_range->start_point,
183 };
184 } else {
185 current_range = NULL;
186 break;
187 }
188 }
189
190 if (skip) self->token_start_position = self->current_position;
191
192 if (current_range) {
193 if (
194 self->current_position.bytes < self->chunk_start ||
195 self->current_position.bytes >= self->chunk_start + self->chunk_size
196 ) {
197 ts_lexer__get_chunk(self);
198 }
199 ts_lexer__get_lookahead(self);
200 } else {
201 ts_lexer__clear_chunk(self);
202 self->data.lookahead = '\0';
203 self->lookahead_size = 1;
204 }
205}
206
207// Advance to the next character in the source code, retrieving a new
208// chunk of source code if needed.
209static void ts_lexer__advance(TSLexer *_self, bool skip) {
210 Lexer *self = (Lexer *)_self;
211 if (!self->chunk) return;
212
213 if (skip) {
214 LOG("skip", self->data.lookahead)
215 } else {
216 LOG("consume", self->data.lookahead)
217 }
218
219 ts_lexer__do_advance(self, skip);
220}
221
222// Mark that a token match has completed. This can be called multiple
223// times if a longer match is found later.
224static void ts_lexer__mark_end(TSLexer *_self) {
225 Lexer *self = (Lexer *)_self;
226 if (!ts_lexer__eof(&self->data)) {
227 // If the lexer is right at the beginning of included range,
228 // then the token should be considered to end at the *end* of the
229 // previous included range, rather than here.
230 TSRange *current_included_range = &self->included_ranges[
231 self->current_included_range_index
232 ];
233 if (
234 self->current_included_range_index > 0 &&
235 self->current_position.bytes == current_included_range->start_byte
236 ) {
237 TSRange *previous_included_range = current_included_range - 1;
238 self->token_end_position = (Length) {
239 previous_included_range->end_byte,
240 previous_included_range->end_point,
241 };
242 return;
243 }
244 }
245 self->token_end_position = self->current_position;
246}
247
248static uint32_t ts_lexer__get_column(TSLexer *_self) {
249 Lexer *self = (Lexer *)_self;
250
251 uint32_t goal_byte = self->current_position.bytes;
252
253 self->did_get_column = true;
254 self->current_position.bytes -= self->current_position.extent.column;
255 self->current_position.extent.column = 0;
256
257 if (self->current_position.bytes < self->chunk_start) {
258 ts_lexer__get_chunk(self);
259 }
260
261 uint32_t result = 0;
262 if (!ts_lexer__eof(_self)) {
263 ts_lexer__get_lookahead(self);
264 while (self->current_position.bytes < goal_byte && self->chunk) {
265 result++;
266 ts_lexer__do_advance(self, false);
267 if (ts_lexer__eof(_self)) break;
268 }
269 }
270
271 return result;
272}
273
274// Is the lexer at a boundary between two disjoint included ranges of
275// source code? This is exposed as an API because some languages' external
276// scanners need to perform custom actions at these boundaries.
277static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
278 const Lexer *self = (const Lexer *)_self;
279 if (self->current_included_range_index < self->included_range_count) {
280 TSRange *current_range = &self->included_ranges[self->current_included_range_index];
281 return self->current_position.bytes == current_range->start_byte;
282 } else {
283 return false;
284 }
285}
286
287void ts_lexer_init(Lexer *self) {
288 *self = (Lexer) {
289 .data = {
290 // The lexer's methods are stored as struct fields so that generated
291 // parsers can call them without needing to be linked against this
292 // library.
293 .advance = ts_lexer__advance,
294 .mark_end = ts_lexer__mark_end,
295 .get_column = ts_lexer__get_column,
296 .is_at_included_range_start = ts_lexer__is_at_included_range_start,
297 .eof = ts_lexer__eof,
298 .lookahead = 0,
299 .result_symbol = 0,
300 },
301 .chunk = NULL,
302 .chunk_size = 0,
303 .chunk_start = 0,
304 .current_position = {0, {0, 0}},
305 .logger = {
306 .payload = NULL,
307 .log = NULL
308 },
309 .included_ranges = NULL,
310 .included_range_count = 0,
311 .current_included_range_index = 0,
312 };
313 ts_lexer_set_included_ranges(self, NULL, 0);
314}
315
316void ts_lexer_delete(Lexer *self) {
317 ts_free(self->included_ranges);
318}
319
320void ts_lexer_set_input(Lexer *self, TSInput input) {
321 self->input = input;
322 ts_lexer__clear_chunk(self);
323 ts_lexer_goto(self, self->current_position);
324}
325
326// Move the lexer to the given position. This doesn't do any work
327// if the parser is already at the given position.
328void ts_lexer_reset(Lexer *self, Length position) {
329 if (position.bytes != self->current_position.bytes) {
330 ts_lexer_goto(self, position);
331 }
332}
333
334void ts_lexer_start(Lexer *self) {
335 self->token_start_position = self->current_position;
336 self->token_end_position = LENGTH_UNDEFINED;
337 self->data.result_symbol = 0;
338 self->did_get_column = false;
339 if (!ts_lexer__eof(&self->data)) {
340 if (!self->chunk_size) ts_lexer__get_chunk(self);
341 if (!self->lookahead_size) ts_lexer__get_lookahead(self);
342 if (
343 self->current_position.bytes == 0 &&
344 self->data.lookahead == BYTE_ORDER_MARK
345 ) ts_lexer__advance(&self->data, true);
346 }
347}
348
349void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
350 if (length_is_undefined(self->token_end_position)) {
351 ts_lexer__mark_end(&self->data);
352 }
353
354 // If the token ended at an included range boundary, then its end position
355 // will have been reset to the end of the preceding range. Reset the start
356 // position to match.
357 if (self->token_end_position.bytes < self->token_start_position.bytes) {
358 self->token_start_position = self->token_end_position;
359 }
360
361 uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
362
363 // In order to determine that a byte sequence is invalid UTF8 or UTF16,
364 // the character decoding algorithm may have looked at the following byte.
365 // Therefore, the next byte *after* the current (invalid) character
366 // affects the interpretation of the current character.
367 if (self->data.lookahead == TS_DECODE_ERROR) {
368 current_lookahead_end_byte++;
369 }
370
371 if (current_lookahead_end_byte > *lookahead_end_byte) {
372 *lookahead_end_byte = current_lookahead_end_byte;
373 }
374}
375
376void ts_lexer_advance_to_end(Lexer *self) {
377 while (self->chunk) {
378 ts_lexer__advance(&self->data, false);
379 }
380}
381
382void ts_lexer_mark_end(Lexer *self) {
383 ts_lexer__mark_end(&self->data);
384}
385
386bool ts_lexer_set_included_ranges(
387 Lexer *self,
388 const TSRange *ranges,
389 uint32_t count
390) {
391 if (count == 0 || !ranges) {
392 ranges = &DEFAULT_RANGE;
393 count = 1;
394 } else {
395 uint32_t previous_byte = 0;
396 for (unsigned i = 0; i < count; i++) {
397 const TSRange *range = &ranges[i];
398 if (
399 range->start_byte < previous_byte ||
400 range->end_byte < range->start_byte
401 ) return false;
402 previous_byte = range->end_byte;
403 }
404 }
405
406 size_t size = count * sizeof(TSRange);
407 self->included_ranges = ts_realloc(self->included_ranges, size);
408 memcpy(self->included_ranges, ranges, size);
409 self->included_range_count = count;
410 ts_lexer_goto(self, self->current_position);
411 return true;
412}
413
414TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
415 *count = self->included_range_count;
416 return self->included_ranges;
417}
418
419#undef LOG
diff --git a/vendor/tree-sitter/lib/src/lexer.h b/vendor/tree-sitter/lib/src/lexer.h
new file mode 100644
index 0000000..c1a5bfd
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/lexer.h
@@ -0,0 +1,49 @@
1#ifndef TREE_SITTER_LEXER_H_
2#define TREE_SITTER_LEXER_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include "./length.h"
9#include "./subtree.h"
10#include "tree_sitter/api.h"
11#include "tree_sitter/parser.h"
12
13typedef struct {
14 TSLexer data;
15 Length current_position;
16 Length token_start_position;
17 Length token_end_position;
18
19 TSRange *included_ranges;
20 const char *chunk;
21 TSInput input;
22 TSLogger logger;
23
24 uint32_t included_range_count;
25 uint32_t current_included_range_index;
26 uint32_t chunk_start;
27 uint32_t chunk_size;
28 uint32_t lookahead_size;
29 bool did_get_column;
30
31 char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
32} Lexer;
33
34void ts_lexer_init(Lexer *);
35void ts_lexer_delete(Lexer *);
36void ts_lexer_set_input(Lexer *, TSInput);
37void ts_lexer_reset(Lexer *, Length);
38void ts_lexer_start(Lexer *);
39void ts_lexer_finish(Lexer *, uint32_t *);
40void ts_lexer_advance_to_end(Lexer *);
41void ts_lexer_mark_end(Lexer *);
42bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
43TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
44
45#ifdef __cplusplus
46}
47#endif
48
49#endif // TREE_SITTER_LEXER_H_
diff --git a/vendor/tree-sitter/lib/src/lib.c b/vendor/tree-sitter/lib/src/lib.c
new file mode 100644
index 0000000..5aab20d
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/lib.c
@@ -0,0 +1,18 @@
1// The Tree-sitter library can be built by compiling this one source file.
2//
3// The following directories must be added to the include path:
4// - include
5
6#define _POSIX_C_SOURCE 200112L
7
8#include "./alloc.c"
9#include "./get_changed_ranges.c"
10#include "./language.c"
11#include "./lexer.c"
12#include "./node.c"
13#include "./parser.c"
14#include "./query.c"
15#include "./stack.c"
16#include "./subtree.c"
17#include "./tree_cursor.c"
18#include "./tree.c"
diff --git a/vendor/tree-sitter/lib/src/node.c b/vendor/tree-sitter/lib/src/node.c
new file mode 100644
index 0000000..546b909
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/node.c
@@ -0,0 +1,767 @@
1#include <stdbool.h>
2#include "./subtree.h"
3#include "./tree.h"
4#include "./language.h"
5
6typedef struct {
7 Subtree parent;
8 const TSTree *tree;
9 Length position;
10 uint32_t child_index;
11 uint32_t structural_child_index;
12 const TSSymbol *alias_sequence;
13} NodeChildIterator;
14
15// TSNode - constructors
16
17TSNode ts_node_new(
18 const TSTree *tree,
19 const Subtree *subtree,
20 Length position,
21 TSSymbol alias
22) {
23 return (TSNode) {
24 {position.bytes, position.extent.row, position.extent.column, alias},
25 subtree,
26 tree,
27 };
28}
29
30static inline TSNode ts_node__null(void) {
31 return ts_node_new(NULL, NULL, length_zero(), 0);
32}
33
34// TSNode - accessors
35
36uint32_t ts_node_start_byte(TSNode self) {
37 return self.context[0];
38}
39
40TSPoint ts_node_start_point(TSNode self) {
41 return (TSPoint) {self.context[1], self.context[2]};
42}
43
44static inline uint32_t ts_node__alias(const TSNode *self) {
45 return self->context[3];
46}
47
48static inline Subtree ts_node__subtree(TSNode self) {
49 return *(const Subtree *)self.id;
50}
51
52// NodeChildIterator
53
54static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
55 Subtree subtree = ts_node__subtree(*node);
56 if (ts_subtree_child_count(subtree) == 0) {
57 return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
58 }
59 const TSSymbol *alias_sequence = ts_language_alias_sequence(
60 node->tree->language,
61 subtree.ptr->production_id
62 );
63 return (NodeChildIterator) {
64 .tree = node->tree,
65 .parent = subtree,
66 .position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
67 .child_index = 0,
68 .structural_child_index = 0,
69 .alias_sequence = alias_sequence,
70 };
71}
72
73static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
74 return self->child_index == self->parent.ptr->child_count;
75}
76
77static inline bool ts_node_child_iterator_next(
78 NodeChildIterator *self,
79 TSNode *result
80) {
81 if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
82 const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
83 TSSymbol alias_symbol = 0;
84 if (!ts_subtree_extra(*child)) {
85 if (self->alias_sequence) {
86 alias_symbol = self->alias_sequence[self->structural_child_index];
87 }
88 self->structural_child_index++;
89 }
90 if (self->child_index > 0) {
91 self->position = length_add(self->position, ts_subtree_padding(*child));
92 }
93 *result = ts_node_new(
94 self->tree,
95 child,
96 self->position,
97 alias_symbol
98 );
99 self->position = length_add(self->position, ts_subtree_size(*child));
100 self->child_index++;
101 return true;
102}
103
104// TSNode - private
105
106static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
107 Subtree tree = ts_node__subtree(self);
108 if (include_anonymous) {
109 return ts_subtree_visible(tree) || ts_node__alias(&self);
110 } else {
111 TSSymbol alias = ts_node__alias(&self);
112 if (alias) {
113 return ts_language_symbol_metadata(self.tree->language, alias).named;
114 } else {
115 return ts_subtree_visible(tree) && ts_subtree_named(tree);
116 }
117 }
118}
119
120static inline uint32_t ts_node__relevant_child_count(
121 TSNode self,
122 bool include_anonymous
123) {
124 Subtree tree = ts_node__subtree(self);
125 if (ts_subtree_child_count(tree) > 0) {
126 if (include_anonymous) {
127 return tree.ptr->visible_child_count;
128 } else {
129 return tree.ptr->named_child_count;
130 }
131 } else {
132 return 0;
133 }
134}
135
136static inline TSNode ts_node__child(
137 TSNode self,
138 uint32_t child_index,
139 bool include_anonymous
140) {
141 TSNode result = self;
142 bool did_descend = true;
143
144 while (did_descend) {
145 did_descend = false;
146
147 TSNode child;
148 uint32_t index = 0;
149 NodeChildIterator iterator = ts_node_iterate_children(&result);
150 while (ts_node_child_iterator_next(&iterator, &child)) {
151 if (ts_node__is_relevant(child, include_anonymous)) {
152 if (index == child_index) {
153 return child;
154 }
155 index++;
156 } else {
157 uint32_t grandchild_index = child_index - index;
158 uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
159 if (grandchild_index < grandchild_count) {
160 did_descend = true;
161 result = child;
162 child_index = grandchild_index;
163 break;
164 }
165 index += grandchild_count;
166 }
167 }
168 }
169
170 return ts_node__null();
171}
172
173static bool ts_subtree_has_trailing_empty_descendant(
174 Subtree self,
175 Subtree other
176) {
177 for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) {
178 Subtree child = ts_subtree_children(self)[i];
179 if (ts_subtree_total_bytes(child) > 0) break;
180 if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) {
181 return true;
182 }
183 }
184 return false;
185}
186
187static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
188 Subtree self_subtree = ts_node__subtree(self);
189 bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
190 uint32_t target_end_byte = ts_node_end_byte(self);
191
192 TSNode node = ts_node_parent(self);
193 TSNode earlier_node = ts_node__null();
194 bool earlier_node_is_relevant = false;
195
196 while (!ts_node_is_null(node)) {
197 TSNode earlier_child = ts_node__null();
198 bool earlier_child_is_relevant = false;
199 bool found_child_containing_target = false;
200
201 TSNode child;
202 NodeChildIterator iterator = ts_node_iterate_children(&node);
203 while (ts_node_child_iterator_next(&iterator, &child)) {
204 if (child.id == self.id) break;
205 if (iterator.position.bytes > target_end_byte) {
206 found_child_containing_target = true;
207 break;
208 }
209
210 if (iterator.position.bytes == target_end_byte &&
211 (!self_is_empty ||
212 ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
213 found_child_containing_target = true;
214 break;
215 }
216
217 if (ts_node__is_relevant(child, include_anonymous)) {
218 earlier_child = child;
219 earlier_child_is_relevant = true;
220 } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
221 earlier_child = child;
222 earlier_child_is_relevant = false;
223 }
224 }
225
226 if (found_child_containing_target) {
227 if (!ts_node_is_null(earlier_child)) {
228 earlier_node = earlier_child;
229 earlier_node_is_relevant = earlier_child_is_relevant;
230 }
231 node = child;
232 } else if (earlier_child_is_relevant) {
233 return earlier_child;
234 } else if (!ts_node_is_null(earlier_child)) {
235 node = earlier_child;
236 } else if (earlier_node_is_relevant) {
237 return earlier_node;
238 } else {
239 node = earlier_node;
240 earlier_node = ts_node__null();
241 earlier_node_is_relevant = false;
242 }
243 }
244
245 return ts_node__null();
246}
247
248static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
249 uint32_t target_end_byte = ts_node_end_byte(self);
250
251 TSNode node = ts_node_parent(self);
252 TSNode later_node = ts_node__null();
253 bool later_node_is_relevant = false;
254
255 while (!ts_node_is_null(node)) {
256 TSNode later_child = ts_node__null();
257 bool later_child_is_relevant = false;
258 TSNode child_containing_target = ts_node__null();
259
260 TSNode child;
261 NodeChildIterator iterator = ts_node_iterate_children(&node);
262 while (ts_node_child_iterator_next(&iterator, &child)) {
263 if (iterator.position.bytes < target_end_byte) continue;
264 if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
265 if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
266 child_containing_target = child;
267 }
268 } else if (ts_node__is_relevant(child, include_anonymous)) {
269 later_child = child;
270 later_child_is_relevant = true;
271 break;
272 } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
273 later_child = child;
274 later_child_is_relevant = false;
275 break;
276 }
277 }
278
279 if (!ts_node_is_null(child_containing_target)) {
280 if (!ts_node_is_null(later_child)) {
281 later_node = later_child;
282 later_node_is_relevant = later_child_is_relevant;
283 }
284 node = child_containing_target;
285 } else if (later_child_is_relevant) {
286 return later_child;
287 } else if (!ts_node_is_null(later_child)) {
288 node = later_child;
289 } else if (later_node_is_relevant) {
290 return later_node;
291 } else {
292 node = later_node;
293 }
294 }
295
296 return ts_node__null();
297}
298
299static inline TSNode ts_node__first_child_for_byte(
300 TSNode self,
301 uint32_t goal,
302 bool include_anonymous
303) {
304 TSNode node = self;
305 bool did_descend = true;
306
307 while (did_descend) {
308 did_descend = false;
309
310 TSNode child;
311 NodeChildIterator iterator = ts_node_iterate_children(&node);
312 while (ts_node_child_iterator_next(&iterator, &child)) {
313 if (ts_node_end_byte(child) > goal) {
314 if (ts_node__is_relevant(child, include_anonymous)) {
315 return child;
316 } else if (ts_node_child_count(child) > 0) {
317 did_descend = true;
318 node = child;
319 break;
320 }
321 }
322 }
323 }
324
325 return ts_node__null();
326}
327
328static inline TSNode ts_node__descendant_for_byte_range(
329 TSNode self,
330 uint32_t range_start,
331 uint32_t range_end,
332 bool include_anonymous
333) {
334 TSNode node = self;
335 TSNode last_visible_node = self;
336
337 bool did_descend = true;
338 while (did_descend) {
339 did_descend = false;
340
341 TSNode child;
342 NodeChildIterator iterator = ts_node_iterate_children(&node);
343 while (ts_node_child_iterator_next(&iterator, &child)) {
344 uint32_t node_end = iterator.position.bytes;
345
346 // The end of this node must extend far enough forward to touch
347 // the end of the range and exceed the start of the range.
348 if (node_end < range_end) continue;
349 if (node_end <= range_start) continue;
350
351 // The start of this node must extend far enough backward to
352 // touch the start of the range.
353 if (range_start < ts_node_start_byte(child)) break;
354
355 node = child;
356 if (ts_node__is_relevant(node, include_anonymous)) {
357 last_visible_node = node;
358 }
359 did_descend = true;
360 break;
361 }
362 }
363
364 return last_visible_node;
365}
366
367static inline TSNode ts_node__descendant_for_point_range(
368 TSNode self,
369 TSPoint range_start,
370 TSPoint range_end,
371 bool include_anonymous
372) {
373 TSNode node = self;
374 TSNode last_visible_node = self;
375
376 bool did_descend = true;
377 while (did_descend) {
378 did_descend = false;
379
380 TSNode child;
381 NodeChildIterator iterator = ts_node_iterate_children(&node);
382 while (ts_node_child_iterator_next(&iterator, &child)) {
383 TSPoint node_end = iterator.position.extent;
384
385 // The end of this node must extend far enough forward to touch
386 // the end of the range and exceed the start of the range.
387 if (point_lt(node_end, range_end)) continue;
388 if (point_lte(node_end, range_start)) continue;
389
390 // The start of this node must extend far enough backward to
391 // touch the start of the range.
392 if (point_lt(range_start, ts_node_start_point(child))) break;
393
394 node = child;
395 if (ts_node__is_relevant(node, include_anonymous)) {
396 last_visible_node = node;
397 }
398 did_descend = true;
399 break;
400 }
401 }
402
403 return last_visible_node;
404}
405
406// TSNode - public
407
408uint32_t ts_node_end_byte(TSNode self) {
409 return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes;
410}
411
412TSPoint ts_node_end_point(TSNode self) {
413 return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent);
414}
415
416TSSymbol ts_node_symbol(TSNode self) {
417 TSSymbol symbol = ts_node__alias(&self);
418 if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self));
419 return ts_language_public_symbol(self.tree->language, symbol);
420}
421
422const char *ts_node_type(TSNode self) {
423 TSSymbol symbol = ts_node__alias(&self);
424 if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self));
425 return ts_language_symbol_name(self.tree->language, symbol);
426}
427
428const TSLanguage *ts_node_language(TSNode self) {
429 return self.tree->language;
430}
431
432TSSymbol ts_node_grammar_symbol(TSNode self) {
433 return ts_subtree_symbol(ts_node__subtree(self));
434}
435
436const char *ts_node_grammar_type(TSNode self) {
437 TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self));
438 return ts_language_symbol_name(self.tree->language, symbol);
439}
440
441char *ts_node_string(TSNode self) {
442 return ts_subtree_string(ts_node__subtree(self), self.tree->language, false);
443}
444
445bool ts_node_eq(TSNode self, TSNode other) {
446 return self.tree == other.tree && self.id == other.id;
447}
448
449bool ts_node_is_null(TSNode self) {
450 return self.id == 0;
451}
452
453bool ts_node_is_extra(TSNode self) {
454 return ts_subtree_extra(ts_node__subtree(self));
455}
456
457bool ts_node_is_named(TSNode self) {
458 TSSymbol alias = ts_node__alias(&self);
459 return alias
460 ? ts_language_symbol_metadata(self.tree->language, alias).named
461 : ts_subtree_named(ts_node__subtree(self));
462}
463
464bool ts_node_is_missing(TSNode self) {
465 return ts_subtree_missing(ts_node__subtree(self));
466}
467
468bool ts_node_has_changes(TSNode self) {
469 return ts_subtree_has_changes(ts_node__subtree(self));
470}
471
472bool ts_node_has_error(TSNode self) {
473 return ts_subtree_error_cost(ts_node__subtree(self)) > 0;
474}
475
476bool ts_node_is_error(TSNode self) {
477 TSSymbol symbol = ts_node_symbol(self);
478 return symbol == ts_builtin_sym_error;
479}
480
481uint32_t ts_node_descendant_count(TSNode self) {
482 return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1;
483}
484
485TSStateId ts_node_parse_state(TSNode self) {
486 return ts_subtree_parse_state(ts_node__subtree(self));
487}
488
489TSStateId ts_node_next_parse_state(TSNode self) {
490 const TSLanguage *language = self.tree->language;
491 uint16_t state = ts_node_parse_state(self);
492 if (state == TS_TREE_STATE_NONE) {
493 return TS_TREE_STATE_NONE;
494 }
495 uint16_t symbol = ts_node_grammar_symbol(self);
496 return ts_language_next_state(language, state, symbol);
497}
498
499TSNode ts_node_parent(TSNode self) {
500 TSNode node = ts_tree_root_node(self.tree);
501 uint32_t end_byte = ts_node_end_byte(self);
502 if (node.id == self.id) return ts_node__null();
503
504 TSNode last_visible_node = node;
505 bool did_descend = true;
506 while (did_descend) {
507 did_descend = false;
508
509 TSNode child;
510 NodeChildIterator iterator = ts_node_iterate_children(&node);
511 while (ts_node_child_iterator_next(&iterator, &child)) {
512 if (
513 ts_node_start_byte(child) > ts_node_start_byte(self) ||
514 child.id == self.id
515 ) break;
516 if (iterator.position.bytes >= end_byte) {
517 node = child;
518 if (ts_node__is_relevant(child, true)) {
519 last_visible_node = node;
520 }
521 did_descend = true;
522 break;
523 }
524 }
525 }
526
527 return last_visible_node;
528}
529
530TSNode ts_node_child(TSNode self, uint32_t child_index) {
531 return ts_node__child(self, child_index, true);
532}
533
534TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
535 return ts_node__child(self, child_index, false);
536}
537
538TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
539recur:
540 if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
541
542 const TSFieldMapEntry *field_map, *field_map_end;
543 ts_language_field_map(
544 self.tree->language,
545 ts_node__subtree(self).ptr->production_id,
546 &field_map,
547 &field_map_end
548 );
549 if (field_map == field_map_end) return ts_node__null();
550
551 // The field mappings are sorted by their field id. Scan all
552 // the mappings to find the ones for the given field id.
553 while (field_map->field_id < field_id) {
554 field_map++;
555 if (field_map == field_map_end) return ts_node__null();
556 }
557 while (field_map_end[-1].field_id > field_id) {
558 field_map_end--;
559 if (field_map == field_map_end) return ts_node__null();
560 }
561
562 TSNode child;
563 NodeChildIterator iterator = ts_node_iterate_children(&self);
564 while (ts_node_child_iterator_next(&iterator, &child)) {
565 if (!ts_subtree_extra(ts_node__subtree(child))) {
566 uint32_t index = iterator.structural_child_index - 1;
567 if (index < field_map->child_index) continue;
568
569 // Hidden nodes' fields are "inherited" by their visible parent.
570 if (field_map->inherited) {
571
572 // If this is the *last* possible child node for this field,
573 // then perform a tail call to avoid recursion.
574 if (field_map + 1 == field_map_end) {
575 self = child;
576 goto recur;
577 }
578
579 // Otherwise, descend into this child, but if it doesn't contain
580 // the field, continue searching subsequent children.
581 else {
582 TSNode result = ts_node_child_by_field_id(child, field_id);
583 if (result.id) return result;
584 field_map++;
585 if (field_map == field_map_end) return ts_node__null();
586 }
587 }
588
589 else if (ts_node__is_relevant(child, true)) {
590 return child;
591 }
592
593 // If the field refers to a hidden node with visible children,
594 // return the first visible child.
595 else if (ts_node_child_count(child) > 0 ) {
596 return ts_node_child(child, 0);
597 }
598
599 // Otherwise, continue searching subsequent children.
600 else {
601 field_map++;
602 if (field_map == field_map_end) return ts_node__null();
603 }
604 }
605 }
606
607 return ts_node__null();
608}
609
610static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) {
611 const TSFieldMapEntry *field_map, *field_map_end;
612 ts_language_field_map(
613 self.tree->language,
614 ts_node__subtree(self).ptr->production_id,
615 &field_map,
616 &field_map_end
617 );
618 for (; field_map != field_map_end; field_map++) {
619 if (!field_map->inherited && field_map->child_index == structural_child_index) {
620 return self.tree->language->field_names[field_map->field_id];
621 }
622 }
623 return NULL;
624}
625
626const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) {
627 TSNode result = self;
628 bool did_descend = true;
629 const char *inherited_field_name = NULL;
630
631 while (did_descend) {
632 did_descend = false;
633
634 TSNode child;
635 uint32_t index = 0;
636 NodeChildIterator iterator = ts_node_iterate_children(&result);
637 while (ts_node_child_iterator_next(&iterator, &child)) {
638 if (ts_node__is_relevant(child, true)) {
639 if (index == child_index) {
640 const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
641 if (field_name) return field_name;
642 return inherited_field_name;
643 }
644 index++;
645 } else {
646 uint32_t grandchild_index = child_index - index;
647 uint32_t grandchild_count = ts_node__relevant_child_count(child, true);
648 if (grandchild_index < grandchild_count) {
649 const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
650 if (field_name) inherited_field_name = field_name;
651
652 did_descend = true;
653 result = child;
654 child_index = grandchild_index;
655 break;
656 }
657 index += grandchild_count;
658 }
659 }
660 }
661
662 return NULL;
663}
664
665TSNode ts_node_child_by_field_name(
666 TSNode self,
667 const char *name,
668 uint32_t name_length
669) {
670 TSFieldId field_id = ts_language_field_id_for_name(
671 self.tree->language,
672 name,
673 name_length
674 );
675 return ts_node_child_by_field_id(self, field_id);
676}
677
678uint32_t ts_node_child_count(TSNode self) {
679 Subtree tree = ts_node__subtree(self);
680 if (ts_subtree_child_count(tree) > 0) {
681 return tree.ptr->visible_child_count;
682 } else {
683 return 0;
684 }
685}
686
687uint32_t ts_node_named_child_count(TSNode self) {
688 Subtree tree = ts_node__subtree(self);
689 if (ts_subtree_child_count(tree) > 0) {
690 return tree.ptr->named_child_count;
691 } else {
692 return 0;
693 }
694}
695
696TSNode ts_node_next_sibling(TSNode self) {
697 return ts_node__next_sibling(self, true);
698}
699
700TSNode ts_node_next_named_sibling(TSNode self) {
701 return ts_node__next_sibling(self, false);
702}
703
704TSNode ts_node_prev_sibling(TSNode self) {
705 return ts_node__prev_sibling(self, true);
706}
707
708TSNode ts_node_prev_named_sibling(TSNode self) {
709 return ts_node__prev_sibling(self, false);
710}
711
712TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
713 return ts_node__first_child_for_byte(self, byte, true);
714}
715
716TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
717 return ts_node__first_child_for_byte(self, byte, false);
718}
719
720TSNode ts_node_descendant_for_byte_range(
721 TSNode self,
722 uint32_t start,
723 uint32_t end
724) {
725 return ts_node__descendant_for_byte_range(self, start, end, true);
726}
727
728TSNode ts_node_named_descendant_for_byte_range(
729 TSNode self,
730 uint32_t start,
731 uint32_t end
732) {
733 return ts_node__descendant_for_byte_range(self, start, end, false);
734}
735
736TSNode ts_node_descendant_for_point_range(
737 TSNode self,
738 TSPoint start,
739 TSPoint end
740) {
741 return ts_node__descendant_for_point_range(self, start, end, true);
742}
743
744TSNode ts_node_named_descendant_for_point_range(
745 TSNode self,
746 TSPoint start,
747 TSPoint end
748) {
749 return ts_node__descendant_for_point_range(self, start, end, false);
750}
751
752void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
753 uint32_t start_byte = ts_node_start_byte(*self);
754 TSPoint start_point = ts_node_start_point(*self);
755
756 if (start_byte >= edit->old_end_byte) {
757 start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte);
758 start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point));
759 } else if (start_byte > edit->start_byte) {
760 start_byte = edit->new_end_byte;
761 start_point = edit->new_end_point;
762 }
763
764 self->context[0] = start_byte;
765 self->context[1] = start_point.row;
766 self->context[2] = start_point.column;
767}
diff --git a/vendor/tree-sitter/lib/src/parser.c b/vendor/tree-sitter/lib/src/parser.c
new file mode 100644
index 0000000..cc93162
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/parser.c
@@ -0,0 +1,2011 @@
1#include <time.h>
2#include <assert.h>
3#include <stdio.h>
4#include <limits.h>
5#include <stdbool.h>
6#include "tree_sitter/api.h"
7#include "./alloc.h"
8#include "./array.h"
9#include "./atomic.h"
10#include "./clock.h"
11#include "./error_costs.h"
12#include "./get_changed_ranges.h"
13#include "./language.h"
14#include "./length.h"
15#include "./lexer.h"
16#include "./reduce_action.h"
17#include "./reusable_node.h"
18#include "./stack.h"
19#include "./subtree.h"
20#include "./tree.h"
21
// Format a message into the lexer's debug buffer and emit it through
// ts_parser__log. Does nothing unless a logger callback or a dot-graph file
// is configured. Expects a `TSParser *self` in scope.
//
// This macro expands to a bare `if { }` statement; existing call sites rely
// on that form (some omit the trailing semicolon).
#define LOG(...)                                                  \
  if (self->lexer.logger.log || self->dot_graph_file) {           \
    snprintf(                                                     \
      self->lexer.debug_buffer,                                   \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE,                      \
      __VA_ARGS__                                                 \
    );                                                            \
    ts_parser__log(self);                                         \
  }
27
// Log a "lexed_lookahead sym:<name>, size:<n>" message, escaping tab,
// newline, vertical tab, form feed, carriage return and backslash in the
// symbol name. Does nothing unless a logger callback or a dot-graph file is
// configured. Expects a `TSParser *self` in scope.
//
// The copy loop stops while at least three bytes of the debug buffer remain:
// two for a possible escape pair and one so that the trailing snprintf
// always has room to NUL-terminate. Over-long symbol names are truncated
// instead of writing past the end of the buffer.
#define LOG_LOOKAHEAD(symbol_name, size)                      \
  if (self->lexer.logger.log || self->dot_graph_file) {       \
    char *buf = self->lexer.debug_buffer;                     \
    const char *symbol = symbol_name;                         \
    int off = sprintf(buf, "lexed_lookahead sym:");           \
    for (                                                     \
      int i = 0;                                              \
      symbol[i] != '\0'                                       \
      && off + 2 < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;     \
      i++                                                     \
    ) {                                                       \
      switch (symbol[i]) {                                    \
      case '\t': buf[off++] = '\\'; buf[off++] = 't'; break;  \
      case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break;  \
      case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break;  \
      case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break;  \
      case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break;  \
      case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \
      default:   buf[off++] = symbol[i]; break;               \
      }                                                       \
    }                                                         \
    snprintf(                                                 \
      buf + off,                                              \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off,            \
      ", size:%u",                                            \
      size                                                    \
    );                                                        \
    ts_parser__log(self);                                     \
  }
57
// When a dot-graph file is configured, print the parse stack to it as a dot
// graph followed by two newlines. Expects a `TSParser *self` in scope.
#define LOG_STACK()                                                    \
  if (self->dot_graph_file) {                                          \
    ts_stack_print_dot_graph(                                          \
      self->stack, self->language, self->dot_graph_file                \
    );                                                                 \
    fputs("\n\n", self->dot_graph_file);                               \
  }
63
// When a dot-graph file is configured, print `tree` to it as a dot graph
// followed by a newline. Expects a `TSParser *self` in scope.
#define LOG_TREE(tree)                                                 \
  if (self->dot_graph_file) {                                          \
    ts_subtree_print_dot_graph(                                        \
      tree, self->language, self->dot_graph_file                       \
    );                                                                 \
    fputs("\n", self->dot_graph_file);                                 \
  }
69
// Name of `symbol` in the parser's language. Expects `self` in scope.
#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
71
// Name of the symbol of subtree `tree`. Expects `self` in scope.
#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
73
74static const unsigned MAX_VERSION_COUNT = 6;
75static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
76static const unsigned MAX_SUMMARY_DEPTH = 16;
77static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
78static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
79
80typedef struct {
81 Subtree token;
82 Subtree last_external_token;
83 uint32_t byte_index;
84} TokenCache;
85
86struct TSParser {
87 Lexer lexer;
88 Stack *stack;
89 SubtreePool tree_pool;
90 const TSLanguage *language;
91 ReduceActionSet reduce_actions;
92 Subtree finished_tree;
93 SubtreeArray trailing_extras;
94 SubtreeArray trailing_extras2;
95 SubtreeArray scratch_trees;
96 TokenCache token_cache;
97 ReusableNode reusable_node;
98 void *external_scanner_payload;
99 FILE *dot_graph_file;
100 TSClock end_clock;
101 TSDuration timeout_duration;
102 unsigned accept_count;
103 unsigned operation_count;
104 const volatile size_t *cancellation_flag;
105 Subtree old_tree;
106 TSRangeArray included_range_differences;
107 unsigned included_range_difference_index;
108};
109
// Summary of a stack version's error situation, as built by
// ts_parser__version_status and compared by ts_parser__compare_versions.
typedef struct {
  unsigned cost;           // error cost (paused versions get one skipped-tree cost added)
  unsigned node_count;     // node count since the last error
  int dynamic_precedence;  // dynamic precedence of the version
  bool is_in_error;        // paused, or currently in ERROR_STATE
} ErrorStatus;
116
// Outcome of comparing two stack versions ("left" is the first argument of
// ts_parser__compare_versions, "right" the second). "Take" is the decisive
// outcome, "Prefer" the weaker one.
typedef enum {
  ErrorComparisonTakeLeft,     // left is decisively better
  ErrorComparisonPreferLeft,   // left is somewhat better
  ErrorComparisonNone,         // no preference
  ErrorComparisonPreferRight,  // right is somewhat better
  ErrorComparisonTakeRight,    // right is decisively better
} ErrorComparison;
124
// In-memory text handed to ts_string_input_read.
typedef struct {
  const char *string;  // start of the text
  uint32_t length;     // number of bytes available at `string`
} TSStringInput;
129
130// StringInput
131
132static const char *ts_string_input_read(
133 void *_self,
134 uint32_t byte,
135 TSPoint point,
136 uint32_t *length
137) {
138 (void)point;
139 TSStringInput *self = (TSStringInput *)_self;
140 if (byte >= self->length) {
141 *length = 0;
142 return "";
143 } else {
144 *length = self->length - byte;
145 return self->string + byte;
146 }
147}
148
149// Parser - Private
150
151static void ts_parser__log(TSParser *self) {
152 if (self->lexer.logger.log) {
153 self->lexer.logger.log(
154 self->lexer.logger.payload,
155 TSLogTypeParse,
156 self->lexer.debug_buffer
157 );
158 }
159
160 if (self->dot_graph_file) {
161 fprintf(self->dot_graph_file, "graph {\nlabel=\"");
162 for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) {
163 if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file);
164 fputc(*chr, self->dot_graph_file);
165 }
166 fprintf(self->dot_graph_file, "\"\n}\n\n");
167 }
168}
169
170static bool ts_parser__breakdown_top_of_stack(
171 TSParser *self,
172 StackVersion version
173) {
174 bool did_break_down = false;
175 bool pending = false;
176
177 do {
178 StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
179 if (!pop.size) break;
180
181 did_break_down = true;
182 pending = false;
183 for (uint32_t i = 0; i < pop.size; i++) {
184 StackSlice slice = pop.contents[i];
185 TSStateId state = ts_stack_state(self->stack, slice.version);
186 Subtree parent = *array_front(&slice.subtrees);
187
188 for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
189 Subtree child = ts_subtree_children(parent)[j];
190 pending = ts_subtree_child_count(child) > 0;
191
192 if (ts_subtree_is_error(child)) {
193 state = ERROR_STATE;
194 } else if (!ts_subtree_extra(child)) {
195 state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
196 }
197
198 ts_subtree_retain(child);
199 ts_stack_push(self->stack, slice.version, child, pending, state);
200 }
201
202 for (uint32_t j = 1; j < slice.subtrees.size; j++) {
203 Subtree tree = slice.subtrees.contents[j];
204 ts_stack_push(self->stack, slice.version, tree, false, state);
205 }
206
207 ts_subtree_release(&self->tree_pool, parent);
208 array_delete(&slice.subtrees);
209
210 LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
211 LOG_STACK();
212 }
213 } while (pending);
214
215 return did_break_down;
216}
217
218static void ts_parser__breakdown_lookahead(
219 TSParser *self,
220 Subtree *lookahead,
221 TSStateId state,
222 ReusableNode *reusable_node
223) {
224 bool did_descend = false;
225 Subtree tree = reusable_node_tree(reusable_node);
226 while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
227 LOG("state_mismatch sym:%s", TREE_NAME(tree));
228 reusable_node_descend(reusable_node);
229 tree = reusable_node_tree(reusable_node);
230 did_descend = true;
231 }
232
233 if (did_descend) {
234 ts_subtree_release(&self->tree_pool, *lookahead);
235 *lookahead = tree;
236 ts_subtree_retain(*lookahead);
237 }
238}
239
240static ErrorComparison ts_parser__compare_versions(
241 TSParser *self,
242 ErrorStatus a,
243 ErrorStatus b
244) {
245 (void)self;
246 if (!a.is_in_error && b.is_in_error) {
247 if (a.cost < b.cost) {
248 return ErrorComparisonTakeLeft;
249 } else {
250 return ErrorComparisonPreferLeft;
251 }
252 }
253
254 if (a.is_in_error && !b.is_in_error) {
255 if (b.cost < a.cost) {
256 return ErrorComparisonTakeRight;
257 } else {
258 return ErrorComparisonPreferRight;
259 }
260 }
261
262 if (a.cost < b.cost) {
263 if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
264 return ErrorComparisonTakeLeft;
265 } else {
266 return ErrorComparisonPreferLeft;
267 }
268 }
269
270 if (b.cost < a.cost) {
271 if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
272 return ErrorComparisonTakeRight;
273 } else {
274 return ErrorComparisonPreferRight;
275 }
276 }
277
278 if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
279 if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
280 return ErrorComparisonNone;
281}
282
283static ErrorStatus ts_parser__version_status(
284 TSParser *self,
285 StackVersion version
286) {
287 unsigned cost = ts_stack_error_cost(self->stack, version);
288 bool is_paused = ts_stack_is_paused(self->stack, version);
289 if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
290 return (ErrorStatus) {
291 .cost = cost,
292 .node_count = ts_stack_node_count_since_error(self->stack, version),
293 .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
294 .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
295 };
296}
297
298static bool ts_parser__better_version_exists(
299 TSParser *self,
300 StackVersion version,
301 bool is_in_error,
302 unsigned cost
303) {
304 if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
305 return true;
306 }
307
308 Length position = ts_stack_position(self->stack, version);
309 ErrorStatus status = {
310 .cost = cost,
311 .is_in_error = is_in_error,
312 .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
313 .node_count = ts_stack_node_count_since_error(self->stack, version),
314 };
315
316 for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
317 if (i == version ||
318 !ts_stack_is_active(self->stack, i) ||
319 ts_stack_position(self->stack, i).bytes < position.bytes) continue;
320 ErrorStatus status_i = ts_parser__version_status(self, i);
321 switch (ts_parser__compare_versions(self, status, status_i)) {
322 case ErrorComparisonTakeRight:
323 return true;
324 case ErrorComparisonPreferRight:
325 if (ts_stack_can_merge(self->stack, i, version)) return true;
326 break;
327 default:
328 break;
329 }
330 }
331
332 return false;
333}
334
335static void ts_parser__restore_external_scanner(
336 TSParser *self,
337 Subtree external_token
338) {
339 if (external_token.ptr) {
340 self->language->external_scanner.deserialize(
341 self->external_scanner_payload,
342 ts_external_scanner_state_data(&external_token.ptr->external_scanner_state),
343 external_token.ptr->external_scanner_state.length
344 );
345 } else {
346 self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
347 }
348}
349
350static bool ts_parser__can_reuse_first_leaf(
351 TSParser *self,
352 TSStateId state,
353 Subtree tree,
354 TableEntry *table_entry
355) {
356 TSLexMode current_lex_mode = self->language->lex_modes[state];
357 TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
358 TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
359 TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
360
361 // At the end of a non-terminal extra node, the lexer normally returns
362 // NULL, which indicates that the parser should look for a reduce action
363 // at symbol `0`. Avoid reusing tokens in this situation to ensure that
364 // the same thing happens when incrementally reparsing.
365 if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
366
367 // If the token was created in a state with the same set of lookaheads, it is reusable.
368 if (
369 table_entry->action_count > 0 &&
370 memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
371 (
372 leaf_symbol != self->language->keyword_capture_token ||
373 (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state)
374 )
375 ) return true;
376
377 // Empty tokens are not reusable in states with different lookaheads.
378 if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
379
380 // If the current state allows external tokens or other tokens that conflict with this
381 // token, this token is not reusable.
382 return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
383}
384
385static Subtree ts_parser__lex(
386 TSParser *self,
387 StackVersion version,
388 TSStateId parse_state
389) {
390 TSLexMode lex_mode = self->language->lex_modes[parse_state];
391 if (lex_mode.lex_state == (uint16_t)-1) {
392 LOG("no_lookahead_after_non_terminal_extra");
393 return NULL_SUBTREE;
394 }
395
396 const Length start_position = ts_stack_position(self->stack, version);
397 const Subtree external_token = ts_stack_last_external_token(self->stack, version);
398 const bool *valid_external_tokens = ts_language_enabled_external_tokens(
399 self->language,
400 lex_mode.external_lex_state
401 );
402
403 bool found_external_token = false;
404 bool error_mode = parse_state == ERROR_STATE;
405 bool skipped_error = false;
406 bool called_get_column = false;
407 int32_t first_error_character = 0;
408 Length error_start_position = length_zero();
409 Length error_end_position = length_zero();
410 uint32_t lookahead_end_byte = 0;
411 uint32_t external_scanner_state_len = 0;
412 bool external_scanner_state_changed = false;
413 ts_lexer_reset(&self->lexer, start_position);
414
415 for (;;) {
416 Length current_position = self->lexer.current_position;
417
418 if (valid_external_tokens) {
419 LOG(
420 "lex_external state:%d, row:%u, column:%u",
421 lex_mode.external_lex_state,
422 current_position.extent.row,
423 current_position.extent.column
424 );
425 ts_lexer_start(&self->lexer);
426 ts_parser__restore_external_scanner(self, external_token);
427 bool found_token = self->language->external_scanner.scan(
428 self->external_scanner_payload,
429 &self->lexer.data,
430 valid_external_tokens
431 );
432 ts_lexer_finish(&self->lexer, &lookahead_end_byte);
433
434 if (found_token) {
435 external_scanner_state_len = self->language->external_scanner.serialize(
436 self->external_scanner_payload,
437 self->lexer.debug_buffer
438 );
439 external_scanner_state_changed = !ts_external_scanner_state_eq(
440 ts_subtree_external_scanner_state(external_token),
441 self->lexer.debug_buffer,
442 external_scanner_state_len
443 );
444
445 // When recovering from an error, ignore any zero-length external tokens
446 // unless they have changed the external scanner's state. This helps to
447 // avoid infinite loops which could otherwise occur, because the lexer is
448 // looking for any possible token, instead of looking for the specific set of
449 // tokens that are valid in some parse state.
450 //
451 // Note that it's possible that the token end position may be *before* the
452 // original position of the lexer because of the way that tokens are positioned
453 // at included range boundaries: when a token is terminated at the start of
454 // an included range, it is marked as ending at the *end* of the preceding
455 // included range.
456 if (
457 self->lexer.token_end_position.bytes <= current_position.bytes &&
458 (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
459 !external_scanner_state_changed
460 ) {
461 LOG(
462 "ignore_empty_external_token symbol:%s",
463 SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol])
464 )
465 found_token = false;
466 }
467 }
468
469 if (found_token) {
470 found_external_token = true;
471 called_get_column = self->lexer.did_get_column;
472 break;
473 }
474
475 ts_lexer_reset(&self->lexer, current_position);
476 }
477
478 LOG(
479 "lex_internal state:%d, row:%u, column:%u",
480 lex_mode.lex_state,
481 current_position.extent.row,
482 current_position.extent.column
483 );
484 ts_lexer_start(&self->lexer);
485 bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
486 ts_lexer_finish(&self->lexer, &lookahead_end_byte);
487 if (found_token) break;
488
489 if (!error_mode) {
490 error_mode = true;
491 lex_mode = self->language->lex_modes[ERROR_STATE];
492 valid_external_tokens = ts_language_enabled_external_tokens(
493 self->language,
494 lex_mode.external_lex_state
495 );
496 ts_lexer_reset(&self->lexer, start_position);
497 continue;
498 }
499
500 if (!skipped_error) {
501 LOG("skip_unrecognized_character");
502 skipped_error = true;
503 error_start_position = self->lexer.token_start_position;
504 error_end_position = self->lexer.token_start_position;
505 first_error_character = self->lexer.data.lookahead;
506 }
507
508 if (self->lexer.current_position.bytes == error_end_position.bytes) {
509 if (self->lexer.data.eof(&self->lexer.data)) {
510 self->lexer.data.result_symbol = ts_builtin_sym_error;
511 break;
512 }
513 self->lexer.data.advance(&self->lexer.data, false);
514 }
515
516 error_end_position = self->lexer.current_position;
517 }
518
519 Subtree result;
520 if (skipped_error) {
521 Length padding = length_sub(error_start_position, start_position);
522 Length size = length_sub(error_end_position, error_start_position);
523 uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
524 result = ts_subtree_new_error(
525 &self->tree_pool,
526 first_error_character,
527 padding,
528 size,
529 lookahead_bytes,
530 parse_state,
531 self->language
532 );
533 } else {
534 bool is_keyword = false;
535 TSSymbol symbol = self->lexer.data.result_symbol;
536 Length padding = length_sub(self->lexer.token_start_position, start_position);
537 Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
538 uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
539
540 if (found_external_token) {
541 symbol = self->language->external_scanner.symbol_map[symbol];
542 } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
543 uint32_t end_byte = self->lexer.token_end_position.bytes;
544 ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
545 ts_lexer_start(&self->lexer);
546 if (
547 self->language->keyword_lex_fn(&self->lexer.data, 0) &&
548 self->lexer.token_end_position.bytes == end_byte &&
549 ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
550 ) {
551 is_keyword = true;
552 symbol = self->lexer.data.result_symbol;
553 }
554 }
555
556 result = ts_subtree_new_leaf(
557 &self->tree_pool,
558 symbol,
559 padding,
560 size,
561 lookahead_bytes,
562 parse_state,
563 found_external_token,
564 called_get_column,
565 is_keyword,
566 self->language
567 );
568
569 if (found_external_token) {
570 MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result);
571 ts_external_scanner_state_init(
572 &mut_result.ptr->external_scanner_state,
573 self->lexer.debug_buffer,
574 external_scanner_state_len
575 );
576 mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed;
577 }
578 }
579
580 LOG_LOOKAHEAD(
581 SYM_NAME(ts_subtree_symbol(result)),
582 ts_subtree_total_size(result).bytes
583 );
584 return result;
585}
586
587static Subtree ts_parser__get_cached_token(
588 TSParser *self,
589 TSStateId state,
590 size_t position,
591 Subtree last_external_token,
592 TableEntry *table_entry
593) {
594 TokenCache *cache = &self->token_cache;
595 if (
596 cache->token.ptr && cache->byte_index == position &&
597 ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
598 ) {
599 ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
600 if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
601 ts_subtree_retain(cache->token);
602 return cache->token;
603 }
604 }
605 return NULL_SUBTREE;
606}
607
608static void ts_parser__set_cached_token(
609 TSParser *self,
610 uint32_t byte_index,
611 Subtree last_external_token,
612 Subtree token
613) {
614 TokenCache *cache = &self->token_cache;
615 if (token.ptr) ts_subtree_retain(token);
616 if (last_external_token.ptr) ts_subtree_retain(last_external_token);
617 if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
618 if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token);
619 cache->token = token;
620 cache->byte_index = byte_index;
621 cache->last_external_token = last_external_token;
622}
623
624static bool ts_parser__has_included_range_difference(
625 const TSParser *self,
626 uint32_t start_position,
627 uint32_t end_position
628) {
629 return ts_range_array_intersects(
630 &self->included_range_differences,
631 self->included_range_difference_index,
632 start_position,
633 end_position
634 );
635}
636
// Try to reuse a subtree from the previous syntax tree as the next lookahead.
//
// Steps through `self->reusable_node` until it finds a subtree that starts at
// byte `position` and passes every reuse check below. On success the subtree
// is retained and returned, and `table_entry` holds the parse table entry for
// the subtree's leaf symbol in `*state`. Returns NULL_SUBTREE when nothing can
// be reused. `*state` is rewritten if the top of the stack is broken down
// along the way.
637 static Subtree ts_parser__reuse_node(
638 TSParser *self,
639 StackVersion version,
640 TSStateId *state,
641 uint32_t position,
642 Subtree last_external_token,
643 TableEntry *table_entry
644 ) {
645 Subtree result;
646 while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
647 uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
648 uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
649
650 // Do not reuse an EOF node if the included ranges array has changes
651 // later on in the file.
652 if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
653
// The candidate begins after the requested position: stop searching.
654 if (byte_offset > position) {
655 LOG("before_reusable_node symbol:%s", TREE_NAME(result));
656 break;
657 }
658
// The candidate begins before the requested position. If it ends at or
// before that position, move on to the next node; otherwise try to descend
// into it, and move on only if descending fails.
659 if (byte_offset < position) {
660 LOG("past_reusable_node symbol:%s", TREE_NAME(result));
661 if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
662 reusable_node_advance(&self->reusable_node);
663 }
664 continue;
665 }
666
// From here on the candidate starts exactly at `position`.
667 if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
668 LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
669 reusable_node_advance(&self->reusable_node);
670 continue;
671 }
672
// Collect the first property (if any) that rules the node out; the string
// is only used to build the log message below.
673 const char *reason = NULL;
674 if (ts_subtree_has_changes(result)) {
675 reason = "has_changes";
676 } else if (ts_subtree_is_error(result)) {
677 reason = "is_error";
678 } else if (ts_subtree_missing(result)) {
679 reason = "is_missing";
680 } else if (ts_subtree_is_fragile(result)) {
681 reason = "is_fragile";
682 } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
683 reason = "contains_different_included_range";
684 }
685
// A rejected node may still contain reusable children, so descend first.
// If it cannot be descended into, skip it, break down the top of the stack
// and refresh the caller's state from the stack.
686 if (reason) {
687 LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
688 if (!reusable_node_descend(&self->reusable_node)) {
689 reusable_node_advance(&self->reusable_node);
690 ts_parser__breakdown_top_of_stack(self, version);
691 *state = ts_stack_state(self->stack, version);
692 }
693 continue;
694 }
695
// Final check: the node's leaf symbol must be usable in the current state.
// If not, skip past that leaf and give up on reuse for this call.
696 TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
697 ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
698 if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
699 LOG(
700 "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
701 TREE_NAME(result),
702 SYM_NAME(leaf_symbol)
703 );
704 reusable_node_advance_past_leaf(&self->reusable_node);
705 break;
706 }
707
// The caller receives its own reference to the reused subtree.
708 LOG("reuse_node symbol:%s", TREE_NAME(result));
709 ts_subtree_retain(result);
710 return result;
711 }
712
713 return NULL_SUBTREE;
714 }
715
716// Determine if a given tree should be replaced by an alternative tree.
717//
718// The decision is based on the trees' error costs (if any), their dynamic precedence,
719// and finally, as a default, by a recursive comparison of the trees' symbols.
720static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
721 if (!left.ptr) return true;
722 if (!right.ptr) return false;
723
724 if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
725 LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
726 return true;
727 }
728
729 if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
730 LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
731 return false;
732 }
733
734 if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) {
735 LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
736 TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
737 ts_subtree_dynamic_precedence(left));
738 return true;
739 }
740
741 if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) {
742 LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
743 TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right),
744 ts_subtree_dynamic_precedence(right));
745 return false;
746 }
747
748 if (ts_subtree_error_cost(left) > 0) return true;
749
750 int comparison = ts_subtree_compare(left, right);
751 switch (comparison) {
752 case -1:
753 LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
754 return false;
755 break;
756 case 1:
757 LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
758 return true;
759 default:
760 LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
761 return false;
762 }
763}
764
765// Determine if a given tree's children should be replaced by an alternative
766// array of children.
767static bool ts_parser__select_children(
768 TSParser *self,
769 Subtree left,
770 const SubtreeArray *children
771) {
772 array_assign(&self->scratch_trees, children);
773
774 // Create a temporary subtree using the scratch trees array. This node does
775 // not perform any allocation except for possibly growing the array to make
776 // room for its own heap data. The scratch tree is never explicitly released,
777 // so the same 'scratch trees' array can be reused again later.
778 MutableSubtree scratch_tree = ts_subtree_new_node(
779 ts_subtree_symbol(left),
780 &self->scratch_trees,
781 0,
782 self->language
783 );
784
785 return ts_parser__select_tree(
786 self,
787 left,
788 ts_subtree_from_mut(scratch_tree)
789 );
790}
791
792static void ts_parser__shift(
793 TSParser *self,
794 StackVersion version,
795 TSStateId state,
796 Subtree lookahead,
797 bool extra
798) {
799 bool is_leaf = ts_subtree_child_count(lookahead) == 0;
800 Subtree subtree_to_push = lookahead;
801 if (extra != ts_subtree_extra(lookahead) && is_leaf) {
802 MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
803 ts_subtree_set_extra(&result, extra);
804 subtree_to_push = ts_subtree_from_mut(result);
805 }
806
807 ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
808 if (ts_subtree_has_external_tokens(subtree_to_push)) {
809 ts_stack_set_last_external_token(
810 self->stack, version, ts_subtree_last_external_token(subtree_to_push)
811 );
812 }
813}
814
// Perform a reduction on stack version `version`.
//
// Pops `count` nodes, builds a parent node with `symbol` / `production_id`
// for each resulting stack slice, adds `dynamic_precedence` to the parent's
// dynamic precedence and pushes the parent (followed by any trailing extras)
// in the state returned by ts_language_next_state().
//
// `is_fragile` forces the new parent to be marked fragile.
// `end_of_non_terminal_extra` marks the parent as extra when the reduction
// leaves the state unchanged.
//
// Returns the index of the first stack version created by this call, or
// STACK_VERSION_NONE if the version count did not grow.
815 static StackVersion ts_parser__reduce(
816 TSParser *self,
817 StackVersion version,
818 TSSymbol symbol,
819 uint32_t count,
820 int dynamic_precedence,
821 uint16_t production_id,
822 bool is_fragile,
823 bool end_of_non_terminal_extra
824 ) {
825 uint32_t initial_version_count = ts_stack_version_count(self->stack);
826
827 // Pop the given number of nodes from the given version of the parse stack.
828 // If stack versions have previously merged, then there may be more than one
829 // path back through the stack. For each path, create a new parent node to
830 // contain the popped children, and push it onto the stack in place of the
831 // children.
832 StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
833 uint32_t removed_version_count = 0;
834 for (uint32_t i = 0; i < pop.size; i++) {
835 StackSlice slice = pop.contents[i];
// Slice versions were recorded before any removals made in this loop, so
// shift the index down by the number of versions removed so far.
836 StackVersion slice_version = slice.version - removed_version_count;
837
838 // This is where new versions are added to the parse stack. The versions
839 // will all be sorted and truncated at the end of the outer parsing loop.
840 // Allow the maximum version count to be temporarily exceeded, but only
841 // by a limited threshold.
842 if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
843 ts_stack_remove_version(self->stack, slice_version);
844 ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
845 removed_version_count++;
// Also discard every following slice that belongs to the same version.
846 while (i + 1 < pop.size) {
847 StackSlice next_slice = pop.contents[i + 1];
848 if (next_slice.version != slice.version) break;
849 ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
850 i++;
851 }
852 continue;
853 }
854
855 // Extra tokens on top of the stack should not be included in this new parent
856 // node. They will be re-pushed onto the stack after the parent node is
857 // created and pushed.
858 SubtreeArray children = slice.subtrees;
859 ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
860
861 MutableSubtree parent = ts_subtree_new_node(
862 symbol, &children, production_id, self->language
863 );
864
865 // This pop operation may have caused multiple stack versions to collapse
866 // into one, because they all diverged from a common state. In that case,
867 // choose one of the arrays of trees to be the parent node's children, and
868 // delete the rest of the tree arrays.
869 while (i + 1 < pop.size) {
870 StackSlice next_slice = pop.contents[i + 1];
871 if (next_slice.version != slice.version) break;
872 i++;
873
874 SubtreeArray next_slice_children = next_slice.subtrees;
875 ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2);
876
877 if (ts_parser__select_children(
878 self,
879 ts_subtree_from_mut(parent),
880 &next_slice_children
881 )) {
// The alternative children win: drop the current parent and its trailing
// extras, adopt the alternative's extras, and rebuild the parent.
882 ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
883 ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
884 array_swap(&self->trailing_extras, &self->trailing_extras2);
885 parent = ts_subtree_new_node(
886 symbol, &next_slice_children, production_id, self->language
887 );
888 } else {
// The current parent stays: forget the alternative's extras and delete
// the alternative slice's subtree array.
889 array_clear(&self->trailing_extras2);
890 ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
891 }
892 }
893
894 TSStateId state = ts_stack_state(self->stack, slice_version);
895 TSStateId next_state = ts_language_next_state(self->language, state, symbol);
896 if (end_of_non_terminal_extra && next_state == state) {
897 parent.ptr->extra = true;
898 }
// Mark the parent fragile and clear its recorded parse state when the
// reduction was flagged fragile, the pop produced more than one slice, or
// more than one stack version existed when this call began.
899 if (is_fragile || pop.size > 1 || initial_version_count > 1) {
900 parent.ptr->fragile_left = true;
901 parent.ptr->fragile_right = true;
902 parent.ptr->parse_state = TS_TREE_STATE_NONE;
903 } else {
904 parent.ptr->parse_state = state;
905 }
906 parent.ptr->dynamic_precedence += dynamic_precedence;
907
908 // Push the parent node onto the stack, along with any extra tokens that
909 // were previously on top of the stack.
910 ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
911 for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
912 ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
913 }
914
// Try to merge the updated version into an earlier one other than `version`
// itself. A successful merge is counted as a removed version so that later
// slice indices are adjusted accordingly.
915 for (StackVersion j = 0; j < slice_version; j++) {
916 if (j == version) continue;
917 if (ts_stack_merge(self->stack, j, slice_version)) {
918 removed_version_count++;
919 break;
920 }
921 }
922 }
923
924 // Return the first new stack version that was created.
925 return ts_stack_version_count(self->stack) > initial_version_count
926 ? initial_version_count
927 : STACK_VERSION_NONE;
928 }
929
930static void ts_parser__accept(
931 TSParser *self,
932 StackVersion version,
933 Subtree lookahead
934) {
935 assert(ts_subtree_is_eof(lookahead));
936 ts_stack_push(self->stack, version, lookahead, false, 1);
937
938 StackSliceArray pop = ts_stack_pop_all(self->stack, version);
939 for (uint32_t i = 0; i < pop.size; i++) {
940 SubtreeArray trees = pop.contents[i].subtrees;
941
942 Subtree root = NULL_SUBTREE;
943 for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
944 Subtree tree = trees.contents[j];
945 if (!ts_subtree_extra(tree)) {
946 assert(!tree.data.is_inline);
947 uint32_t child_count = ts_subtree_child_count(tree);
948 const Subtree *children = ts_subtree_children(tree);
949 for (uint32_t k = 0; k < child_count; k++) {
950 ts_subtree_retain(children[k]);
951 }
952 array_splice(&trees, j, 1, child_count, children);
953 root = ts_subtree_from_mut(ts_subtree_new_node(
954 ts_subtree_symbol(tree),
955 &trees,
956 tree.ptr->production_id,
957 self->language
958 ));
959 ts_subtree_release(&self->tree_pool, tree);
960 break;
961 }
962 }
963
964 assert(root.ptr);
965 self->accept_count++;
966
967 if (self->finished_tree.ptr) {
968 if (ts_parser__select_tree(self, self->finished_tree, root)) {
969 ts_subtree_release(&self->tree_pool, self->finished_tree);
970 self->finished_tree = root;
971 } else {
972 ts_subtree_release(&self->tree_pool, root);
973 }
974 } else {
975 self->finished_tree = root;
976 }
977 }
978
979 ts_stack_remove_version(self->stack, pop.contents[0].version);
980 ts_stack_halt(self->stack, version);
981}
982
// Perform every reduction that is possible from `starting_version` and from
// the stack versions those reductions create.
//
// If `lookahead_symbol` is non-zero, only the parse actions for that symbol
// are consulted; if it is zero, the actions for every symbol from 1 up to
// (but not including) the language's `token_count` are consulted.
//
// Returns true if at least one visited version had a shift or recover action
// that is neither `extra` nor `repetition`.
983 static bool ts_parser__do_all_potential_reductions(
984 TSParser *self,
985 StackVersion starting_version,
986 TSSymbol lookahead_symbol
987 ) {
988 uint32_t initial_version_count = ts_stack_version_count(self->stack);
989
990 bool can_shift_lookahead_symbol = false;
991 StackVersion version = starting_version;
// `i` counts loop iterations (including those ended by `continue`) and is
// used below to cap how often a version is replaced by its reduction.
992 for (unsigned i = 0; true; i++) {
993 uint32_t version_count = ts_stack_version_count(self->stack);
994 if (version >= version_count) break;
995
// Try to merge this version into one of the versions created since this
// call began. After a merge the same index is examined again on the next
// iteration.
996 bool merged = false;
997 for (StackVersion j = initial_version_count; j < version; j++) {
998 if (ts_stack_merge(self->stack, j, version)) {
999 merged = true;
1000 break;
1001 }
1002 }
1003 if (merged) continue;
1004
1005 TSStateId state = ts_stack_state(self->stack, version);
1006 bool has_shift_action = false;
1007 array_clear(&self->reduce_actions);
1008
// Choose the half-open symbol range [first_symbol, end_symbol) to inspect.
1009 TSSymbol first_symbol, end_symbol;
1010 if (lookahead_symbol != 0) {
1011 first_symbol = lookahead_symbol;
1012 end_symbol = lookahead_symbol + 1;
1013 } else {
1014 first_symbol = 1;
1015 end_symbol = self->language->token_count;
1016 }
1017
// Gather the reduce actions for this state and note whether a real
// (non-extra, non-repetition) shift or recover action exists.
1018 for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
1019 TableEntry entry;
1020 ts_language_table_entry(self->language, state, symbol, &entry);
1021 for (uint32_t j = 0; j < entry.action_count; j++) {
1022 TSParseAction action = entry.actions[j];
1023 switch (action.type) {
1024 case TSParseActionTypeShift:
1025 case TSParseActionTypeRecover:
1026 if (!action.shift.extra && !action.shift.repetition) has_shift_action = true;
1027 break;
1028 case TSParseActionTypeReduce:
// Reductions with a child count of zero are not collected.
1029 if (action.reduce.child_count > 0)
1030 ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) {
1031 .symbol = action.reduce.symbol,
1032 .count = action.reduce.child_count,
1033 .dynamic_precedence = action.reduce.dynamic_precedence,
1034 .production_id = action.reduce.production_id,
1035 });
1036 break;
1037 default:
1038 break;
1039 }
1040 }
1041 }
1042
// Apply each collected reduction. `reduction_version` keeps the result of
// the last ts_parser__reduce() call only. The reductions are requested
// with is_fragile = true and end_of_non_terminal_extra = false.
1043 StackVersion reduction_version = STACK_VERSION_NONE;
1044 for (uint32_t j = 0; j < self->reduce_actions.size; j++) {
1045 ReduceAction action = self->reduce_actions.contents[j];
1046
1047 reduction_version = ts_parser__reduce(
1048 self, version, action.symbol, action.count,
1049 action.dynamic_precedence, action.production_id,
1050 true, false
1051 );
1052 }
1053
// - A shift is possible: keep this version and remember the fact.
// - Otherwise, if the last reduction created a version (and the iteration
//   cap has not been reached): renumber that version to `version` and
//   process the same index again.
// - Otherwise, with a concrete lookahead, remove this version.
1054 if (has_shift_action) {
1055 can_shift_lookahead_symbol = true;
1056 } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
1057 ts_stack_renumber_version(self->stack, reduction_version, version);
1058 continue;
1059 } else if (lookahead_symbol != 0) {
1060 ts_stack_remove_version(self->stack, version);
1061 }
1062
// After the starting version, jump to the index that was one past the last
// version when this iteration began; after that, step one at a time.
1063 if (version == starting_version) {
1064 version = version_count;
1065 } else {
1066 version++;
1067 }
1068 }
1069
1070 return can_shift_lookahead_symbol;
1071 }
1072
1073static bool ts_parser__recover_to_state(
1074 TSParser *self,
1075 StackVersion version,
1076 unsigned depth,
1077 TSStateId goal_state
1078) {
1079 StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
1080 StackVersion previous_version = STACK_VERSION_NONE;
1081
1082 for (unsigned i = 0; i < pop.size; i++) {
1083 StackSlice slice = pop.contents[i];
1084
1085 if (slice.version == previous_version) {
1086 ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1087 array_erase(&pop, i--);
1088 continue;
1089 }
1090
1091 if (ts_stack_state(self->stack, slice.version) != goal_state) {
1092 ts_stack_halt(self->stack, slice.version);
1093 ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1094 array_erase(&pop, i--);
1095 continue;
1096 }
1097
1098 SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
1099 if (error_trees.size > 0) {
1100 assert(error_trees.size == 1);
1101 Subtree error_tree = error_trees.contents[0];
1102 uint32_t error_child_count = ts_subtree_child_count(error_tree);
1103 if (error_child_count > 0) {
1104 array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
1105 for (unsigned j = 0; j < error_child_count; j++) {
1106 ts_subtree_retain(slice.subtrees.contents[j]);
1107 }
1108 }
1109 ts_subtree_array_delete(&self->tree_pool, &error_trees);
1110 }
1111
1112 ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
1113
1114 if (slice.subtrees.size > 0) {
1115 Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
1116 ts_stack_push(self->stack, slice.version, error, false, goal_state);
1117 } else {
1118 array_delete(&slice.subtrees);
1119 }
1120
1121 for (unsigned j = 0; j < self->trailing_extras.size; j++) {
1122 Subtree tree = self->trailing_extras.contents[j];
1123 ts_stack_push(self->stack, slice.version, tree, false, goal_state);
1124 }
1125
1126 previous_version = slice.version;
1127 }
1128
1129 return previous_version != STACK_VERSION_NONE;
1130}
1131
// Handle `lookahead` for a stack version that is in the error state.
//
// Two strategies are attempted (described in detail below): recovering to a
// previous state recorded in the stack summary, and skipping the lookahead by
// wrapping it in an error node. On the early-exit paths where the version is
// halted, `lookahead` is released; otherwise it ends up on the stack, either
// inside the pushed error node or, at end of file, via ts_parser__accept().
1132 static void ts_parser__recover(
1133 TSParser *self,
1134 StackVersion version,
1135 Subtree lookahead
1136 ) {
1137 bool did_recover = false;
1138 unsigned previous_version_count = ts_stack_version_count(self->stack);
1139 Length position = ts_stack_position(self->stack, version);
1140 StackSummary *summary = ts_stack_get_summary(self->stack, version);
1141 unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
1142 unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
1143
1144 // When the parser is in the error state, there are two strategies for recovering with a
1145 // given lookahead token:
1146 // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
1147 // create a new stack version that is in that state again. This entails popping all of the
1148 // subtrees that have been pushed onto the stack since that previous state, and wrapping
1149 // them in an ERROR node.
1150 // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
1151 // move on to the next lookahead token, remaining in the error state.
1152 //
1153 // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
1154 // of the previous parse states and their depths. Look at each state in the summary, to see
1155 // if the current lookahead token would be valid in that state.
1156 if (summary && !ts_subtree_is_error(lookahead)) {
1157 for (unsigned i = 0; i < summary->size; i++) {
1158 StackSummaryEntry entry = summary->contents[i];
1159
// Skip summary entries that are in the error state or that sit at the
// current byte position.
1160 if (entry.state == ERROR_STATE) continue;
1161 if (entry.position.bytes == position.bytes) continue;
// Pop one entry more than the summary depth when nodes have already been
// pushed since the error.
1162 unsigned depth = entry.depth;
1163 if (node_count_since_error > 0) depth++;
1164
1165 // Do not recover in ways that create redundant stack versions.
1166 bool would_merge = false;
1167 for (unsigned j = 0; j < previous_version_count; j++) {
1168 if (
1169 ts_stack_state(self->stack, j) == entry.state &&
1170 ts_stack_position(self->stack, j).bytes == position.bytes
1171 ) {
1172 would_merge = true;
1173 break;
1174 }
1175 }
1176 if (would_merge) continue;
1177
1178 // Do not recover if the result would clearly be worse than some existing stack version.
// The estimate uses the summary entry's own depth (not the adjusted
// `depth` above) plus the bytes and rows between the entry and the
// current position.
1179 unsigned new_cost =
1180 current_error_cost +
1181 entry.depth * ERROR_COST_PER_SKIPPED_TREE +
1182 (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
1183 (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
1184 if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
1185
1186 // If the current lookahead token is valid in some previous state, recover to that state.
1187 // Then stop looking for further recoveries.
1188 if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
1189 if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
1190 did_recover = true;
1191 LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
1192 LOG_STACK();
1193 break;
1194 }
1195 }
1196 }
1197 }
1198
1199 // In the process of attempting to recover, some stack versions may have been created
1200 // and subsequently halted. Remove those versions.
1201 for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
1202 if (!ts_stack_is_active(self->stack, i)) {
1203 ts_stack_remove_version(self->stack, i--);
1204 }
1205 }
1206
1207 // If strategy 1 succeeded, a new stack version will have been created which is able to handle
1208 // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
1209 // current lookahead token by wrapping it in an ERROR node.
1210
1211 // Don't pursue this additional strategy if there are already too many stack versions.
1212 if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1213 ts_stack_halt(self->stack, version);
1214 ts_subtree_release(&self->tree_pool, lookahead);
1215 return;
1216 }
1217
// Likewise, do not skip the lookahead after a successful recovery when the
// lookahead changed the external scanner's state.
1218 if (
1219 did_recover &&
1220 ts_subtree_has_external_scanner_state_change(lookahead)
1221 ) {
1222 ts_stack_halt(self->stack, version);
1223 ts_subtree_release(&self->tree_pool, lookahead);
1224 return;
1225 }
1226
1227 // If the parser is still in the error state at the end of the file, just wrap everything
1228 // in an ERROR node and terminate.
1229 if (ts_subtree_is_eof(lookahead)) {
1230 LOG("recover_eof");
1231 SubtreeArray children = array_new();
1232 Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
1233 ts_stack_push(self->stack, version, parent, false, 1);
1234 ts_parser__accept(self, version, lookahead);
1235 return;
1236 }
1237
1238 // Do not recover if the result would clearly be worse than some existing stack version.
1239 unsigned new_cost =
1240 current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
1241 ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
1242 ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
1243 if (ts_parser__better_version_exists(self, version, false, new_cost)) {
1244 ts_stack_halt(self->stack, version);
1245 ts_subtree_release(&self->tree_pool, lookahead);
1246 return;
1247 }
1248
1249 // If the current lookahead token is an extra token, mark it as extra. This means it won't
1250 // be counted in error cost calculations.
// The check looks at the last action listed for the lookahead's symbol in
// state 1.
1251 unsigned n;
1252 const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
1253 if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) {
1254 MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1255 ts_subtree_set_extra(&mutable_lookahead, true);
1256 lookahead = ts_subtree_from_mut(mutable_lookahead);
1257 }
1258
1259 // Wrap the lookahead token in an ERROR.
1260 LOG("skip_token symbol:%s", TREE_NAME(lookahead));
1261 SubtreeArray children = array_new();
1262 array_reserve(&children, 1);
1263 array_push(&children, lookahead);
1264 MutableSubtree error_repeat = ts_subtree_new_node(
1265 ts_builtin_sym_error_repeat,
1266 &children,
1267 0,
1268 self->language
1269 );
1270
1271 // If other tokens have already been skipped, so there is already an ERROR at the top of the
1272 // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
1273 // ERROR.
1274 if (node_count_since_error > 0) {
1275 StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
1276
1277 // TODO: Figure out how to make this condition occur.
1278 // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
1279 // If multiple stack versions have merged at this point, just pick one of the errors
1280 // arbitrarily and discard the rest.
1281 if (pop.size > 1) {
1282 for (unsigned i = 1; i < pop.size; i++) {
1283 ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
1284 }
1285 while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
1286 ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
1287 }
1288 }
1289
// Move the popped version back to index `version`, append the new error
// node to the popped subtrees, and wrap them all in a single
// error-repeat node.
1290 ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
1291 array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
1292 error_repeat = ts_subtree_new_node(
1293 ts_builtin_sym_error_repeat,
1294 &pop.contents[0].subtrees,
1295 0,
1296 self->language
1297 );
1298 }
1299
1300 // Push the new ERROR onto the stack.
1301 ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
1302 if (ts_subtree_has_external_tokens(lookahead)) {
1303 ts_stack_set_last_external_token(
1304 self->stack, version, ts_subtree_last_external_token(lookahead)
1305 );
1306 }
1307 }
1308
1309static void ts_parser__handle_error(
1310 TSParser *self,
1311 StackVersion version,
1312 Subtree lookahead
1313) {
1314 uint32_t previous_version_count = ts_stack_version_count(self->stack);
1315
1316 // Perform any reductions that can happen in this state, regardless of the lookahead. After
1317 // skipping one or more invalid tokens, the parser might find a token that would have allowed
1318 // a reduction to take place.
1319 ts_parser__do_all_potential_reductions(self, version, 0);
1320 uint32_t version_count = ts_stack_version_count(self->stack);
1321 Length position = ts_stack_position(self->stack, version);
1322
1323 // Push a discontinuity onto the stack. Merge all of the stack versions that
1324 // were created in the previous step.
1325 bool did_insert_missing_token = false;
1326 for (StackVersion v = version; v < version_count;) {
1327 if (!did_insert_missing_token) {
1328 TSStateId state = ts_stack_state(self->stack, v);
1329 for (
1330 TSSymbol missing_symbol = 1;
1331 missing_symbol < (uint16_t)self->language->token_count;
1332 missing_symbol++
1333 ) {
1334 TSStateId state_after_missing_symbol = ts_language_next_state(
1335 self->language, state, missing_symbol
1336 );
1337 if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
1338 continue;
1339 }
1340
1341 if (ts_language_has_reduce_action(
1342 self->language,
1343 state_after_missing_symbol,
1344 ts_subtree_leaf_symbol(lookahead)
1345 )) {
1346 // In case the parser is currently outside of any included range, the lexer will
1347 // snap to the beginning of the next included range. The missing token's padding
1348 // must be assigned to position it within the next included range.
1349 ts_lexer_reset(&self->lexer, position);
1350 ts_lexer_mark_end(&self->lexer);
1351 Length padding = length_sub(self->lexer.token_end_position, position);
1352 uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead);
1353
1354 StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
1355 Subtree missing_tree = ts_subtree_new_missing_leaf(
1356 &self->tree_pool, missing_symbol,
1357 padding, lookahead_bytes,
1358 self->language
1359 );
1360 ts_stack_push(
1361 self->stack, version_with_missing_tree,
1362 missing_tree, false,
1363 state_after_missing_symbol
1364 );
1365
1366 if (ts_parser__do_all_potential_reductions(
1367 self, version_with_missing_tree,
1368 ts_subtree_leaf_symbol(lookahead)
1369 )) {
1370 LOG(
1371 "recover_with_missing symbol:%s, state:%u",
1372 SYM_NAME(missing_symbol),
1373 ts_stack_state(self->stack, version_with_missing_tree)
1374 );
1375 did_insert_missing_token = true;
1376 break;
1377 }
1378 }
1379 }
1380 }
1381
1382 ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
1383 v = (v == version) ? previous_version_count : v + 1;
1384 }
1385
1386 for (unsigned i = previous_version_count; i < version_count; i++) {
1387 bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
1388 assert(did_merge);
1389 (void)did_merge; // fix warning/error with clang -Os
1390 }
1391
1392 ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
1393
1394 // Begin recovery with the current lookahead node, rather than waiting for the
1395 // next turn of the parse loop. This ensures that the tree accounts for the
1396 // current lookahead token's "lookahead bytes" value, which describes how far
1397 // the lexer needed to look ahead beyond the content of the token in order to
1398 // recognize it.
1399 if (ts_subtree_child_count(lookahead) > 0) {
1400 ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1401 }
1402 ts_parser__recover(self, version, lookahead);
1403
1404 LOG_STACK();
1405}
1406
// Advance a single stack version by one lookahead token.
//
// The lookahead is obtained, in order of preference, by reusing a node from
// the previous syntax tree (only when `allow_node_reuse` is set), by reusing
// the cached token, or by running the lexer. The parse actions for that
// lookahead in the current state are then applied in a loop: reductions are
// performed and the table entry re-read until the version is shifted,
// accepted, handed to error recovery, halted, or paused.
//
// Returns false only when the periodic cancellation-flag / timeout check
// fires (the lookahead, if any, is released first). Every other exit
// returns true.
1407static bool ts_parser__advance(
1408 TSParser *self,
1409 StackVersion version,
1410 bool allow_node_reuse
1411) {
1412 TSStateId state = ts_stack_state(self->stack, version);
1413 uint32_t position = ts_stack_position(self->stack, version).bytes;
1414 Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
1415
// `did_reuse` starts out true and is cleared below as soon as no node from
// the old tree could be obtained.
1416 bool did_reuse = true;
1417 Subtree lookahead = NULL_SUBTREE;
1418 TableEntry table_entry = {.action_count = 0};
1419
1420 // If possible, reuse a node from the previous syntax tree.
1421 if (allow_node_reuse) {
1422 lookahead = ts_parser__reuse_node(
1423 self, version, &state, position, last_external_token, &table_entry
1424 );
1425 }
1426
1427 // If no node from the previous syntax tree could be reused, then try to
1428 // reuse the token previously returned by the lexer.
1429 if (!lookahead.ptr) {
1430 did_reuse = false;
1431 lookahead = ts_parser__get_cached_token(
1432 self, state, position, last_external_token, &table_entry
1433 );
1434 }
1435
1436 bool needs_lex = !lookahead.ptr;
1437 for (;;) {
1438 // Otherwise, re-run the lexer.
1439 if (needs_lex) {
1440 needs_lex = false;
1441 lookahead = ts_parser__lex(self, version, state);
1442
1443 if (lookahead.ptr) {
1444 ts_parser__set_cached_token(self, position, last_external_token, lookahead);
1445 ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
1446 }
1447
1448 // When parsing a non-terminal extra, a null lookahead indicates the
1449 // end of the rule. The reduction is stored in the EOF table entry.
1450 // After the reduction, the lexer needs to be run again.
1451 else {
1452 ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
1453 }
1454 }
1455
1456 // If a cancellation flag or a timeout was provided, then check every
1457 // time a fixed number of parse actions has been processed.
1458 if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
1459 self->operation_count = 0;
1460 }
1461 if (
1462 self->operation_count == 0 &&
1463 ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1464 (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
1465 ) {
1466 if (lookahead.ptr) {
1467 ts_subtree_release(&self->tree_pool, lookahead);
1468 }
1469 return false;
1470 }
1471
1472 // Process each parse action for the current lookahead token in
1473 // the current state. If there are multiple actions, then this is
1474 // an ambiguous state. REDUCE actions always create a new stack
1475 // version, whereas SHIFT actions update the existing stack version
1476 // and terminate this loop.
1477 StackVersion last_reduction_version = STACK_VERSION_NONE;
1478 for (uint32_t i = 0; i < table_entry.action_count; i++) {
1479 TSParseAction action = table_entry.actions[i];
1480
1481 switch (action.type) {
1482 case TSParseActionTypeShift: {
// Shift entries flagged as repetitions are skipped: the `break` leaves the
// switch and the loop moves on to the next action.
1483 if (action.shift.repetition) break;
1484 TSStateId next_state;
1485 if (action.shift.extra) {
1486 next_state = state;
1487 LOG("shift_extra");
1488 } else {
1489 next_state = action.shift.state;
1490 LOG("shift state:%u", next_state);
1491 }
1492
1493 if (ts_subtree_child_count(lookahead) > 0) {
1494 ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
1495 next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
1496 }
1497
1498 ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
1499 if (did_reuse) reusable_node_advance(&self->reusable_node);
1500 return true;
1501 }
1502
1503 case TSParseActionTypeReduce: {
// A reduction is treated as fragile whenever this table entry holds more
// than one action.
1504 bool is_fragile = table_entry.action_count > 1;
1505 bool end_of_non_terminal_extra = lookahead.ptr == NULL;
1506 LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count);
1507 StackVersion reduction_version = ts_parser__reduce(
1508 self, version, action.reduce.symbol, action.reduce.child_count,
1509 action.reduce.dynamic_precedence, action.reduce.production_id,
1510 is_fragile, end_of_non_terminal_extra
1511 );
1512 if (reduction_version != STACK_VERSION_NONE) {
1513 last_reduction_version = reduction_version;
1514 }
1515 break;
1516 }
1517
1518 case TSParseActionTypeAccept: {
1519 LOG("accept");
1520 ts_parser__accept(self, version, lookahead);
1521 return true;
1522 }
1523
1524 case TSParseActionTypeRecover: {
1525 if (ts_subtree_child_count(lookahead) > 0) {
1526 ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1527 }
1528
1529 ts_parser__recover(self, version, lookahead);
1530 if (did_reuse) reusable_node_advance(&self->reusable_node);
1531 return true;
1532 }
1533 }
1534 }
1535
1536 // If a reduction was performed, then replace the current stack version
1537 // with one of the stack versions created by a reduction, and continue
1538 // processing this version of the stack with the same lookahead symbol.
1539 if (last_reduction_version != STACK_VERSION_NONE) {
1540 ts_stack_renumber_version(self->stack, last_reduction_version, version);
1541 LOG_STACK();
1542 state = ts_stack_state(self->stack, version);
1543
1544 // At the end of a non-terminal extra rule, the lexer will return a
1545 // null subtree, because the parser needs to perform a fixed reduction
1546 // regardless of the lookahead node. After performing that reduction,
1547 // (and completing the non-terminal extra rule) run the lexer again based
1548 // on the current parse state.
1549 if (!lookahead.ptr) {
1550 needs_lex = true;
1551 } else {
1552 ts_language_table_entry(
1553 self->language,
1554 state,
1555 ts_subtree_leaf_symbol(lookahead),
1556 &table_entry
1557 );
1558 }
1559
1560 continue;
1561 }
1562
1563 // A non-terminal extra rule was reduced and merged into an existing
1564 // stack version. This version can be discarded.
1565 if (!lookahead.ptr) {
1566 ts_stack_halt(self->stack, version);
1567 return true;
1568 }
1569
1570 // If there were no parse actions for the current lookahead token, then
1571 // it is not valid in this state. If the current lookahead token is a
1572 // keyword, then switch to treating it as the normal word token if that
1573 // token is valid in this state.
1574 if (
1575 ts_subtree_is_keyword(lookahead) &&
1576 ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
1577 ) {
1578 ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
1579 if (table_entry.action_count > 0) {
1580 LOG(
1581 "switch from_keyword:%s, to_word_token:%s",
1582 TREE_NAME(lookahead),
1583 SYM_NAME(self->language->keyword_capture_token)
1584 );
1585
1586 MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1587 ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
1588 lookahead = ts_subtree_from_mut(mutable_lookahead);
1589 continue;
1590 }
1591 }
1592
1593 // If the current lookahead token is not valid and the parser is
1594 // already in the error state, restart the error recovery process.
1595 // TODO - can this be unified with the other `RECOVER` case above?
1596 if (state == ERROR_STATE) {
1597 ts_parser__recover(self, version, lookahead);
1598 return true;
1599 }
1600
1601 // If the current lookahead token is not valid and the previous
1602 // subtree on the stack was reused from an old tree, it isn't actually
1603 // valid to reuse it. Remove it from the stack, and in its place,
1604 // push each of its children. Then try again to process the current
1605 // lookahead.
1606 if (ts_parser__breakdown_top_of_stack(self, version)) {
1607 state = ts_stack_state(self->stack, version);
1608 ts_subtree_release(&self->tree_pool, lookahead);
1609 needs_lex = true;
1610 continue;
1611 }
1612
1613 // At this point, the current lookahead token is definitely not valid
1614 // for this parse stack version. Mark this version as paused and continue
1615 // processing any other stack versions that might exist. If some other
1616 // version advances successfully, then this version can simply be removed.
1617 // But if all versions end up paused, then error recovery is needed.
1618 LOG("detect_error");
1619 ts_stack_pause(self->stack, version, lookahead);
1620 return true;
1621 }
1622}
1623
// Prune, merge and reorder the stack versions after a round of advancing.
//
// Halted versions are removed, each remaining version is compared against
// every earlier one (dropping, merging or swapping as the result of
// `ts_parser__compare_versions` dictates), and the version count is capped at
// MAX_VERSION_COUNT. Afterwards, if a paused version is found before any
// unpaused one and `accept_count` is below MAX_VERSION_COUNT, that version is
// resumed and passed to `ts_parser__handle_error`; every other paused version
// is removed.
//
// Returns the lowest error cost seen among versions whose status is not
// `is_in_error` (UINT_MAX if there was none), or, when a paused version was
// resumed, that version's error cost.
1624static unsigned ts_parser__condense_stack(TSParser *self) {
1625 bool made_changes = false;
1626 unsigned min_error_cost = UINT_MAX;
1627 for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
1628 // Prune any versions that have been marked for removal.
1629 if (ts_stack_is_halted(self->stack, i)) {
1630 ts_stack_remove_version(self->stack, i);
1631 i--;
1632 continue;
1633 }
1634
1635 // Keep track of the minimum error cost of any stack version so
1636 // that it can be returned.
1637 ErrorStatus status_i = ts_parser__version_status(self, i);
1638 if (!status_i.is_in_error && status_i.cost < min_error_cost) {
1639 min_error_cost = status_i.cost;
1640 }
1641
1642 // Examine each pair of stack versions, removing any versions that
1643 // are clearly worse than another version. Ensure that the versions
1644 // are ordered from most promising to least promising.
1645 for (StackVersion j = 0; j < i; j++) {
1646 ErrorStatus status_j = ts_parser__version_status(self, j);
1647
// In the cases below, `i--; j = i;` ends this inner loop (after `j++`, `j < i`
// is false) and makes the outer loop's `i++` land on the same index again.
// NOTE(review): presumably later versions shift down into the vacated slot
// after a removal or merge — confirm against the stack implementation.
1648 switch (ts_parser__compare_versions(self, status_j, status_i)) {
1649 case ErrorComparisonTakeLeft:
1650 made_changes = true;
1651 ts_stack_remove_version(self->stack, i);
1652 i--;
1653 j = i;
1654 break;
1655
1656 case ErrorComparisonPreferLeft:
1657 case ErrorComparisonNone:
1658 if (ts_stack_merge(self->stack, j, i)) {
1659 made_changes = true;
1660 i--;
1661 j = i;
1662 }
1663 break;
1664
1665 case ErrorComparisonPreferRight:
1666 made_changes = true;
1667 if (ts_stack_merge(self->stack, j, i)) {
1668 i--;
1669 j = i;
1670 } else {
1671 ts_stack_swap_versions(self->stack, i, j);
1672 }
1673 break;
1674
1675 case ErrorComparisonTakeRight:
1676 made_changes = true;
1677 ts_stack_remove_version(self->stack, j);
1678 i--;
1679 j--;
1680 break;
1681 }
1682 }
1683 }
1684
1685 // Enforce a hard upper bound on the number of stack versions by
1686 // discarding the least promising versions.
1687 while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1688 ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
1689 made_changes = true;
1690 }
1691
1692 // If the best-performing stack version is currently paused, or all
1693 // versions are paused, then resume the best paused version and begin
1694 // the error recovery process. Otherwise, remove the paused versions.
1695 if (ts_stack_version_count(self->stack) > 0) {
1696 bool has_unpaused_version = false;
1697 for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
1698 if (ts_stack_is_paused(self->stack, i)) {
1699 if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
1700 LOG("resume version:%u", i);
1701 min_error_cost = ts_stack_error_cost(self->stack, i);
1702 Subtree lookahead = ts_stack_resume(self->stack, i);
1703 ts_parser__handle_error(self, i, lookahead);
1704 has_unpaused_version = true;
1705 } else {
1706 ts_stack_remove_version(self->stack, i);
1707 i--;
1708 n--;
1709 }
1710 } else {
1711 has_unpaused_version = true;
1712 }
1713 }
1714 }
1715
1716 if (made_changes) {
1717 LOG("condense");
1718 LOG_STACK();
1719 }
1720
1721 return min_error_cost;
1722}
1723
1724static bool ts_parser_has_outstanding_parse(TSParser *self) {
1725 return (
1726 ts_stack_state(self->stack, 0) != 1 ||
1727 ts_stack_node_count_since_error(self->stack, 0) != 0
1728 );
1729}
1730
1731// Parser - Public
1732
1733TSParser *ts_parser_new(void) {
1734 TSParser *self = ts_calloc(1, sizeof(TSParser));
1735 ts_lexer_init(&self->lexer);
1736 array_init(&self->reduce_actions);
1737 array_reserve(&self->reduce_actions, 4);
1738 self->tree_pool = ts_subtree_pool_new(32);
1739 self->stack = ts_stack_new(&self->tree_pool);
1740 self->finished_tree = NULL_SUBTREE;
1741 self->reusable_node = reusable_node_new();
1742 self->dot_graph_file = NULL;
1743 self->cancellation_flag = NULL;
1744 self->timeout_duration = 0;
1745 self->end_clock = clock_null();
1746 self->operation_count = 0;
1747 self->old_tree = NULL_SUBTREE;
1748 self->included_range_differences = (TSRangeArray) array_new();
1749 self->included_range_difference_index = 0;
1750 ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1751 return self;
1752}
1753
1754void ts_parser_delete(TSParser *self) {
1755 if (!self) return;
1756
1757 ts_parser_set_language(self, NULL);
1758 ts_stack_delete(self->stack);
1759 if (self->reduce_actions.contents) {
1760 array_delete(&self->reduce_actions);
1761 }
1762 if (self->included_range_differences.contents) {
1763 array_delete(&self->included_range_differences);
1764 }
1765 if (self->old_tree.ptr) {
1766 ts_subtree_release(&self->tree_pool, self->old_tree);
1767 self->old_tree = NULL_SUBTREE;
1768 }
1769 ts_lexer_delete(&self->lexer);
1770 ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1771 ts_subtree_pool_delete(&self->tree_pool);
1772 reusable_node_delete(&self->reusable_node);
1773 array_delete(&self->trailing_extras);
1774 array_delete(&self->trailing_extras2);
1775 array_delete(&self->scratch_trees);
1776 ts_free(self);
1777}
1778
1779const TSLanguage *ts_parser_language(const TSParser *self) {
1780 return self->language;
1781}
1782
1783bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
1784 if (language) {
1785 if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false;
1786 if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false;
1787 }
1788
1789 if (self->external_scanner_payload && self->language->external_scanner.destroy) {
1790 self->language->external_scanner.destroy(self->external_scanner_payload);
1791 }
1792
1793 if (language && language->external_scanner.create) {
1794 self->external_scanner_payload = language->external_scanner.create();
1795 } else {
1796 self->external_scanner_payload = NULL;
1797 }
1798
1799 self->language = language;
1800 ts_parser_reset(self);
1801 return true;
1802}
1803
1804TSLogger ts_parser_logger(const TSParser *self) {
1805 return self->lexer.logger;
1806}
1807
1808void ts_parser_set_logger(TSParser *self, TSLogger logger) {
1809 self->lexer.logger = logger;
1810}
1811
1812void ts_parser_print_dot_graphs(TSParser *self, int fd) {
1813 if (self->dot_graph_file) {
1814 fclose(self->dot_graph_file);
1815 }
1816
1817 if (fd >= 0) {
1818 #ifdef _WIN32
1819 self->dot_graph_file = _fdopen(fd, "a");
1820 #else
1821 self->dot_graph_file = fdopen(fd, "a");
1822 #endif
1823 } else {
1824 self->dot_graph_file = NULL;
1825 }
1826}
1827
1828const size_t *ts_parser_cancellation_flag(const TSParser *self) {
1829 return (const size_t *)self->cancellation_flag;
1830}
1831
1832void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
1833 self->cancellation_flag = (const volatile size_t *)flag;
1834}
1835
1836uint64_t ts_parser_timeout_micros(const TSParser *self) {
1837 return duration_to_micros(self->timeout_duration);
1838}
1839
1840void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
1841 self->timeout_duration = duration_from_micros(timeout_micros);
1842}
1843
1844bool ts_parser_set_included_ranges(
1845 TSParser *self,
1846 const TSRange *ranges,
1847 uint32_t count
1848) {
1849 return ts_lexer_set_included_ranges(&self->lexer, ranges, count);
1850}
1851
1852const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) {
1853 return ts_lexer_included_ranges(&self->lexer, count);
1854}
1855
1856void ts_parser_reset(TSParser *self) {
1857 if (self->language && self->language->external_scanner.deserialize) {
1858 self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
1859 }
1860
1861 if (self->old_tree.ptr) {
1862 ts_subtree_release(&self->tree_pool, self->old_tree);
1863 self->old_tree = NULL_SUBTREE;
1864 }
1865
1866 reusable_node_clear(&self->reusable_node);
1867 ts_lexer_reset(&self->lexer, length_zero());
1868 ts_stack_clear(self->stack);
1869 ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1870 if (self->finished_tree.ptr) {
1871 ts_subtree_release(&self->tree_pool, self->finished_tree);
1872 self->finished_tree = NULL_SUBTREE;
1873 }
1874 self->accept_count = 0;
1875}
1876
// Parse the text supplied by `input` and return a newly created tree.
//
// Returns NULL when no language is set, when `input.read` is NULL, or when
// `ts_parser__advance` returns false (its cancellation / timeout check). In
// that last case the parser is not reset here; a later call takes the
// "resume_parsing" branch if `ts_parser_has_outstanding_parse` is true.
//
// When `old_tree` is given and no parse is outstanding, its root is retained
// for node reuse and the differences between its included ranges and the
// lexer's current ones are recorded in `included_range_differences`.
//
// On success the finished tree is balanced, wrapped by `ts_tree_new`, and the
// parser is reset before returning.
1877TSTree *ts_parser_parse(
1878 TSParser *self,
1879 const TSTree *old_tree,
1880 TSInput input
1881) {
1882 if (!self->language || !input.read) return NULL;
1883
1884 ts_lexer_set_input(&self->lexer, input);
1885
1886 array_clear(&self->included_range_differences);
1887 self->included_range_difference_index = 0;
1888
1889 if (ts_parser_has_outstanding_parse(self)) {
1890 LOG("resume_parsing");
1891 } else if (old_tree) {
1892 ts_subtree_retain(old_tree->root);
1893 self->old_tree = old_tree->root;
1894 ts_range_array_get_changed_ranges(
1895 old_tree->included_ranges, old_tree->included_range_count,
1896 self->lexer.included_ranges, self->lexer.included_range_count,
1897 &self->included_range_differences
1898 );
1899 reusable_node_reset(&self->reusable_node, old_tree->root);
1900 LOG("parse_after_edit");
1901 LOG_TREE(self->old_tree);
1902 for (unsigned i = 0; i < self->included_range_differences.size; i++) {
1903 TSRange *range = &self->included_range_differences.contents[i];
1904 LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
1905 }
1906 } else {
1907 reusable_node_clear(&self->reusable_node);
1908 LOG("new_parse");
1909 }
1910
// Restart the operation counter and compute the deadline; a zero timeout
// leaves the end clock null.
1911 self->operation_count = 0;
1912 if (self->timeout_duration) {
1913 self->end_clock = clock_after(clock_now(), self->timeout_duration);
1914 } else {
1915 self->end_clock = clock_null();
1916 }
1917
1918 uint32_t position = 0, last_position = 0, version_count = 0;
1919 do {
// The loop condition re-reads the version count on every iteration, because
// advancing a version may add or remove stack versions.
1920 for (
1921 StackVersion version = 0;
1922 version_count = ts_stack_version_count(self->stack),
1923 version < version_count;
1924 version++
1925 ) {
// Node reuse is only attempted while there is exactly one stack version.
1926 bool allow_node_reuse = version_count == 1;
1927 while (ts_stack_is_active(self->stack, version)) {
1928 LOG(
1929 "process version:%d, version_count:%u, state:%d, row:%u, col:%u",
1930 version,
1931 ts_stack_version_count(self->stack),
1932 ts_stack_state(self->stack, version),
1933 ts_stack_position(self->stack, version).extent.row,
1934 ts_stack_position(self->stack, version).extent.column
1935 );
1936
1937 if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
1938 LOG_STACK();
1939
// Stop advancing this version once it has moved past the furthest position
// reached so far (or, for versions other than 0, has caught up with it).
1940 position = ts_stack_position(self->stack, version).bytes;
1941 if (position > last_position || (version > 0 && position == last_position)) {
1942 last_position = position;
1943 break;
1944 }
1945 }
1946 }
1947
1948 // After advancing each version of the stack, re-sort the versions by their cost,
1949 // removing any versions that are no longer worth pursuing.
1950 unsigned min_error_cost = ts_parser__condense_stack(self);
1951
1952 // If there's already a finished parse tree that's better than any in-progress version,
1953 // then terminate parsing. Clear the parse stack to remove any extra references to subtrees
1954 // within the finished tree, ensuring that these subtrees can be safely mutated in-place
1955 // for rebalancing.
1956 if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
1957 ts_stack_clear(self->stack);
1958 break;
1959 }
1960
// Skip past included-range differences that end at or before the current position.
1961 while (self->included_range_difference_index < self->included_range_differences.size) {
1962 TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
1963 if (range->end_byte <= position) {
1964 self->included_range_difference_index++;
1965 } else {
1966 break;
1967 }
1968 }
1969 } while (version_count != 0);
1970
1971 assert(self->finished_tree.ptr);
1972 ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
1973 LOG("done");
1974 LOG_TREE(self->finished_tree);
1975
1976 TSTree *result = ts_tree_new(
1977 self->finished_tree,
1978 self->language,
1979 self->lexer.included_ranges,
1980 self->lexer.included_range_count
1981 );
// Clear the field before resetting so that ts_parser_reset does not release
// the tree that was just handed to `result`.
1982 self->finished_tree = NULL_SUBTREE;
1983 ts_parser_reset(self);
1984 return result;
1985}
1986
1987TSTree *ts_parser_parse_string(
1988 TSParser *self,
1989 const TSTree *old_tree,
1990 const char *string,
1991 uint32_t length
1992) {
1993 return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8);
1994}
1995
1996TSTree *ts_parser_parse_string_encoding(
1997 TSParser *self,
1998 const TSTree *old_tree,
1999 const char *string,
2000 uint32_t length,
2001 TSInputEncoding encoding
2002) {
2003 TSStringInput input = {string, length};
2004 return ts_parser_parse(self, old_tree, (TSInput) {
2005 &input,
2006 ts_string_input_read,
2007 encoding,
2008 });
2009}
2010
2011#undef LOG
diff --git a/vendor/tree-sitter/lib/src/point.h b/vendor/tree-sitter/lib/src/point.h
new file mode 100644
index 0000000..37346c8
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/point.h
@@ -0,0 +1,62 @@
1#ifndef TREE_SITTER_POINT_H_
2#define TREE_SITTER_POINT_H_
3
4#include "tree_sitter/api.h"
5
6#define POINT_ZERO ((TSPoint) {0, 0})
7#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
8
9static inline TSPoint point__new(unsigned row, unsigned column) {
10 TSPoint result = {row, column};
11 return result;
12}
13
14static inline TSPoint point_add(TSPoint a, TSPoint b) {
15 if (b.row > 0)
16 return point__new(a.row + b.row, b.column);
17 else
18 return point__new(a.row, a.column + b.column);
19}
20
21static inline TSPoint point_sub(TSPoint a, TSPoint b) {
22 if (a.row > b.row)
23 return point__new(a.row - b.row, a.column);
24 else
25 return point__new(0, a.column - b.column);
26}
27
28static inline bool point_lte(TSPoint a, TSPoint b) {
29 return (a.row < b.row) || (a.row == b.row && a.column <= b.column);
30}
31
32static inline bool point_lt(TSPoint a, TSPoint b) {
33 return (a.row < b.row) || (a.row == b.row && a.column < b.column);
34}
35
36static inline bool point_gt(TSPoint a, TSPoint b) {
37 return (a.row > b.row) || (a.row == b.row && a.column > b.column);
38}
39
40static inline bool point_gte(TSPoint a, TSPoint b) {
41 return (a.row > b.row) || (a.row == b.row && a.column >= b.column);
42}
43
44static inline bool point_eq(TSPoint a, TSPoint b) {
45 return a.row == b.row && a.column == b.column;
46}
47
48static inline TSPoint point_min(TSPoint a, TSPoint b) {
49 if (a.row < b.row || (a.row == b.row && a.column < b.column))
50 return a;
51 else
52 return b;
53}
54
55static inline TSPoint point_max(TSPoint a, TSPoint b) {
56 if (a.row > b.row || (a.row == b.row && a.column > b.column))
57 return a;
58 else
59 return b;
60}
61
62#endif
diff --git a/vendor/tree-sitter/lib/src/query.c b/vendor/tree-sitter/lib/src/query.c
new file mode 100644
index 0000000..4e623ae
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/query.c
@@ -0,0 +1,4130 @@
1#include "tree_sitter/api.h"
2#include "./alloc.h"
3#include "./array.h"
4#include "./language.h"
5#include "./point.h"
6#include "./tree_cursor.h"
7#include "./unicode.h"
8#include <wctype.h>
9
10// #define DEBUG_ANALYZE_QUERY
11// #define DEBUG_EXECUTE_QUERY
12
13#define MAX_STEP_CAPTURE_COUNT 3
14#define MAX_NEGATED_FIELD_COUNT 8
15#define MAX_STATE_PREDECESSOR_COUNT 256
16#define MAX_ANALYSIS_STATE_DEPTH 8
17#define MAX_ANALYSIS_ITERATION_COUNT 256
18
19/*
20 * Stream - A sequence of unicode characters derived from a UTF8 string.
21 * This struct is used in parsing queries from S-expressions.
22 */
23typedef struct {
24 const char *input; // NOTE(review): presumably the current read position in the string — confirm
25 const char *start; // NOTE(review): presumably the first byte of the source string — confirm
26 const char *end; // NOTE(review): presumably the end of the source string — confirm
27 int32_t next; // NOTE(review): presumably the next decoded unicode character — confirm
28 uint8_t next_size; // NOTE(review): presumably the encoded size of `next` in bytes — confirm
29} Stream;
30
31/*
32 * QueryStep - A step in the process of matching a query. Each node within
33 * a query S-expression corresponds to one of these steps. An entire pattern
34 * is represented as a sequence of these steps. The basic properties of a
35 * node are represented by these fields:
36 * - `symbol` - The grammar symbol to match. A zero value represents the
37 * wildcard symbol, '_'.
38 * - `field` - The field name to match. A zero value means that a field name
39 * was not specified.
40 * - `capture_ids` - An array of integers representing the names of captures
41 * associated with this node in the pattern, terminated by a `NONE` value.
42 * - `depth` - The depth where this node occurs in the pattern. The root node
43 * of the pattern has depth zero.
44 * - `negated_field_list_id` - An id representing a set of fields that must
45 * not be present on a node matching this step.
46 *
47 * Steps have some additional fields in order to handle the `.` (or "anchor") operator,
48 * which forbids additional child nodes:
49 * - `is_immediate` - Indicates that the node matching this step cannot be preceded
50 * by other sibling nodes that weren't specified in the pattern.
51 * - `is_last_child` - Indicates that the node matching this step cannot have any
52 * subsequent named siblings.
53 *
54 * For simple patterns, steps are matched in sequential order. But in order to
55 * handle alternative/repeated/optional sub-patterns, query steps are not always
56 * structured as a linear sequence; they sometimes need to split and merge. This
57 * is done using the following fields:
58 * - `alternative_index` - The index of a different query step that serves as
59 * an alternative to this step. A `NONE` value represents no alternative.
60 * When a query state reaches a step with an alternative index, the state
61 * is duplicated, with one copy remaining at the original step, and one copy
62 * moving to the alternative step. The alternative may have its own alternative
63 * step, so this splitting is an iterative process.
64 * - `is_dead_end` - Indicates that this state cannot be passed directly, and
65 * exists only in order to redirect to an alternative index, with no splitting.
66 * - `is_pass_through` - Indicates that state has no matching logic of its own,
67 * and exists only to split a state. One copy of the state advances immediately
68 * to the next step, and one moves to the alternative step.
69 * - `alternative_is_immediate` - Indicates that this step's alternative step
70 * should be treated as if `is_immediate` is true.
71 *
72 * Steps also store some derived state that summarizes how they relate to other
73 * steps within the same pattern. This is used to optimize the matching process:
74 * - `contains_captures` - Indicates that this step or one of its child steps
75 * has a non-empty `capture_ids` list.
76 * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
77 * it and all of its subsequent sibling steps within the same parent pattern
78 * are guaranteed to match.
79 * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
80 * for the entire top-level pattern. When iterating through a query's
81 * captures using `ts_query_cursor_next_capture`, this field is used to
82 * detect that a capture can safely be returned from a match that has not
83 * even completed yet.
84 */
85typedef struct {
86 TSSymbol symbol; // grammar symbol to match; zero is the wildcard `_`
87 TSSymbol supertype_symbol; // NOTE(review): not described in the comment above; presumably a supertype constraint — confirm
88 TSFieldId field; // field name to match; zero means no field was specified
89 uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; // capture ids for this node, terminated by `NONE`
90 uint16_t depth; // depth of this node within the pattern; the root is depth zero
91 uint16_t alternative_index; // index of the alternative step, or `NONE`
92 uint16_t negated_field_list_id; // id of the set of fields that must not be present
93 bool is_named: 1; // NOTE(review): not described above; presumably whether a named node is required — confirm
94 bool is_immediate: 1; // no unspecified sibling may precede the matching node
95 bool is_last_child: 1; // no named sibling may follow the matching node
96 bool is_pass_through: 1; // step only splits the state: next step and alternative
97 bool is_dead_end: 1; // step only redirects to its alternative, without splitting
98 bool alternative_is_immediate: 1; // treat the alternative step as if `is_immediate` were set
99 bool contains_captures: 1; // this step or a child step has a non-empty `capture_ids`
100 bool root_pattern_guaranteed: 1; // rest of the top-level pattern is guaranteed to match
101 bool parent_pattern_guaranteed: 1; // rest of the parent pattern is guaranteed to match
102} QueryStep;
103
104/*
105 * Slice - A slice of an external array. Within a query, capture names,
106 * literal string values, and predicate step information are stored in three
107 * contiguous arrays. Individual captures, string values, and predicates are
108 * represented as slices of these three arrays.
109 */
110typedef struct {
111 uint32_t offset; // where the slice begins within the external array
112 uint32_t length; // extent of the slice (NOTE(review): units presumably array elements — confirm)
113} Slice;
114
115/*
116 * SymbolTable - a two-way mapping of strings to ids.
117 */
118typedef struct {
119 Array(char) characters; // NOTE(review): presumably the text of all stored strings, back to back — confirm
120 Array(Slice) slices; // NOTE(review): presumably one slice of `characters` per id — confirm
121} SymbolTable;
122
123/**
124 * CaptureQuantifiers - a data structure holding the quantifiers of pattern captures.
125 */
// An `Array` of `uint8_t` values, one quantifier per entry.
// NOTE(review): presumably indexed by capture id — confirm.
126typedef Array(uint8_t) CaptureQuantifiers;
127
128/*
129 * PatternEntry - Information about the starting point for matching a particular
130 * pattern. These entries are stored in a 'pattern map' - a sorted array that
131 * makes it possible to efficiently lookup patterns based on the symbol for their
132 * first step. The entry consists of the following fields:
133 * - `pattern_index` - the index of the pattern within the query
134 * - `step_index` - the index of the pattern's first step in the shared `steps` array
135 * - `is_rooted` - whether or not the pattern has a single root node. This property
136 * affects decisions about whether or not to start the pattern for nodes outside
137 * of a QueryCursor's range restriction.
138 */
139typedef struct {
140 uint16_t step_index; // index of the pattern's first step in the shared `steps` array
141 uint16_t pattern_index; // index of the pattern within the query
142 bool is_rooted; // whether the pattern has a single root node
143} PatternEntry;
144
// Per-pattern bookkeeping within a query.
// NOTE(review): field meanings below are inferred from names and from the
// `Slice` description — confirm against the code that fills them in.
145typedef struct {
146 Slice step; // presumably the pattern's range within the query's step array
147 Slice predicate_step; // presumably the pattern's range within the predicate-step array
148 uint32_t start_byte; // presumably the byte offset of the pattern in the query source
149 bool is_non_local; // NOTE(review): meaning not visible here — confirm
150} QueryPattern;
151
// Pairs a byte offset with a step index.
// NOTE(review): presumably maps a query step back to its position in the
// query source text — confirm.
152typedef struct {
153 uint32_t byte_offset;
154 uint16_t step_index;
155} StepOffset;
156
157/*
158 * QueryState - The state of an in-progress match of a particular pattern
159 * in a query. While executing, a `TSQueryCursor` must keep track of a number
160 * of possible in-progress matches. Each of those possible matches is
161 * represented as one of these states. Fields:
162 * - `id` - A numeric id that is exposed to the public API. This allows the
163 * caller to remove a given match, preventing any more of its captures
164 * from being returned.
165 * - `start_depth` - The depth in the tree where the first step of the state's
166 * pattern was matched.
167 * - `pattern_index` - The pattern that the state is matching.
168 * - `consumed_capture_count` - The number of captures from this match that
169 * have already been returned.
170 * - `capture_list_id` - A numeric id that can be used to retrieve the state's
171 * list of captures from the `CaptureListPool`.
172 * - `seeking_immediate_match` - A flag that indicates that the state's next
173 * step must be matched by the very next sibling. This is used when
174 * processing repetitions.
175 * - `has_in_progress_alternatives` - A flag that indicates that there are
176 * other states that have the same captures as this state, but are at
177 * different steps in their pattern. This means that in order to obey the
178 * 'longest-match' rule, this state should not be returned as a match until
179 * it is clear that there can be no other alternative match with more captures.
180 */
181typedef struct {
182 uint32_t id; // numeric id exposed through the public API
183 uint32_t capture_list_id; // id used to fetch this state's captures from the `CaptureListPool`
184 uint16_t start_depth; // tree depth where the pattern's first step was matched
185 uint16_t step_index; // NOTE(review): presumably the step this state is currently at — confirm
186 uint16_t pattern_index; // the pattern that this state is matching
187 uint16_t consumed_capture_count: 12; // captures from this match already returned
188 bool seeking_immediate_match: 1; // next step must be matched by the very next sibling
189 bool has_in_progress_alternatives: 1; // other states share these captures at different steps
190 bool dead: 1; // NOTE(review): not described above; presumably marks a state to discard — confirm
191 bool needs_parent: 1; // NOTE(review): not described above — confirm meaning
192} QueryState;
193
// An `Array` of `TSQueryCapture` values: the captures held by one query state.
194typedef Array(TSQueryCapture) CaptureList;
195
196/*
197 * CaptureListPool - A collection of *lists* of captures. Each query state needs
198 * to maintain its own list of captures. To avoid repeated allocations, this struct
199 * maintains a fixed set of capture lists, and keeps track of which ones are
200 * currently in use by a query state.
201 */
202typedef struct {
203 Array(CaptureList) list;
204 CaptureList empty_list;
205 // The maximum number of capture lists that we are allowed to allocate. We
206 // never allow `list` to allocate more entries than this, dropping pending
207 // matches if needed to stay under the limit.
208 uint32_t max_capture_list_count;
209 // The number of capture lists allocated in `list` that are not currently in
210 // use. We reuse those existing-but-unused capture lists before trying to
211 // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
212 // list's length to indicate that it's not in use.
213 uint32_t free_capture_list_count;
214} CaptureListPool;
215
216/*
217 * AnalysisState - The state needed for walking the parse table when analyzing
218 * a query pattern, to determine at which steps the pattern might fail to match.
219 */
// One level of an `AnalysisState`'s stack.
typedef struct {
  // Parse-table state; the analysis iterates over the lookaheads of the top
  // entry's `parse_state`.
  TSStateId parse_state;
  // Symbol whose subgraph is searched when this entry is on top of the stack.
  TSSymbol parent_symbol;
  // NOTE(review): presumably the index of the current child within the
  // hypothetical parent node -- confirm against the analysis code.
  uint16_t child_index;
  // NOTE(review): presumably the field of the current child; 0 is treated as
  // "no field" by the debug printing code -- confirm.
  TSFieldId field_id: 15;
  bool done: 1;
} AnalysisStateEntry;
227
typedef struct {
  // Fixed-capacity stack of entries; only the first `depth` entries are live.
  AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH];
  // Number of live entries in `stack`.
  uint16_t depth;
  // Index into the query's `steps` array of the step being analyzed.
  uint16_t step_index;
  // NOTE(review): presumably the symbol at which this analysis started --
  // it is not read within this excerpt; confirm.
  TSSymbol root_symbol;
} AnalysisState;

// A list of heap-allocated analysis states. Depending on use it is either a
// set kept sorted by `analysis_state__compare` or a pool of reusable
// allocations.
typedef Array(AnalysisState *) AnalysisStateSet;
236
// Scratch data used while analyzing a query pattern.
typedef struct {
  // States processed in the current iteration.
  AnalysisStateSet states;
  // States produced for the next iteration.
  AnalysisStateSet next_states;
  // States set aside until the recursion depth limit is raised; swapped with
  // `states` at that point.
  AnalysisStateSet deeper_states;
  // Recycled `AnalysisState` allocations, reused before calling the allocator.
  AnalysisStateSet state_pool;
  // Step indices recorded by the analysis; cleared at the start of each run.
  Array(uint16_t) final_step_indices;
  // Parent symbols recorded by the analysis; cleared at the start of each run.
  Array(TSSymbol) finished_parent_symbols;
  // Set when the analysis gives up after MAX_ANALYSIS_ITERATION_COUNT
  // iterations.
  bool did_abort;
} QueryAnalysis;
246
247/*
248 * AnalysisSubgraph - A subset of the states in the parse table that are used
249 * in constructing nodes with a certain symbol. Each state is accompanied by
250 * some information about the possible node that could be produced in
251 * downstream states.
252 */
// One node of an `AnalysisSubgraph`. Nodes are ordered by
// `analysis_subgraph_node__compare`: state, then child_index, then done, then
// production_id.
typedef struct {
  // Parse-table state that this node describes.
  TSStateId state;
  // NOTE(review): presumably identifies the production being built -- confirm.
  uint16_t production_id;
  // NOTE(review): presumably the child position reached within that
  // production (7 bits) -- confirm.
  uint8_t child_index: 7;
  bool done: 1;
} AnalysisSubgraphNode;
259
typedef struct {
  // The symbol whose construction this subgraph describes. The subgraph array
  // is searched by this field.
  TSSymbol symbol;
  // NOTE(review): presumably the parse states in which a node with `symbol`
  // can begin -- confirm.
  Array(TSStateId) start_states;
  // The parse states that take part in constructing `symbol`.
  Array(AnalysisSubgraphNode) nodes;
} AnalysisSubgraph;

// Collection of subgraphs, looked up by `symbol` with a sorted search.
typedef Array(AnalysisSubgraph) AnalysisSubgraphArray;
267
268/*
269 * StatePredecessorMap - A map that stores the predecessors of each parse state.
270 * This is used during query analysis to determine which parse states can lead
271 * to which reduce actions.
272 */
typedef struct {
  // Flat table with (MAX_STATE_PREDECESSOR_COUNT + 1) slots per parse state.
  // For a given state, the first slot holds the number of predecessors
  // recorded and the following slots hold the predecessor states themselves.
  TSStateId *contents;
} StatePredecessorMap;
276
277/*
278 * TSQuery - A tree query, compiled from a string of S-expressions. The query
279 * itself is immutable. The mutable state used in the process of executing the
280 * query is stored in a `TSQueryCursor`.
281 */
struct TSQuery {
  // Interned names; NOTE(review): presumably capture names and predicate
  // string values respectively -- confirm against the parsing code.
  SymbolTable captures;
  SymbolTable predicate_values;
  Array(CaptureQuantifiers) capture_quantifiers;
  // All steps of all patterns; `PatternEntry.step_index` indexes into this.
  Array(QueryStep) steps;
  // Start entries of the patterns. Entries from index
  // `wildcard_root_pattern_count` onward are sorted by root symbol and found
  // by binary search.
  Array(PatternEntry) pattern_map;
  Array(TSQueryPredicateStep) predicate_steps;
  Array(QueryPattern) patterns;
  Array(StepOffset) step_offsets;
  Array(TSFieldId) negated_fields;
  Array(char) string_buffer;
  Array(TSSymbol) repeat_symbols_with_rootless_patterns;
  // The language this query was compiled for.
  const TSLanguage *language;
  // Number of leading `pattern_map` entries that the binary search skips.
  uint16_t wildcard_root_pattern_count;
};
297
298/*
299 * TSQueryCursor - A stateful struct used to execute a query on a tree.
300 */
struct TSQueryCursor {
  // The immutable query being executed.
  const TSQuery *query;
  // Cursor used to walk the syntax tree.
  TSTreeCursor cursor;
  // In-progress matches.
  Array(QueryState) states;
  // NOTE(review): presumably completed matches awaiting return -- confirm.
  Array(QueryState) finished_states;
  // Pool from which each state obtains its capture list.
  CaptureListPool capture_list_pool;
  uint32_t depth;
  uint32_t max_start_depth;
  // NOTE(review): presumably the byte/point range the cursor is restricted
  // to -- these fields are not read within this excerpt; confirm.
  uint32_t start_byte;
  uint32_t end_byte;
  TSPoint start_point;
  TSPoint end_point;
  // NOTE(review): presumably the id assigned to the next `QueryState` --
  // confirm.
  uint32_t next_state_id;
  bool on_visible_node;
  bool ascending;
  bool halted;
  bool did_exceed_match_limit;
};
319
// NOTE(review): PARENT_DONE and PATTERN_DONE_MARKER are sentinel values that
// are not used within this excerpt; see their use sites for exact meaning.
static const TSQueryError PARENT_DONE = -1;
static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
// "No value" sentinel for 16-bit ids: marks unused capture-id slots in a step,
// a step without an alternative, and a failed capture-list acquisition.
static const uint16_t NONE = UINT16_MAX;
// NOTE(review): presumably the symbol value used for wildcard steps -- confirm.
static const TSSymbol WILDCARD_SYMBOL = 0;
324
325/**********
326 * Stream
327 **********/
328
329// Advance to the next unicode code point in the stream.
330static bool stream_advance(Stream *self) {
331 self->input += self->next_size;
332 if (self->input < self->end) {
333 uint32_t size = ts_decode_utf8(
334 (const uint8_t *)self->input,
335 (uint32_t)(self->end - self->input),
336 &self->next
337 );
338 if (size > 0) {
339 self->next_size = size;
340 return true;
341 }
342 } else {
343 self->next_size = 0;
344 self->next = '\0';
345 }
346 return false;
347}
348
349// Reset the stream to the given input position, represented as a pointer
350// into the input string.
351static void stream_reset(Stream *self, const char *input) {
352 self->input = input;
353 self->next_size = 0;
354 stream_advance(self);
355}
356
357static Stream stream_new(const char *string, uint32_t length) {
358 Stream self = {
359 .next = 0,
360 .input = string,
361 .start = string,
362 .end = string + length,
363 };
364 stream_advance(&self);
365 return self;
366}
367
368static void stream_skip_whitespace(Stream *self) {
369 for (;;) {
370 if (iswspace(self->next)) {
371 stream_advance(self);
372 } else if (self->next == ';') {
373 // skip over comments
374 stream_advance(self);
375 while (self->next && self->next != '\n') {
376 if (!stream_advance(self)) break;
377 }
378 } else {
379 break;
380 }
381 }
382}
383
384static bool stream_is_ident_start(Stream *self) {
385 return iswalnum(self->next) || self->next == '_' || self->next == '-';
386}
387
388static void stream_scan_identifier(Stream *stream) {
389 do {
390 stream_advance(stream);
391 } while (
392 iswalnum(stream->next) ||
393 stream->next == '_' ||
394 stream->next == '-' ||
395 stream->next == '.' ||
396 stream->next == '?' ||
397 stream->next == '!'
398 );
399}
400
401static uint32_t stream_offset(Stream *self) {
402 return (uint32_t)(self->input - self->start);
403}
404
405/******************
406 * CaptureListPool
407 ******************/
408
409static CaptureListPool capture_list_pool_new(void) {
410 return (CaptureListPool) {
411 .list = array_new(),
412 .empty_list = array_new(),
413 .max_capture_list_count = UINT32_MAX,
414 .free_capture_list_count = 0,
415 };
416}
417
418static void capture_list_pool_reset(CaptureListPool *self) {
419 for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
420 // This invalid size means that the list is not in use.
421 self->list.contents[i].size = UINT32_MAX;
422 }
423 self->free_capture_list_count = self->list.size;
424}
425
426static void capture_list_pool_delete(CaptureListPool *self) {
427 for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
428 array_delete(&self->list.contents[i]);
429 }
430 array_delete(&self->list);
431}
432
433static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
434 if (id >= self->list.size) return &self->empty_list;
435 return &self->list.contents[id];
436}
437
438static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
439 assert(id < self->list.size);
440 return &self->list.contents[id];
441}
442
443static bool capture_list_pool_is_empty(const CaptureListPool *self) {
444 // The capture list pool is empty if all allocated lists are in use, and we
445 // have reached the maximum allowed number of allocated lists.
446 return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
447}
448
449static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
450 // First see if any already allocated capture list is currently unused.
451 if (self->free_capture_list_count > 0) {
452 for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
453 if (self->list.contents[i].size == UINT32_MAX) {
454 array_clear(&self->list.contents[i]);
455 self->free_capture_list_count--;
456 return i;
457 }
458 }
459 }
460
461 // Otherwise allocate and initialize a new capture list, as long as that
462 // doesn't put us over the requested maximum.
463 uint32_t i = self->list.size;
464 if (i >= self->max_capture_list_count) {
465 return NONE;
466 }
467 CaptureList list;
468 array_init(&list);
469 array_push(&self->list, list);
470 return i;
471}
472
473static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
474 if (id >= self->list.size) return;
475 self->list.contents[id].size = UINT32_MAX;
476 self->free_capture_list_count++;
477}
478
479/**************
480 * Quantifiers
481 **************/
482
483static TSQuantifier quantifier_mul(
484 TSQuantifier left,
485 TSQuantifier right
486) {
487 switch (left)
488 {
489 case TSQuantifierZero:
490 return TSQuantifierZero;
491 case TSQuantifierZeroOrOne:
492 switch (right) {
493 case TSQuantifierZero:
494 return TSQuantifierZero;
495 case TSQuantifierZeroOrOne:
496 case TSQuantifierOne:
497 return TSQuantifierZeroOrOne;
498 case TSQuantifierZeroOrMore:
499 case TSQuantifierOneOrMore:
500 return TSQuantifierZeroOrMore;
501 };
502 break;
503 case TSQuantifierZeroOrMore:
504 switch (right) {
505 case TSQuantifierZero:
506 return TSQuantifierZero;
507 case TSQuantifierZeroOrOne:
508 case TSQuantifierZeroOrMore:
509 case TSQuantifierOne:
510 case TSQuantifierOneOrMore:
511 return TSQuantifierZeroOrMore;
512 };
513 break;
514 case TSQuantifierOne:
515 return right;
516 case TSQuantifierOneOrMore:
517 switch (right) {
518 case TSQuantifierZero:
519 return TSQuantifierZero;
520 case TSQuantifierZeroOrOne:
521 case TSQuantifierZeroOrMore:
522 return TSQuantifierZeroOrMore;
523 case TSQuantifierOne:
524 case TSQuantifierOneOrMore:
525 return TSQuantifierOneOrMore;
526 };
527 break;
528 }
529 return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
530}
531
532static TSQuantifier quantifier_join(
533 TSQuantifier left,
534 TSQuantifier right
535) {
536 switch (left)
537 {
538 case TSQuantifierZero:
539 switch (right) {
540 case TSQuantifierZero:
541 return TSQuantifierZero;
542 case TSQuantifierZeroOrOne:
543 case TSQuantifierOne:
544 return TSQuantifierZeroOrOne;
545 case TSQuantifierZeroOrMore:
546 case TSQuantifierOneOrMore:
547 return TSQuantifierZeroOrMore;
548 };
549 break;
550 case TSQuantifierZeroOrOne:
551 switch (right) {
552 case TSQuantifierZero:
553 case TSQuantifierZeroOrOne:
554 case TSQuantifierOne:
555 return TSQuantifierZeroOrOne;
556 break;
557 case TSQuantifierZeroOrMore:
558 case TSQuantifierOneOrMore:
559 return TSQuantifierZeroOrMore;
560 break;
561 };
562 break;
563 case TSQuantifierZeroOrMore:
564 return TSQuantifierZeroOrMore;
565 case TSQuantifierOne:
566 switch (right) {
567 case TSQuantifierZero:
568 case TSQuantifierZeroOrOne:
569 return TSQuantifierZeroOrOne;
570 case TSQuantifierZeroOrMore:
571 return TSQuantifierZeroOrMore;
572 case TSQuantifierOne:
573 return TSQuantifierOne;
574 case TSQuantifierOneOrMore:
575 return TSQuantifierOneOrMore;
576 };
577 break;
578 case TSQuantifierOneOrMore:
579 switch (right) {
580 case TSQuantifierZero:
581 case TSQuantifierZeroOrOne:
582 case TSQuantifierZeroOrMore:
583 return TSQuantifierZeroOrMore;
584 case TSQuantifierOne:
585 case TSQuantifierOneOrMore:
586 return TSQuantifierOneOrMore;
587 };
588 break;
589 }
590 return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
591}
592
593static TSQuantifier quantifier_add(
594 TSQuantifier left,
595 TSQuantifier right
596) {
597 switch (left)
598 {
599 case TSQuantifierZero:
600 return right;
601 case TSQuantifierZeroOrOne:
602 switch (right) {
603 case TSQuantifierZero:
604 return TSQuantifierZeroOrOne;
605 case TSQuantifierZeroOrOne:
606 case TSQuantifierZeroOrMore:
607 return TSQuantifierZeroOrMore;
608 case TSQuantifierOne:
609 case TSQuantifierOneOrMore:
610 return TSQuantifierOneOrMore;
611 };
612 break;
613 case TSQuantifierZeroOrMore:
614 switch (right) {
615 case TSQuantifierZero:
616 return TSQuantifierZeroOrMore;
617 case TSQuantifierZeroOrOne:
618 case TSQuantifierZeroOrMore:
619 return TSQuantifierZeroOrMore;
620 case TSQuantifierOne:
621 case TSQuantifierOneOrMore:
622 return TSQuantifierOneOrMore;
623 };
624 break;
625 case TSQuantifierOne:
626 switch (right) {
627 case TSQuantifierZero:
628 return TSQuantifierOne;
629 case TSQuantifierZeroOrOne:
630 case TSQuantifierZeroOrMore:
631 case TSQuantifierOne:
632 case TSQuantifierOneOrMore:
633 return TSQuantifierOneOrMore;
634 };
635 break;
636 case TSQuantifierOneOrMore:
637 return TSQuantifierOneOrMore;
638 }
639 return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
640}
641
642// Create new capture quantifiers structure
643static CaptureQuantifiers capture_quantifiers_new(void) {
644 return (CaptureQuantifiers) array_new();
645}
646
647// Delete capture quantifiers structure
648static void capture_quantifiers_delete(
649 CaptureQuantifiers *self
650) {
651 array_delete(self);
652}
653
654// Clear capture quantifiers structure
655static void capture_quantifiers_clear(
656 CaptureQuantifiers *self
657) {
658 array_clear(self);
659}
660
661// Replace capture quantifiers with the given quantifiers
662static void capture_quantifiers_replace(
663 CaptureQuantifiers *self,
664 CaptureQuantifiers *quantifiers
665) {
666 array_clear(self);
667 array_push_all(self, quantifiers);
668}
669
670// Return capture quantifier for the given capture id
671static TSQuantifier capture_quantifier_for_id(
672 const CaptureQuantifiers *self,
673 uint16_t id
674) {
675 return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id);
676}
677
678// Add the given quantifier to the current value for id
679static void capture_quantifiers_add_for_id(
680 CaptureQuantifiers *self,
681 uint16_t id,
682 TSQuantifier quantifier
683) {
684 if (self->size <= id) {
685 array_grow_by(self, id + 1 - self->size);
686 }
687 uint8_t *own_quantifier = array_get(self, id);
688 *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier);
689}
690
691// Point-wise add the given quantifiers to the current values
692static void capture_quantifiers_add_all(
693 CaptureQuantifiers *self,
694 CaptureQuantifiers *quantifiers
695) {
696 if (self->size < quantifiers->size) {
697 array_grow_by(self, quantifiers->size - self->size);
698 }
699 for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) {
700 uint8_t *quantifier = array_get(quantifiers, id);
701 uint8_t *own_quantifier = array_get(self, id);
702 *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
703 }
704}
705
706// Join the given quantifier with the current values
707static void capture_quantifiers_mul(
708 CaptureQuantifiers *self,
709 TSQuantifier quantifier
710) {
711 for (uint16_t id = 0; id < (uint16_t)self->size; id++) {
712 uint8_t *own_quantifier = array_get(self, id);
713 *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier);
714 }
715}
716
717// Point-wise join the quantifiers from a list of alternatives with the current values
718static void capture_quantifiers_join_all(
719 CaptureQuantifiers *self,
720 CaptureQuantifiers *quantifiers
721) {
722 if (self->size < quantifiers->size) {
723 array_grow_by(self, quantifiers->size - self->size);
724 }
725 for (uint32_t id = 0; id < quantifiers->size; id++) {
726 uint8_t *quantifier = array_get(quantifiers, id);
727 uint8_t *own_quantifier = array_get(self, id);
728 *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
729 }
730 for (uint32_t id = quantifiers->size; id < self->size; id++) {
731 uint8_t *own_quantifier = array_get(self, id);
732 *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero);
733 }
734}
735
736/**************
737 * SymbolTable
738 **************/
739
740static SymbolTable symbol_table_new(void) {
741 return (SymbolTable) {
742 .characters = array_new(),
743 .slices = array_new(),
744 };
745}
746
747static void symbol_table_delete(SymbolTable *self) {
748 array_delete(&self->characters);
749 array_delete(&self->slices);
750}
751
752static int symbol_table_id_for_name(
753 const SymbolTable *self,
754 const char *name,
755 uint32_t length
756) {
757 for (unsigned i = 0; i < self->slices.size; i++) {
758 Slice slice = self->slices.contents[i];
759 if (
760 slice.length == length &&
761 !strncmp(&self->characters.contents[slice.offset], name, length)
762 ) return i;
763 }
764 return -1;
765}
766
767static const char *symbol_table_name_for_id(
768 const SymbolTable *self,
769 uint16_t id,
770 uint32_t *length
771) {
772 Slice slice = self->slices.contents[id];
773 *length = slice.length;
774 return &self->characters.contents[slice.offset];
775}
776
777static uint16_t symbol_table_insert_name(
778 SymbolTable *self,
779 const char *name,
780 uint32_t length
781) {
782 int id = symbol_table_id_for_name(self, name, length);
783 if (id >= 0) return (uint16_t)id;
784 Slice slice = {
785 .offset = self->characters.size,
786 .length = length,
787 };
788 array_grow_by(&self->characters, length + 1);
789 memcpy(&self->characters.contents[slice.offset], name, length);
790 self->characters.contents[self->characters.size - 1] = 0;
791 array_push(&self->slices, slice);
792 return self->slices.size - 1;
793}
794
795/************
796 * QueryStep
797 ************/
798
799static QueryStep query_step__new(
800 TSSymbol symbol,
801 uint16_t depth,
802 bool is_immediate
803) {
804 QueryStep step = {
805 .symbol = symbol,
806 .depth = depth,
807 .field = 0,
808 .alternative_index = NONE,
809 .negated_field_list_id = 0,
810 .contains_captures = false,
811 .is_last_child = false,
812 .is_named = false,
813 .is_pass_through = false,
814 .is_dead_end = false,
815 .root_pattern_guaranteed = false,
816 .is_immediate = is_immediate,
817 .alternative_is_immediate = false,
818 };
819 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
820 step.capture_ids[i] = NONE;
821 }
822 return step;
823}
824
825static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
826 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
827 if (self->capture_ids[i] == NONE) {
828 self->capture_ids[i] = capture_id;
829 break;
830 }
831 }
832}
833
834static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
835 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
836 if (self->capture_ids[i] == capture_id) {
837 self->capture_ids[i] = NONE;
838 while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
839 if (self->capture_ids[i + 1] == NONE) break;
840 self->capture_ids[i] = self->capture_ids[i + 1];
841 self->capture_ids[i + 1] = NONE;
842 i++;
843 }
844 break;
845 }
846 }
847}
848
849/**********************
850 * StatePredecessorMap
851 **********************/
852
853static inline StatePredecessorMap state_predecessor_map_new(
854 const TSLanguage *language
855) {
856 return (StatePredecessorMap) {
857 .contents = ts_calloc(
858 (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
859 sizeof(TSStateId)
860 ),
861 };
862}
863
864static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
865 ts_free(self->contents);
866}
867
868static inline void state_predecessor_map_add(
869 StatePredecessorMap *self,
870 TSStateId state,
871 TSStateId predecessor
872) {
873 size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
874 TSStateId *count = &self->contents[index];
875 if (
876 *count == 0 ||
877 (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
878 ) {
879 (*count)++;
880 self->contents[index + *count] = predecessor;
881 }
882}
883
884static inline const TSStateId *state_predecessor_map_get(
885 const StatePredecessorMap *self,
886 TSStateId state,
887 unsigned *count
888) {
889 size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
890 *count = self->contents[index];
891 return &self->contents[index + 1];
892}
893
894/****************
895 * AnalysisState
896 ****************/
897
898static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
899 unsigned result = 0;
900 for (unsigned i = 0; i < self->depth; i++) {
901 TSSymbol symbol = self->stack[i].parent_symbol;
902 for (unsigned j = 0; j < i; j++) {
903 if (self->stack[j].parent_symbol == symbol) {
904 result++;
905 break;
906 }
907 }
908 }
909 return result;
910}
911
912static inline int analysis_state__compare_position(
913 AnalysisState *const *self,
914 AnalysisState *const *other
915) {
916 for (unsigned i = 0; i < (*self)->depth; i++) {
917 if (i >= (*other)->depth) return -1;
918 if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1;
919 if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1;
920 }
921 if ((*self)->depth < (*other)->depth) return 1;
922 if ((*self)->step_index < (*other)->step_index) return -1;
923 if ((*self)->step_index > (*other)->step_index) return 1;
924 return 0;
925}
926
927static inline int analysis_state__compare(
928 AnalysisState *const *self,
929 AnalysisState *const *other
930) {
931 int result = analysis_state__compare_position(self, other);
932 if (result != 0) return result;
933 for (unsigned i = 0; i < (*self)->depth; i++) {
934 if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1;
935 if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1;
936 if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1;
937 if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1;
938 if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1;
939 if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1;
940 }
941 return 0;
942}
943
944static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
945 if (self->depth == 0) {
946 return &self->stack[0];
947 }
948 return &self->stack[self->depth - 1];
949}
950
951static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
952 for (unsigned i = 0; i < self->depth; i++) {
953 if (self->stack[i].parent_symbol == symbol) return true;
954 }
955 return false;
956}
957
958/******************
959 * AnalysisStateSet
960 ******************/
961
962// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by
963// cloning one from scratch.
964static inline AnalysisState *analysis_state_pool__clone_or_reuse(
965 AnalysisStateSet *self,
966 AnalysisState *borrowed_item
967) {
968 AnalysisState *new_item;
969 if (self->size) {
970 new_item = array_pop(self);
971 } else {
972 new_item = ts_malloc(sizeof(AnalysisState));
973 }
974 *new_item = *borrowed_item;
975 return new_item;
976}
977
978// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this
979// set. The set does not contain duplicates, so if the item is already present, it will not be
980// inserted, and no clone will be made.
981//
982// The caller retains ownership of the passed-in memory. However, the clone that is created by this
983// function will be managed by the state set.
984static inline void analysis_state_set__insert_sorted(
985 AnalysisStateSet *self,
986 AnalysisStateSet *pool,
987 AnalysisState *borrowed_item
988) {
989 unsigned index, exists;
990 array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists);
991 if (!exists) {
992 AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
993 array_insert(self, index, new_item);
994 }
995}
996
997// Inserts a clone of the passed-in item at the end position of this list.
998//
999// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function
1000// `analysis_state__compare`) than largest item already in this set. If items are inserted in the
1001// wrong order, the set will not function properly for future use.
1002//
1003// The caller retains ownership of the passed-in memory. However, the clone that is created by this
1004// function will be managed by the state set.
1005static inline void analysis_state_set__push(
1006 AnalysisStateSet *self,
1007 AnalysisStateSet *pool,
1008 AnalysisState *borrowed_item
1009) {
1010 AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
1011 array_push(self, new_item);
1012}
1013
1014// Removes all items from this set, returning it to an empty state.
1015static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) {
1016 array_push_all(pool, self);
1017 array_clear(self);
1018}
1019
1020// Releases all memory that is managed with this state set, including any items currently present.
1021// After calling this function, the set is no longer suitable for use.
1022static inline void analysis_state_set__delete(AnalysisStateSet *self) {
1023 for (unsigned i = 0; i < self->size; i++) {
1024 ts_free(self->contents[i]);
1025 }
1026 array_delete(self);
1027}
1028
1029/****************
1030 * QueryAnalyzer
1031 ****************/
1032
1033static inline QueryAnalysis query_analysis__new() {
1034 return (QueryAnalysis) {
1035 .states = array_new(),
1036 .next_states = array_new(),
1037 .deeper_states = array_new(),
1038 .state_pool = array_new(),
1039 .final_step_indices = array_new(),
1040 .finished_parent_symbols = array_new(),
1041 .did_abort = false,
1042 };
1043}
1044
1045static inline void query_analysis__delete(QueryAnalysis *self) {
1046 analysis_state_set__delete(&self->states);
1047 analysis_state_set__delete(&self->next_states);
1048 analysis_state_set__delete(&self->deeper_states);
1049 analysis_state_set__delete(&self->state_pool);
1050 array_delete(&self->final_step_indices);
1051 array_delete(&self->finished_parent_symbols);
1052}
1053
1054/***********************
1055 * AnalysisSubgraphNode
1056 ***********************/
1057
1058static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
1059 if (self->state < other->state) return -1;
1060 if (self->state > other->state) return 1;
1061 if (self->child_index < other->child_index) return -1;
1062 if (self->child_index > other->child_index) return 1;
1063 if (self->done < other->done) return -1;
1064 if (self->done > other->done) return 1;
1065 if (self->production_id < other->production_id) return -1;
1066 if (self->production_id > other->production_id) return 1;
1067 return 0;
1068}
1069
1070/*********
1071 * Query
1072 *********/
1073
1074// The `pattern_map` contains a mapping from TSSymbol values to indices in the
1075// `steps` array. For a given syntax node, the `pattern_map` makes it possible
1076// to quickly find the starting steps of all of the patterns whose root matches
1077// that node. Each entry has two fields: a `pattern_index`, which identifies one
1078// of the patterns in the query, and a `step_index`, which indicates the start
1079// offset of that pattern's steps within the `steps` array.
1080//
1081// The entries are sorted by the patterns' root symbols, and lookups use a
1082// binary search. This ensures that the cost of this initial lookup step
1083// scales logarithmically with the number of patterns in the query.
1084//
1085// This returns `true` if the symbol is present and `false` otherwise.
1086// If the symbol is not present `*result` is set to the index where the
1087// symbol should be inserted.
1088static inline bool ts_query__pattern_map_search(
1089 const TSQuery *self,
1090 TSSymbol needle,
1091 uint32_t *result
1092) {
1093 uint32_t base_index = self->wildcard_root_pattern_count;
1094 uint32_t size = self->pattern_map.size - base_index;
1095 if (size == 0) {
1096 *result = base_index;
1097 return false;
1098 }
1099 while (size > 1) {
1100 uint32_t half_size = size / 2;
1101 uint32_t mid_index = base_index + half_size;
1102 TSSymbol mid_symbol = self->steps.contents[
1103 self->pattern_map.contents[mid_index].step_index
1104 ].symbol;
1105 if (needle > mid_symbol) base_index = mid_index;
1106 size -= half_size;
1107 }
1108
1109 TSSymbol symbol = self->steps.contents[
1110 self->pattern_map.contents[base_index].step_index
1111 ].symbol;
1112
1113 if (needle > symbol) {
1114 base_index++;
1115 if (base_index < self->pattern_map.size) {
1116 symbol = self->steps.contents[
1117 self->pattern_map.contents[base_index].step_index
1118 ].symbol;
1119 }
1120 }
1121
1122 *result = base_index;
1123 return needle == symbol;
1124}
1125
1126// Insert a new pattern's start index into the pattern map, maintaining
1127// the pattern map's ordering invariant.
1128static inline void ts_query__pattern_map_insert(
1129 TSQuery *self,
1130 TSSymbol symbol,
1131 PatternEntry new_entry
1132) {
1133 uint32_t index;
1134 ts_query__pattern_map_search(self, symbol, &index);
1135
1136 // Ensure that the entries are sorted not only by symbol, but also
1137 // by pattern_index. This way, states for earlier patterns will be
1138 // initiated first, which allows the ordering of the states array
1139 // to be maintained more efficiently.
1140 while (index < self->pattern_map.size) {
1141 PatternEntry *entry = &self->pattern_map.contents[index];
1142 if (
1143 self->steps.contents[entry->step_index].symbol == symbol &&
1144 entry->pattern_index < new_entry.pattern_index
1145 ) {
1146 index++;
1147 } else {
1148 break;
1149 }
1150 }
1151
1152 array_insert(&self->pattern_map, index, new_entry);
1153}
1154
1155// Walk the subgraph for this non-terminal, tracking all of the possible
1156// sequences of progress within the pattern.
1157static void ts_query__perform_analysis(
1158 TSQuery *self,
1159 const AnalysisSubgraphArray *subgraphs,
1160 QueryAnalysis *analysis
1161) {
1162 unsigned recursion_depth_limit = 0;
1163 unsigned prev_final_step_count = 0;
1164 array_clear(&analysis->final_step_indices);
1165 array_clear(&analysis->finished_parent_symbols);
1166
1167 for (unsigned iteration = 0;; iteration++) {
1168 if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
1169 analysis->did_abort = true;
1170 break;
1171 }
1172
1173 #ifdef DEBUG_ANALYZE_QUERY
1174 printf("Iteration: %u. Final step indices:", iteration);
1175 for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
1176 printf(" %4u", analysis->final_step_indices.contents[j]);
1177 }
1178 printf("\n");
1179 for (unsigned j = 0; j < analysis->states.size; j++) {
1180 AnalysisState *state = analysis->states.contents[j];
1181 printf(" %3u: step: %u, stack: [", j, state->step_index);
1182 for (unsigned k = 0; k < state->depth; k++) {
1183 printf(
1184 " {%s, child: %u, state: %4u",
1185 self->language->symbol_names[state->stack[k].parent_symbol],
1186 state->stack[k].child_index,
1187 state->stack[k].parse_state
1188 );
1189 if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
1190 if (state->stack[k].done) printf(", DONE");
1191 printf("}");
1192 }
1193 printf(" ]\n");
1194 }
1195 #endif
1196
1197 // If no further progress can be made within the current recursion depth limit, then
1198 // bump the depth limit by one, and continue to process the states the exceeded the
1199 // limit. But only allow this if progress has been made since the last time the depth
1200 // limit was increased.
1201 if (analysis->states.size == 0) {
1202 if (
1203 analysis->deeper_states.size > 0 &&
1204 analysis->final_step_indices.size > prev_final_step_count
1205 ) {
1206 #ifdef DEBUG_ANALYZE_QUERY
1207 printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
1208 #endif
1209
1210 prev_final_step_count = analysis->final_step_indices.size;
1211 recursion_depth_limit++;
1212 AnalysisStateSet _states = analysis->states;
1213 analysis->states = analysis->deeper_states;
1214 analysis->deeper_states = _states;
1215 continue;
1216 }
1217
1218 break;
1219 }
1220
1221 analysis_state_set__clear(&analysis->next_states, &analysis->state_pool);
1222 for (unsigned j = 0; j < analysis->states.size; j++) {
1223 AnalysisState * const state = analysis->states.contents[j];
1224
1225 // For efficiency, it's important to avoid processing the same analysis state more
1226 // than once. To achieve this, keep the states in order of ascending position within
1227 // their hypothetical syntax trees. In each iteration of this loop, start by advancing
1228 // the states that have made the least progress. Avoid advancing states that have already
1229 // made more progress.
1230 if (analysis->next_states.size > 0) {
1231 int comparison = analysis_state__compare_position(
1232 &state,
1233 array_back(&analysis->next_states)
1234 );
1235 if (comparison == 0) {
1236 analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state);
1237 continue;
1238 } else if (comparison > 0) {
1239 #ifdef DEBUG_ANALYZE_QUERY
1240 printf("Terminate iteration at state %u\n", j);
1241 #endif
1242 while (j < analysis->states.size) {
1243 analysis_state_set__push(
1244 &analysis->next_states,
1245 &analysis->state_pool,
1246 analysis->states.contents[j]
1247 );
1248 j++;
1249 }
1250 break;
1251 }
1252 }
1253
1254 const TSStateId parse_state = analysis_state__top(state)->parse_state;
1255 const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
1256 const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
1257 const unsigned child_index = analysis_state__top(state)->child_index;
1258 const QueryStep * const step = &self->steps.contents[state->step_index];
1259
1260 unsigned subgraph_index, exists;
1261 array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1262 if (!exists) continue;
1263 const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index];
1264
1265 // Follow every possible path in the parse table, but only visit states that
1266 // are part of the subgraph for the current symbol.
1267 LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
1268 while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1269 TSSymbol sym = lookahead_iterator.symbol;
1270
1271 AnalysisSubgraphNode successor = {
1272 .state = parse_state,
1273 .child_index = child_index,
1274 };
1275 if (lookahead_iterator.action_count) {
1276 const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
1277 if (action->type == TSParseActionTypeShift) {
1278 if (!action->shift.extra) {
1279 successor.state = action->shift.state;
1280 successor.child_index++;
1281 }
1282 } else {
1283 continue;
1284 }
1285 } else if (lookahead_iterator.next_state != 0) {
1286 successor.state = lookahead_iterator.next_state;
1287 successor.child_index++;
1288 } else {
1289 continue;
1290 }
1291
1292 unsigned node_index;
1293 array_search_sorted_with(
1294 &subgraph->nodes,
1295 analysis_subgraph_node__compare, &successor,
1296 &node_index, &exists
1297 );
1298 while (node_index < subgraph->nodes.size) {
1299 AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
1300 if (node->state != successor.state || node->child_index != successor.child_index) break;
1301
1302 // Use the subgraph to determine what alias and field will eventually be applied
1303 // to this child node.
1304 TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
1305 TSSymbol visible_symbol = alias
1306 ? alias
1307 : self->language->symbol_metadata[sym].visible
1308 ? self->language->public_symbol_map[sym]
1309 : 0;
1310 TSFieldId field_id = parent_field_id;
1311 if (!field_id) {
1312 const TSFieldMapEntry *field_map, *field_map_end;
1313 ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
1314 for (; field_map != field_map_end; field_map++) {
1315 if (!field_map->inherited && field_map->child_index == child_index) {
1316 field_id = field_map->field_id;
1317 break;
1318 }
1319 }
1320 }
1321
1322 // Create a new state that has advanced past this hypothetical subtree.
1323 AnalysisState next_state = *state;
1324 AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
1325 next_state_top->child_index = successor.child_index;
1326 next_state_top->parse_state = successor.state;
1327 if (node->done) next_state_top->done = true;
1328
1329 // Determine if this hypothetical child node would match the current step
1330 // of the query pattern.
1331 bool does_match = false;
1332 if (visible_symbol) {
1333 does_match = true;
1334 if (step->symbol == WILDCARD_SYMBOL) {
1335 if (
1336 step->is_named &&
1337 !self->language->symbol_metadata[visible_symbol].named
1338 ) does_match = false;
1339 } else if (step->symbol != visible_symbol) {
1340 does_match = false;
1341 }
1342 if (step->field && step->field != field_id) {
1343 does_match = false;
1344 }
1345 if (
1346 step->supertype_symbol &&
1347 !analysis_state__has_supertype(state, step->supertype_symbol)
1348 ) does_match = false;
1349 }
1350
1351 // If this child is hidden, then descend into it and walk through its children.
1352 // If the top entry of the stack is at the end of its rule, then that entry can
1353 // be replaced. Otherwise, push a new entry onto the stack.
1354 else if (sym >= self->language->token_count) {
1355 if (!next_state_top->done) {
1356 if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
1357 #ifdef DEBUG_ANALYZE_QUERY
1358 printf("Exceeded depth limit for state %u\n", j);
1359 #endif
1360
1361 analysis->did_abort = true;
1362 continue;
1363 }
1364
1365 next_state.depth++;
1366 next_state_top = analysis_state__top(&next_state);
1367 }
1368
1369 *next_state_top = (AnalysisStateEntry) {
1370 .parse_state = parse_state,
1371 .parent_symbol = sym,
1372 .child_index = 0,
1373 .field_id = field_id,
1374 .done = false,
1375 };
1376
1377 if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
1378 analysis_state_set__insert_sorted(
1379 &analysis->deeper_states,
1380 &analysis->state_pool,
1381 &next_state
1382 );
1383 continue;
1384 }
1385 }
1386
1387 // Pop from the stack when this state reached the end of its current syntax node.
1388 while (next_state.depth > 0 && next_state_top->done) {
1389 next_state.depth--;
1390 next_state_top = analysis_state__top(&next_state);
1391 }
1392
1393 // If this hypothetical child did match the current step of the query pattern,
1394 // then advance to the next step at the current depth. This involves skipping
1395 // over any descendant steps of the current child.
1396 const QueryStep *next_step = step;
1397 if (does_match) {
1398 for (;;) {
1399 next_state.step_index++;
1400 next_step = &self->steps.contents[next_state.step_index];
1401 if (
1402 next_step->depth == PATTERN_DONE_MARKER ||
1403 next_step->depth <= step->depth
1404 ) break;
1405 }
1406 } else if (successor.state == parse_state) {
1407 continue;
1408 }
1409
1410 for (;;) {
1411 // Skip pass-through states. Although these states have alternatives, they are only
1412 // used to implement repetitions, and query analysis does not need to process
1413 // repetitions in order to determine whether steps are possible and definite.
1414 if (next_step->is_pass_through) {
1415 next_state.step_index++;
1416 next_step++;
1417 continue;
1418 }
1419
1420 // If the pattern is finished or hypothetical parent node is complete, then
1421 // record that matching can terminate at this step of the pattern. Otherwise,
1422 // add this state to the list of states to process on the next iteration.
1423 if (!next_step->is_dead_end) {
1424 bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth;
1425 if (did_finish_pattern) {
1426 array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol);
1427 } else if (next_state.depth == 0) {
1428 array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index);
1429 } else {
1430 analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state);
1431 }
1432 }
1433
1434 // If the state has advanced to a step with an alternative step, then add another state
1435 // at that alternative step. This process is simpler than the process of actually matching a
1436 // pattern during query execution, because for the purposes of query analysis, there is no
1437 // need to process repetitions.
1438 if (
1439 does_match &&
1440 next_step->alternative_index != NONE &&
1441 next_step->alternative_index > next_state.step_index
1442 ) {
1443 next_state.step_index = next_step->alternative_index;
1444 next_step = &self->steps.contents[next_state.step_index];
1445 } else {
1446 break;
1447 }
1448 }
1449 }
1450 }
1451 }
1452
1453 AnalysisStateSet _states = analysis->states;
1454 analysis->states = analysis->next_states;
1455 analysis->next_states = _states;
1456 }
1457}
1458
// Analyze every pattern of the query against the language's parse table.
//
// Effects visible in this function:
// - sets `contains_captures`, `parent_pattern_guaranteed` and `root_pattern_guaranteed`
//   on the query's steps;
// - sets `is_non_local` on patterns and fills `repeat_symbols_with_rootless_patterns`;
// - when a pattern is rejected, stores the byte offset of the offending step in
//   `*error_offset`.
//
// Returns true when no analyzed pattern was rejected. A pattern is rejected when its
// parent symbol has no subgraph, or when the analysis finished without recording any
// finished parent symbol. Analysis stops at the first rejected pattern, but the later
// phases of this function (predicate handling, propagation, rootless analysis, cleanup)
// still run before returning.
1459static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
// Remember the `pattern_map` indices of the non-rooted patterns whose first step is
// not a wildcard. They are analyzed separately near the end of this function.
1460 Array(uint16_t) non_rooted_pattern_start_steps = array_new();
1461 for (unsigned i = 0; i < self->pattern_map.size; i++) {
1462 PatternEntry *pattern = &self->pattern_map.contents[i];
1463 if (!pattern->is_rooted) {
1464 QueryStep *step = &self->steps.contents[pattern->step_index];
1465 if (step->symbol != WILDCARD_SYMBOL) {
1466 array_push(&non_rooted_pattern_start_steps, i);
1467 }
1468 }
1469 }
1470
1471 // Walk forward through all of the steps in the query, computing some
1472 // basic information about each step. Mark all of the steps that contain
1473 // captures, and record the indices of all of the steps that have child steps.
1474 Array(uint32_t) parent_step_indices = array_new();
1475 for (unsigned i = 0; i < self->steps.size; i++) {
1476 QueryStep *step = &self->steps.contents[i];
// Pattern-terminating marker steps are always flagged as guaranteed and otherwise skipped.
1477 if (step->depth == PATTERN_DONE_MARKER) {
1478 step->parent_pattern_guaranteed = true;
1479 step->root_pattern_guaranteed = true;
1480 continue;
1481 }
1482
1483 bool has_children = false;
1484 bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
1485 step->contains_captures = step->capture_ids[0] != NONE;
// Scan the steps that follow until one is at the same or a shallower depth (or is a
// done marker); everything in between is treated as a descendant of `step`.
1486 for (unsigned j = i + 1; j < self->steps.size; j++) {
1487 QueryStep *next_step = &self->steps.contents[j];
1488 if (
1489 next_step->depth == PATTERN_DONE_MARKER ||
1490 next_step->depth <= step->depth
1491 ) break;
1492 if (next_step->capture_ids[0] != NONE) {
1493 step->contains_captures = true;
1494 }
// Descendants of a non-wildcard step start out as guaranteed; the analysis below
// clears these flags wherever a match can terminate.
1495 if (!is_wildcard) {
1496 next_step->root_pattern_guaranteed = true;
1497 next_step->parent_pattern_guaranteed = true;
1498 }
1499 has_children = true;
1500 }
1501
// Only non-wildcard steps with descendants are analyzed as parents.
1502 if (has_children && !is_wildcard) {
1503 array_push(&parent_step_indices, i);
1504 }
1505 }
1506
1507 // For every parent symbol in the query, initialize an 'analysis subgraph'.
1508 // This subgraph lists all of the states in the parse table that are directly
1509 // involved in building subtrees for this symbol.
1510 //
1511 // In addition to the parent symbols in the query, construct subgraphs for all
1512 // of the hidden symbols in the grammar, because these might occur within
1513 // one of the parent nodes, such that their children appear to belong to the
1514 // parent.
1515 AnalysisSubgraphArray subgraphs = array_new();
1516 for (unsigned i = 0; i < parent_step_indices.size; i++) {
1517 uint32_t parent_step_index = parent_step_indices.contents[i];
1518 TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1519 AnalysisSubgraph subgraph = { .symbol = parent_symbol };
1520 array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1521 }
// Symbols in the range [token_count, symbol_count) that are not visible also get a subgraph.
1522 for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) {
1523 if (!ts_language_symbol_metadata(self->language, sym).visible) {
1524 AnalysisSubgraph subgraph = { .symbol = sym };
1525 array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1526 }
1527 }
1528
1529 // Scan the parse table to find the data needed to populate these subgraphs.
1530 // Collect three things during this scan:
1531 // 1) All of the parse states where one of these symbols can start.
1532 // 2) All of the parse states where one of these symbols can end, along
1533 // with information about the node that would be created.
1534 // 3) A list of predecessor states for each state.
1535 StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
// The scan starts at state 1, so state 0 is never visited.
// NOTE(review): presumably state 0 is the error-recovery state — confirm.
1536 for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) {
1537 unsigned subgraph_index, exists;
1538 LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
1539 while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1540 if (lookahead_iterator.action_count) {
1541 for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
1542 const TSParseAction *action = &lookahead_iterator.actions[i];
// A reduce action marks `state` as an end state for the reduced symbol and for each of
// the symbols returned by ts_language_aliases_for_symbol.
1543 if (action->type == TSParseActionTypeReduce) {
1544 const TSSymbol *aliases, *aliases_end;
1545 ts_language_aliases_for_symbol(
1546 self->language,
1547 action->reduce.symbol,
1548 &aliases,
1549 &aliases_end
1550 );
1551 for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1552 array_search_sorted_by(
1553 &subgraphs,
1554 .symbol,
1555 *symbol,
1556 &subgraph_index,
1557 &exists
1558 );
1559 if (exists) {
1560 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
// Add at most one end node per state: the check only compares against the most
// recently pushed node, which suffices because states are visited in ascending order.
1561 if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
1562 array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
1563 .state = state,
1564 .production_id = action->reduce.production_id,
1565 .child_index = action->reduce.child_count,
1566 .done = true,
1567 }));
1568 }
1569 }
1570 }
// A non-extra shift contributes an edge to the predecessor map.
1571 } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
1572 TSStateId next_state = action->shift.state;
1573 state_predecessor_map_add(&predecessor_map, next_state, state);
1574 }
1575 }
// Entries without an action list but with a nonzero successor state.
// NOTE(review): presumably these are non-terminal (goto) entries — confirm.
1576 } else if (lookahead_iterator.next_state != 0) {
// Self-transitions are not recorded as predecessors.
1577 if (lookahead_iterator.next_state != state) {
1578 state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
1579 }
// Start states are only recorded for states that ts_language_state_is_primary accepts.
1580 if (ts_language_state_is_primary(self->language, state)) {
1581 const TSSymbol *aliases, *aliases_end;
1582 ts_language_aliases_for_symbol(
1583 self->language,
1584 lookahead_iterator.symbol,
1585 &aliases,
1586 &aliases_end
1587 );
1588 for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1589 array_search_sorted_by(
1590 &subgraphs,
1591 .symbol,
1592 *symbol,
1593 &subgraph_index,
1594 &exists
1595 );
1596 if (exists) {
1597 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
// Avoid pushing the same start state twice in a row.
1598 if (
1599 subgraph->start_states.size == 0 ||
1600 *array_back(&subgraph->start_states) != state
1601 )
1602 array_push(&subgraph->start_states, state);
1603 }
1604 }
1605 }
1606 }
1607 }
1608 }
1609
1610 // For each subgraph, compute the preceding states by walking backward
1611 // from the end states using the predecessor map.
1612 Array(AnalysisSubgraphNode) next_nodes = array_new();
1613 for (unsigned i = 0; i < subgraphs.size; i++) {
1614 AnalysisSubgraph *subgraph = &subgraphs.contents[i];
// Subgraphs that received no end node are dropped. After erasing, `i` is decremented so
// that the loop's `i++` revisits the same slot; for i == 0 the unsigned wrap-around is
// undone by that increment.
1615 if (subgraph->nodes.size == 0) {
1616 array_delete(&subgraph->start_states);
1617 array_erase(&subgraphs, i);
1618 i--;
1619 continue;
1620 }
// `next_nodes` is the worklist of nodes whose predecessors still have to be expanded.
1621 array_assign(&next_nodes, &subgraph->nodes);
1622 while (next_nodes.size > 0) {
1623 AnalysisSubgraphNode node = array_pop(&next_nodes);
// Predecessors are created with `child_index - 1`, so the backward walk stops once a
// node with child_index 1 has been reached; nodes with child_index 0 are never added.
1624 if (node.child_index > 1) {
1625 unsigned predecessor_count;
1626 const TSStateId *predecessors = state_predecessor_map_get(
1627 &predecessor_map,
1628 node.state,
1629 &predecessor_count
1630 );
1631 for (unsigned j = 0; j < predecessor_count; j++) {
1632 AnalysisSubgraphNode predecessor_node = {
1633 .state = predecessors[j],
1634 .child_index = node.child_index - 1,
1635 .production_id = node.production_id,
1636 .done = false,
1637 };
// Keep `subgraph->nodes` sorted and free of duplicates; only newly inserted nodes are
// queued for further expansion.
1638 unsigned index, exists;
1639 array_search_sorted_with(
1640 &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
1641 &index, &exists
1642 );
1643 if (!exists) {
1644 array_insert(&subgraph->nodes, index, predecessor_node);
1645 array_push(&next_nodes, predecessor_node);
1646 }
1647 }
1648 }
1649 }
1650 }
1651
// Optional dump of every subgraph (start states, then nodes) to stdout.
1652 #ifdef DEBUG_ANALYZE_QUERY
1653 printf("\nSubgraphs:\n");
1654 for (unsigned i = 0; i < subgraphs.size; i++) {
1655 AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1656 printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
1657 for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1658 printf(
1659 " {state: %u}\n",
1660 subgraph->start_states.contents[j]
1661 );
1662 }
1663 for (unsigned j = 0; j < subgraph->nodes.size; j++) {
1664 AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
1665 printf(
1666 " {state: %u, child_index: %u, production_id: %u, done: %d}\n",
1667 node->state, node->child_index, node->production_id, node->done
1668 );
1669 }
1670 printf("\n");
1671 }
1672 #endif
1673
1674 // For each non-terminal pattern, determine if the pattern can successfully match,
1675 // and identify all of the possible children within the pattern where matching could fail.
1676 bool all_patterns_are_valid = true;
1677 QueryAnalysis analysis = query_analysis__new();
1678 for (unsigned i = 0; i < parent_step_indices.size; i++) {
1679 uint16_t parent_step_index = parent_step_indices.contents[i];
1680 uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
1681 TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
// Parent steps that use the built-in error symbol are not analyzed.
1682 if (parent_symbol == ts_builtin_sym_error) continue;
1683
1684 // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
1685 // root symbol is a terminal, then return an error.
1686 unsigned subgraph_index, exists;
1687 array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1688 if (!exists) {
// Report the error at the byte offset of the parent's first child step.
1689 unsigned first_child_step_index = parent_step_index + 1;
1690 uint32_t j, child_exists;
1691 array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists);
1692 assert(child_exists);
1693 *error_offset = self->step_offsets.contents[j].byte_offset;
1694 all_patterns_are_valid = false;
1695 break;
1696 }
1697
1698 // Initialize an analysis state at every parse state in the table where
1699 // this parent symbol can occur.
1700 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1701 analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1702 analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1703 for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1704 TSStateId parse_state = subgraph->start_states.contents[j];
1705 analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1706 .step_index = parent_step_index + 1,
1707 .stack = {
1708 [0] = {
1709 .parse_state = parse_state,
1710 .parent_symbol = parent_symbol,
1711 .child_index = 0,
1712 .field_id = 0,
1713 .done = false,
1714 },
1715 },
1716 .depth = 1,
1717 .root_symbol = parent_symbol,
1718 }));
1719 }
1720
// NOTE(review): this debug print reads `analysis.states.contents[0]`; if the subgraph
// has no start states the set is empty at this point — confirm that cannot happen.
1721 #ifdef DEBUG_ANALYZE_QUERY
1722 printf(
1723 "\nWalk states for %s:\n",
1724 ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol)
1725 );
1726 #endif
1727
1728 analysis.did_abort = false;
1729 ts_query__perform_analysis(self, &subgraphs, &analysis);
1730
1731 // If this pattern could not be fully analyzed, then every step should
1732 // be considered fallible.
1733 if (analysis.did_abort) {
// Visit the parent's descendants: stop at the first step that is not deeper than the
// parent or that is a done marker. Dead-end steps keep their flags.
1734 for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
1735 QueryStep *step = &self->steps.contents[j];
1736 if (
1737 step->depth <= parent_depth ||
1738 step->depth == PATTERN_DONE_MARKER
1739 ) break;
1740 if (!step->is_dead_end) {
1741 step->parent_pattern_guaranteed = false;
1742 step->root_pattern_guaranteed = false;
1743 }
1744 }
1745 continue;
1746 }
1747
1748 // If this pattern cannot match, store the pattern index so that it can be
1749 // returned to the caller.
1750 if (analysis.finished_parent_symbols.size == 0) {
1751 assert(analysis.final_step_indices.size > 0);
// The last recorded final step index is reported as the impossible step.
1752 uint16_t impossible_step_index = *array_back(&analysis.final_step_indices);
1753 uint32_t j, impossible_exists;
1754 array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists);
// Clamp the search result so that it always indexes an existing offset entry.
1755 if (j >= self->step_offsets.size) j = self->step_offsets.size - 1;
1756 *error_offset = self->step_offsets.contents[j].byte_offset;
1757 all_patterns_are_valid = false;
1758 break;
1759 }
1760
1761 // Mark as fallible any step where a match terminated.
1762 // Later, this property will be propagated to all of the step's predecessors.
1763 for (unsigned j = 0; j < analysis.final_step_indices.size; j++) {
1764 uint32_t final_step_index = analysis.final_step_indices.contents[j];
1765 QueryStep *step = &self->steps.contents[final_step_index];
// Only steps deeper than the parent that are neither done markers nor dead ends are affected.
1766 if (
1767 step->depth != PATTERN_DONE_MARKER &&
1768 step->depth > parent_depth &&
1769 !step->is_dead_end
1770 ) {
1771 step->parent_pattern_guaranteed = false;
1772 step->root_pattern_guaranteed = false;
1773 }
1774 }
1775 }
1776
1777 // Mark as indefinite any step with captures that are used in predicates.
1778 Array(uint16_t) predicate_capture_ids = array_new();
1779 for (unsigned i = 0; i < self->patterns.size; i++) {
1780 QueryPattern *pattern = &self->patterns.contents[i];
1781
1782 // Gather all of the captures that are used in predicates for this pattern.
1783 array_clear(&predicate_capture_ids);
1784 for (
1785 unsigned start = pattern->predicate_step.offset,
1786 end = start + pattern->predicate_step.length,
1787 j = start; j < end; j++
1788 ) {
1789 TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
1790 if (step->type == TSQueryPredicateStepTypeCapture) {
1791 uint16_t value_id = step->value_id;
1792 array_insert_sorted_by(&predicate_capture_ids, , value_id);
1793 }
1794 }
1795
1796 // Find all of the steps that have these captures.
1797 for (
1798 unsigned start = pattern->step.offset,
1799 end = start + pattern->step.length,
1800 j = start; j < end; j++
1801 ) {
1802 QueryStep *step = &self->steps.contents[j];
// `capture_ids` is scanned up to the first NONE entry. Only `root_pattern_guaranteed`
// is cleared here; `parent_pattern_guaranteed` is left as it is.
1803 for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
1804 uint16_t capture_id = step->capture_ids[k];
1805 if (capture_id == NONE) break;
1806 unsigned index, exists;
1807 array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
1808 if (exists) {
1809 step->root_pattern_guaranteed = false;
1810 break;
1811 }
1812 }
1813 }
1814 }
1815
1816 // Propagate fallibility. If a pattern is fallible at a given step, then it is
1817 // fallible at all of its preceding steps.
// `done` starts as true for an empty step list, which also keeps `steps.size - 1` below
// from wrapping around. The outer loop repeats until a full pass changes no flag.
1818 bool done = self->steps.size == 0;
1819 while (!done) {
1820 done = true;
// Walk from the last step down to index 1; each iteration may update step `i - 1`.
1821 for (unsigned i = self->steps.size - 1; i > 0; i--) {
1822 QueryStep *step = &self->steps.contents[i];
1823 if (step->depth == PATTERN_DONE_MARKER) continue;
1824
1825 // Determine if this step is definite or has definite alternatives.
// Despite its name, this local tracks `root_pattern_guaranteed` of the step or of one
// of the alternatives reachable through `alternative_index` values that are >= i.
1826 bool parent_pattern_guaranteed = false;
1827 for (;;) {
1828 if (step->root_pattern_guaranteed) {
1829 parent_pattern_guaranteed = true;
1830 break;
1831 }
1832 if (step->alternative_index == NONE || step->alternative_index < i) {
1833 break;
1834 }
1835 step = &self->steps.contents[step->alternative_index];
1836 }
1837
1838 // If not, mark its predecessor as indefinite.
1839 if (!parent_pattern_guaranteed) {
1840 QueryStep *prev_step = &self->steps.contents[i - 1];
1841 if (
1842 !prev_step->is_dead_end &&
1843 prev_step->depth != PATTERN_DONE_MARKER &&
1844 prev_step->root_pattern_guaranteed
1845 ) {
1846 prev_step->root_pattern_guaranteed = false;
1847 done = false;
1848 }
1849 }
1850 }
1851 }
1852
// Optional dump of the final per-step flags to stdout.
1853 #ifdef DEBUG_ANALYZE_QUERY
1854 printf("Steps:\n");
1855 for (unsigned i = 0; i < self->steps.size; i++) {
1856 QueryStep *step = &self->steps.contents[i];
1857 if (step->depth == PATTERN_DONE_MARKER) {
1858 printf(" %u: DONE\n", i);
1859 } else {
1860 printf(
1861 " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
1862 i,
1863 (step->symbol == WILDCARD_SYMBOL)
1864 ? "ANY"
1865 : ts_language_symbol_name(self->language, step->symbol),
1866 (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
1867 step->depth,
1868 step->parent_pattern_guaranteed,
1869 step->root_pattern_guaranteed
1870 );
1871 }
1872 }
1873 #endif
1874
1875 // Determine which repetition symbols in this language have the possibility
1876 // of matching non-rooted patterns in this query. These repetition symbols
1877 // prevent certain optimizations with range restrictions.
// `did_abort` is reset once here, not once per pattern; in this function it is only
// read again by the debug output below.
1878 analysis.did_abort = false;
1879 for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) {
1880 uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i];
1881 PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index];
1882
1883 analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1884 analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
// Seed the analysis from every start state of each subgraph whose symbol is neither
// visible nor named.
1885 for (unsigned j = 0; j < subgraphs.size; j++) {
1886 AnalysisSubgraph *subgraph = &subgraphs.contents[j];
1887 TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol);
1888 if (metadata.visible || metadata.named) continue;
1889
1890 for (uint32_t k = 0; k < subgraph->start_states.size; k++) {
1891 TSStateId parse_state = subgraph->start_states.contents[k];
1892 analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1893 .step_index = pattern_entry->step_index,
1894 .stack = {
1895 [0] = {
1896 .parse_state = parse_state,
1897 .parent_symbol = subgraph->symbol,
1898 .child_index = 0,
1899 .field_id = 0,
1900 .done = false,
1901 },
1902 },
1903 .root_symbol = subgraph->symbol,
1904 .depth = 1,
1905 }));
1906 }
1907 }
1908
1909 #ifdef DEBUG_ANALYZE_QUERY
1910 printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index);
1911 #endif
1912
1913 ts_query__perform_analysis(
1914 self,
1915 &subgraphs,
1916 &analysis
1917 );
1918
// Any finished parent symbol marks the pattern as non-local, and each such symbol is
// added to the query's sorted list of repeat symbols with rootless patterns.
1919 if (analysis.finished_parent_symbols.size > 0) {
1920 self->patterns.contents[pattern_entry->pattern_index].is_non_local = true;
1921 }
1922
1923 for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) {
1924 TSSymbol symbol = analysis.finished_parent_symbols.contents[k];
1925 array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol);
1926 }
1927 }
1928
1929 #ifdef DEBUG_ANALYZE_QUERY
1930 if (self->repeat_symbols_with_rootless_patterns.size > 0) {
1931 printf("\nRepetition symbols with rootless patterns:\n");
1932 printf("aborted analysis: %d\n", analysis.did_abort);
1933 for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
1934 TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i];
1935 printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
1936 }
1937 printf("\n");
1938 }
1939 #endif
1940
1941 // Cleanup
// Every temporary container created in this function is released here, on both the
// success and the failure path (there is no early return above).
1942 for (unsigned i = 0; i < subgraphs.size; i++) {
1943 array_delete(&subgraphs.contents[i].start_states);
1944 array_delete(&subgraphs.contents[i].nodes);
1945 }
1946 array_delete(&subgraphs);
1947 query_analysis__delete(&analysis);
1948 array_delete(&next_nodes);
1949 array_delete(&non_rooted_pattern_start_steps);
1950 array_delete(&parent_step_indices);
1951 array_delete(&predicate_capture_ids);
1952 state_predecessor_map_delete(&predecessor_map);
1953
1954 return all_patterns_are_valid;
1955}
1956
1957static void ts_query__add_negated_fields(
1958 TSQuery *self,
1959 uint16_t step_index,
1960 TSFieldId *field_ids,
1961 uint16_t field_count
1962) {
1963 QueryStep *step = &self->steps.contents[step_index];
1964
1965 // The negated field array stores a list of field lists, separated by zeros.
1966 // Try to find the start index of an existing list that matches this new list.
1967 bool failed_match = false;
1968 unsigned match_count = 0;
1969 unsigned start_i = 0;
1970 for (unsigned i = 0; i < self->negated_fields.size; i++) {
1971 TSFieldId existing_field_id = self->negated_fields.contents[i];
1972
1973 // At each zero value, terminate the match attempt. If we've exactly
1974 // matched the new field list, then reuse this index. Otherwise,
1975 // start over the matching process.
1976 if (existing_field_id == 0) {
1977 if (match_count == field_count) {
1978 step->negated_field_list_id = start_i;
1979 return;
1980 } else {
1981 start_i = i + 1;
1982 match_count = 0;
1983 failed_match = false;
1984 }
1985 }
1986
1987 // If the existing list matches our new list so far, then advance
1988 // to the next element of the new list.
1989 else if (
1990 match_count < field_count &&
1991 existing_field_id == field_ids[match_count] &&
1992 !failed_match
1993 ) {
1994 match_count++;
1995 }
1996
1997 // Otherwise, this existing list has failed to match.
1998 else {
1999 match_count = 0;
2000 failed_match = true;
2001 }
2002 }
2003
2004 step->negated_field_list_id = self->negated_fields.size;
2005 array_extend(&self->negated_fields, field_count, field_ids);
2006 array_push(&self->negated_fields, 0);
2007}
2008
2009static TSQueryError ts_query__parse_string_literal(
2010 TSQuery *self,
2011 Stream *stream
2012) {
2013 const char *string_start = stream->input;
2014 if (stream->next != '"') return TSQueryErrorSyntax;
2015 stream_advance(stream);
2016 const char *prev_position = stream->input;
2017
2018 bool is_escaped = false;
2019 array_clear(&self->string_buffer);
2020 for (;;) {
2021 if (is_escaped) {
2022 is_escaped = false;
2023 switch (stream->next) {
2024 case 'n':
2025 array_push(&self->string_buffer, '\n');
2026 break;
2027 case 'r':
2028 array_push(&self->string_buffer, '\r');
2029 break;
2030 case 't':
2031 array_push(&self->string_buffer, '\t');
2032 break;
2033 case '0':
2034 array_push(&self->string_buffer, '\0');
2035 break;
2036 default:
2037 array_extend(&self->string_buffer, stream->next_size, stream->input);
2038 break;
2039 }
2040 prev_position = stream->input + stream->next_size;
2041 } else {
2042 if (stream->next == '\\') {
2043 array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2044 prev_position = stream->input + 1;
2045 is_escaped = true;
2046 } else if (stream->next == '"') {
2047 array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2048 stream_advance(stream);
2049 return TSQueryErrorNone;
2050 } else if (stream->next == '\n') {
2051 stream_reset(stream, string_start);
2052 return TSQueryErrorSyntax;
2053 }
2054 }
2055 if (!stream_advance(stream)) {
2056 stream_reset(stream, string_start);
2057 return TSQueryErrorSyntax;
2058 }
2059 }
2060}
2061
2062// Parse a single predicate associated with a pattern, adding it to the
2063// query's internal `predicate_steps` array. Predicates are arbitrary
2064// S-expressions associated with a pattern which are meant to be handled at
2065// a higher level of abstraction, such as the Rust/JavaScript bindings. They
2066// can contain '@'-prefixed capture names, double-quoted strings, and bare
2067// symbols, which also represent strings.
2068static TSQueryError ts_query__parse_predicate(
2069 TSQuery *self,
2070 Stream *stream
2071) {
2072 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2073 const char *predicate_name = stream->input;
2074 stream_scan_identifier(stream);
2075 uint32_t length = (uint32_t)(stream->input - predicate_name);
2076 uint16_t id = symbol_table_insert_name(
2077 &self->predicate_values,
2078 predicate_name,
2079 length
2080 );
2081 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2082 .type = TSQueryPredicateStepTypeString,
2083 .value_id = id,
2084 }));
2085 stream_skip_whitespace(stream);
2086
2087 for (;;) {
2088 if (stream->next == ')') {
2089 stream_advance(stream);
2090 stream_skip_whitespace(stream);
2091 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2092 .type = TSQueryPredicateStepTypeDone,
2093 .value_id = 0,
2094 }));
2095 break;
2096 }
2097
2098 // Parse an '@'-prefixed capture name
2099 else if (stream->next == '@') {
2100 stream_advance(stream);
2101
2102 // Parse the capture name
2103 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2104 const char *capture_name = stream->input;
2105 stream_scan_identifier(stream);
2106 uint32_t capture_length = (uint32_t)(stream->input - capture_name);
2107
2108 // Add the capture id to the first step of the pattern
2109 int capture_id = symbol_table_id_for_name(
2110 &self->captures,
2111 capture_name,
2112 capture_length
2113 );
2114 if (capture_id == -1) {
2115 stream_reset(stream, capture_name);
2116 return TSQueryErrorCapture;
2117 }
2118
2119 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2120 .type = TSQueryPredicateStepTypeCapture,
2121 .value_id = capture_id,
2122 }));
2123 }
2124
2125 // Parse a string literal
2126 else if (stream->next == '"') {
2127 TSQueryError e = ts_query__parse_string_literal(self, stream);
2128 if (e) return e;
2129 uint16_t query_id = symbol_table_insert_name(
2130 &self->predicate_values,
2131 self->string_buffer.contents,
2132 self->string_buffer.size
2133 );
2134 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2135 .type = TSQueryPredicateStepTypeString,
2136 .value_id = query_id,
2137 }));
2138 }
2139
2140 // Parse a bare symbol
2141 else if (stream_is_ident_start(stream)) {
2142 const char *symbol_start = stream->input;
2143 stream_scan_identifier(stream);
2144 uint32_t symbol_length = (uint32_t)(stream->input - symbol_start);
2145 uint16_t query_id = symbol_table_insert_name(
2146 &self->predicate_values,
2147 symbol_start,
2148 symbol_length
2149 );
2150 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2151 .type = TSQueryPredicateStepTypeString,
2152 .value_id = query_id,
2153 }));
2154 }
2155
2156 else {
2157 return TSQueryErrorSyntax;
2158 }
2159
2160 stream_skip_whitespace(stream);
2161 }
2162
2163 return 0;
2164}
2165
2166// Read one S-expression pattern from the stream, and incorporate it into
2167// the query's internal state machine representation. For nested patterns,
2168// this function calls itself recursively.
2169//
2170// The caller is responsible for passing in a dedicated CaptureQuantifiers.
2171// These should not be shared between different calls to ts_query__parse_pattern!
2172static TSQueryError ts_query__parse_pattern(
2173 TSQuery *self,
2174 Stream *stream,
2175 uint32_t depth,
2176 bool is_immediate,
2177 CaptureQuantifiers *capture_quantifiers
2178) {
2179 if (stream->next == 0) return TSQueryErrorSyntax;
2180 if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
2181
2182 const uint32_t starting_step_index = self->steps.size;
2183
2184 // Store the byte offset of each step in the query.
2185 if (
2186 self->step_offsets.size == 0 ||
2187 array_back(&self->step_offsets)->step_index != starting_step_index
2188 ) {
2189 array_push(&self->step_offsets, ((StepOffset) {
2190 .step_index = starting_step_index,
2191 .byte_offset = stream_offset(stream),
2192 }));
2193 }
2194
2195 // An open bracket is the start of an alternation.
2196 if (stream->next == '[') {
2197 stream_advance(stream);
2198 stream_skip_whitespace(stream);
2199
2200 // Parse each branch, and add a placeholder step in between the branches.
2201 Array(uint32_t) branch_step_indices = array_new();
2202 CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
2203 for (;;) {
2204 uint32_t start_index = self->steps.size;
2205 TSQueryError e = ts_query__parse_pattern(
2206 self,
2207 stream,
2208 depth,
2209 is_immediate,
2210 &branch_capture_quantifiers
2211 );
2212
2213 if (e == PARENT_DONE) {
2214 if (stream->next == ']' && branch_step_indices.size > 0) {
2215 stream_advance(stream);
2216 break;
2217 }
2218 e = TSQueryErrorSyntax;
2219 }
2220 if (e) {
2221 capture_quantifiers_delete(&branch_capture_quantifiers);
2222 array_delete(&branch_step_indices);
2223 return e;
2224 }
2225
2226 if (start_index == starting_step_index) {
2227 capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
2228 } else {
2229 capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
2230 }
2231
2232 array_push(&branch_step_indices, start_index);
2233 array_push(&self->steps, query_step__new(0, depth, false));
2234 capture_quantifiers_clear(&branch_capture_quantifiers);
2235 }
2236 (void)array_pop(&self->steps);
2237
2238 // For all of the branches except for the last one, add the subsequent branch as an
2239 // alternative, and link the end of the branch to the current end of the steps.
2240 for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
2241 uint32_t step_index = branch_step_indices.contents[i];
2242 uint32_t next_step_index = branch_step_indices.contents[i + 1];
2243 QueryStep *start_step = &self->steps.contents[step_index];
2244 QueryStep *end_step = &self->steps.contents[next_step_index - 1];
2245 start_step->alternative_index = next_step_index;
2246 end_step->alternative_index = self->steps.size;
2247 end_step->is_dead_end = true;
2248 }
2249
2250 capture_quantifiers_delete(&branch_capture_quantifiers);
2251 array_delete(&branch_step_indices);
2252 }
2253
2254 // An open parenthesis can be the start of three possible constructs:
2255 // * A grouped sequence
2256 // * A predicate
2257 // * A named node
2258 else if (stream->next == '(') {
2259 stream_advance(stream);
2260 stream_skip_whitespace(stream);
2261
2262 // If this parenthesis is followed by a node, then it represents a grouped sequence.
2263 if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
2264 bool child_is_immediate = is_immediate;
2265 CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2266 for (;;) {
2267 if (stream->next == '.') {
2268 child_is_immediate = true;
2269 stream_advance(stream);
2270 stream_skip_whitespace(stream);
2271 }
2272 TSQueryError e = ts_query__parse_pattern(
2273 self,
2274 stream,
2275 depth,
2276 child_is_immediate,
2277 &child_capture_quantifiers
2278 );
2279 if (e == PARENT_DONE) {
2280 if (stream->next == ')') {
2281 stream_advance(stream);
2282 break;
2283 }
2284 e = TSQueryErrorSyntax;
2285 }
2286 if (e) {
2287 capture_quantifiers_delete(&child_capture_quantifiers);
2288 return e;
2289 }
2290
2291 capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2292 capture_quantifiers_clear(&child_capture_quantifiers);
2293 child_is_immediate = false;
2294 }
2295
2296 capture_quantifiers_delete(&child_capture_quantifiers);
2297 }
2298
2299 // A dot/pound character indicates the start of a predicate.
2300 else if (stream->next == '.' || stream->next == '#') {
2301 stream_advance(stream);
2302 return ts_query__parse_predicate(self, stream);
2303 }
2304
2305 // Otherwise, this parenthesis is the start of a named node.
2306 else {
2307 TSSymbol symbol;
2308
2309 // Parse a normal node name
2310 if (stream_is_ident_start(stream)) {
2311 const char *node_name = stream->input;
2312 stream_scan_identifier(stream);
2313 uint32_t length = (uint32_t)(stream->input - node_name);
2314
2315 // TODO - remove.
2316 // For temporary backward compatibility, handle predicates without the leading '#' sign.
2317 if (length > 0 && (node_name[length - 1] == '!' || node_name[length - 1] == '?')) {
2318 stream_reset(stream, node_name);
2319 return ts_query__parse_predicate(self, stream);
2320 }
2321
2322 // Parse the wildcard symbol
2323 else if (length == 1 && node_name[0] == '_') {
2324 symbol = WILDCARD_SYMBOL;
2325 }
2326
2327 else {
2328 symbol = ts_language_symbol_for_name(
2329 self->language,
2330 node_name,
2331 length,
2332 true
2333 );
2334 if (!symbol) {
2335 stream_reset(stream, node_name);
2336 return TSQueryErrorNodeType;
2337 }
2338 }
2339 } else {
2340 return TSQueryErrorSyntax;
2341 }
2342
2343 // Add a step for the node.
2344 array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2345 QueryStep *step = array_back(&self->steps);
2346 if (ts_language_symbol_metadata(self->language, symbol).supertype) {
2347 step->supertype_symbol = step->symbol;
2348 step->symbol = WILDCARD_SYMBOL;
2349 }
2350 if (symbol == WILDCARD_SYMBOL) {
2351 step->is_named = true;
2352 }
2353
2354 stream_skip_whitespace(stream);
2355
2356 if (stream->next == '/') {
2357 stream_advance(stream);
2358 if (!stream_is_ident_start(stream)) {
2359 return TSQueryErrorSyntax;
2360 }
2361
2362 const char *node_name = stream->input;
2363 stream_scan_identifier(stream);
2364 uint32_t length = (uint32_t)(stream->input - node_name);
2365
2366 step->symbol = ts_language_symbol_for_name(
2367 self->language,
2368 node_name,
2369 length,
2370 true
2371 );
2372 if (!step->symbol) {
2373 stream_reset(stream, node_name);
2374 return TSQueryErrorNodeType;
2375 }
2376
2377 stream_skip_whitespace(stream);
2378 }
2379
2380 // Parse the child patterns
2381 bool child_is_immediate = false;
2382 uint16_t last_child_step_index = 0;
2383 uint16_t negated_field_count = 0;
2384 TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
2385 CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2386 for (;;) {
2387 // Parse a negated field assertion
2388 if (stream->next == '!') {
2389 stream_advance(stream);
2390 stream_skip_whitespace(stream);
2391 if (!stream_is_ident_start(stream)) {
2392 capture_quantifiers_delete(&child_capture_quantifiers);
2393 return TSQueryErrorSyntax;
2394 }
2395 const char *field_name = stream->input;
2396 stream_scan_identifier(stream);
2397 uint32_t length = (uint32_t)(stream->input - field_name);
2398 stream_skip_whitespace(stream);
2399
2400 TSFieldId field_id = ts_language_field_id_for_name(
2401 self->language,
2402 field_name,
2403 length
2404 );
2405 if (!field_id) {
2406 stream->input = field_name;
2407 capture_quantifiers_delete(&child_capture_quantifiers);
2408 return TSQueryErrorField;
2409 }
2410
2411 // Keep the field ids sorted.
2412 if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
2413 negated_field_ids[negated_field_count] = field_id;
2414 negated_field_count++;
2415 }
2416
2417 continue;
2418 }
2419
2420 // Parse a sibling anchor
2421 if (stream->next == '.') {
2422 child_is_immediate = true;
2423 stream_advance(stream);
2424 stream_skip_whitespace(stream);
2425 }
2426
2427 uint16_t step_index = self->steps.size;
2428 TSQueryError e = ts_query__parse_pattern(
2429 self,
2430 stream,
2431 depth + 1,
2432 child_is_immediate,
2433 &child_capture_quantifiers
2434 );
2435 if (e == PARENT_DONE) {
2436 if (stream->next == ')') {
2437 if (child_is_immediate) {
2438 if (last_child_step_index == 0) {
2439 capture_quantifiers_delete(&child_capture_quantifiers);
2440 return TSQueryErrorSyntax;
2441 }
2442 self->steps.contents[last_child_step_index].is_last_child = true;
2443 }
2444
2445 if (negated_field_count) {
2446 ts_query__add_negated_fields(
2447 self,
2448 starting_step_index,
2449 negated_field_ids,
2450 negated_field_count
2451 );
2452 }
2453
2454 stream_advance(stream);
2455 break;
2456 }
2457 e = TSQueryErrorSyntax;
2458 }
2459 if (e) {
2460 capture_quantifiers_delete(&child_capture_quantifiers);
2461 return e;
2462 }
2463
2464 capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2465
2466 last_child_step_index = step_index;
2467 child_is_immediate = false;
2468 capture_quantifiers_clear(&child_capture_quantifiers);
2469 }
2470 capture_quantifiers_delete(&child_capture_quantifiers);
2471 }
2472 }
2473
2474 // Parse a wildcard pattern
2475 else if (stream->next == '_') {
2476 stream_advance(stream);
2477 stream_skip_whitespace(stream);
2478
2479 // Add a step that matches any kind of node
2480 array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
2481 }
2482
2483 // Parse a double-quoted anonymous leaf node expression
2484 else if (stream->next == '"') {
2485 const char *string_start = stream->input;
2486 TSQueryError e = ts_query__parse_string_literal(self, stream);
2487 if (e) return e;
2488
2489 // Add a step for the node
2490 TSSymbol symbol = ts_language_symbol_for_name(
2491 self->language,
2492 self->string_buffer.contents,
2493 self->string_buffer.size,
2494 false
2495 );
2496 if (!symbol) {
2497 stream_reset(stream, string_start + 1);
2498 return TSQueryErrorNodeType;
2499 }
2500 array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2501 }
2502
2503 // Parse a field-prefixed pattern
2504 else if (stream_is_ident_start(stream)) {
2505 // Parse the field name
2506 const char *field_name = stream->input;
2507 stream_scan_identifier(stream);
2508 uint32_t length = (uint32_t)(stream->input - field_name);
2509 stream_skip_whitespace(stream);
2510
2511 if (stream->next != ':') {
2512 stream_reset(stream, field_name);
2513 return TSQueryErrorSyntax;
2514 }
2515 stream_advance(stream);
2516 stream_skip_whitespace(stream);
2517
2518 // Parse the pattern
2519 CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
2520 TSQueryError e = ts_query__parse_pattern(
2521 self,
2522 stream,
2523 depth,
2524 is_immediate,
2525 &field_capture_quantifiers
2526 );
2527 if (e) {
2528 capture_quantifiers_delete(&field_capture_quantifiers);
2529 if (e == PARENT_DONE) e = TSQueryErrorSyntax;
2530 return e;
2531 }
2532
2533 // Add the field name to the first step of the pattern
2534 TSFieldId field_id = ts_language_field_id_for_name(
2535 self->language,
2536 field_name,
2537 length
2538 );
2539 if (!field_id) {
2540 stream->input = field_name;
2541 return TSQueryErrorField;
2542 }
2543
2544 uint32_t step_index = starting_step_index;
2545 QueryStep *step = &self->steps.contents[step_index];
2546 for (;;) {
2547 step->field = field_id;
2548 if (
2549 step->alternative_index != NONE &&
2550 step->alternative_index > step_index &&
2551 step->alternative_index < self->steps.size
2552 ) {
2553 step_index = step->alternative_index;
2554 step = &self->steps.contents[step_index];
2555 } else {
2556 break;
2557 }
2558 }
2559
2560 capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
2561 capture_quantifiers_delete(&field_capture_quantifiers);
2562 }
2563
2564 else {
2565 return TSQueryErrorSyntax;
2566 }
2567
2568 stream_skip_whitespace(stream);
2569
2570 // Parse suffixes modifiers for this pattern
2571 TSQuantifier quantifier = TSQuantifierOne;
2572 for (;;) {
2573 // Parse the one-or-more operator.
2574 if (stream->next == '+') {
2575 quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);
2576
2577 stream_advance(stream);
2578 stream_skip_whitespace(stream);
2579
2580 QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2581 repeat_step.alternative_index = starting_step_index;
2582 repeat_step.is_pass_through = true;
2583 repeat_step.alternative_is_immediate = true;
2584 array_push(&self->steps, repeat_step);
2585 }
2586
2587 // Parse the zero-or-more repetition operator.
2588 else if (stream->next == '*') {
2589 quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);
2590
2591 stream_advance(stream);
2592 stream_skip_whitespace(stream);
2593
2594 QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2595 repeat_step.alternative_index = starting_step_index;
2596 repeat_step.is_pass_through = true;
2597 repeat_step.alternative_is_immediate = true;
2598 array_push(&self->steps, repeat_step);
2599
2600 // Stop when `step->alternative_index` is `NONE` or it points to
2601 // `repeat_step` or beyond. Note that having just been pushed,
2602 // `repeat_step` occupies slot `self->steps.size - 1`.
2603 QueryStep *step = &self->steps.contents[starting_step_index];
2604 while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) {
2605 step = &self->steps.contents[step->alternative_index];
2606 }
2607 step->alternative_index = self->steps.size;
2608 }
2609
2610 // Parse the optional operator.
2611 else if (stream->next == '?') {
2612 quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);
2613
2614 stream_advance(stream);
2615 stream_skip_whitespace(stream);
2616
2617 QueryStep *step = &self->steps.contents[starting_step_index];
2618 while (step->alternative_index != NONE && step->alternative_index < self->steps.size) {
2619 step = &self->steps.contents[step->alternative_index];
2620 }
2621 step->alternative_index = self->steps.size;
2622 }
2623
2624 // Parse an '@'-prefixed capture pattern
2625 else if (stream->next == '@') {
2626 stream_advance(stream);
2627 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2628 const char *capture_name = stream->input;
2629 stream_scan_identifier(stream);
2630 uint32_t length = (uint32_t)(stream->input - capture_name);
2631 stream_skip_whitespace(stream);
2632
2633 // Add the capture id to the first step of the pattern
2634 uint16_t capture_id = symbol_table_insert_name(
2635 &self->captures,
2636 capture_name,
2637 length
2638 );
2639
2640 // Add the capture quantifier
2641 capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);
2642
2643 uint32_t step_index = starting_step_index;
2644 for (;;) {
2645 QueryStep *step = &self->steps.contents[step_index];
2646 query_step__add_capture(step, capture_id);
2647 if (
2648 step->alternative_index != NONE &&
2649 step->alternative_index > step_index &&
2650 step->alternative_index < self->steps.size
2651 ) {
2652 step_index = step->alternative_index;
2653 step = &self->steps.contents[step_index];
2654 } else {
2655 break;
2656 }
2657 }
2658 }
2659
2660 // No more suffix modifiers
2661 else {
2662 break;
2663 }
2664 }
2665
2666 capture_quantifiers_mul(capture_quantifiers, quantifier);
2667
2668 return 0;
2669}
2670
2671TSQuery *ts_query_new(
2672 const TSLanguage *language,
2673 const char *source,
2674 uint32_t source_len,
2675 uint32_t *error_offset,
2676 TSQueryError *error_type
2677) {
2678 if (
2679 !language ||
2680 language->version > TREE_SITTER_LANGUAGE_VERSION ||
2681 language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
2682 ) {
2683 *error_type = TSQueryErrorLanguage;
2684 return NULL;
2685 }
2686
2687 TSQuery *self = ts_malloc(sizeof(TSQuery));
2688 *self = (TSQuery) {
2689 .steps = array_new(),
2690 .pattern_map = array_new(),
2691 .captures = symbol_table_new(),
2692 .capture_quantifiers = array_new(),
2693 .predicate_values = symbol_table_new(),
2694 .predicate_steps = array_new(),
2695 .patterns = array_new(),
2696 .step_offsets = array_new(),
2697 .string_buffer = array_new(),
2698 .negated_fields = array_new(),
2699 .repeat_symbols_with_rootless_patterns = array_new(),
2700 .wildcard_root_pattern_count = 0,
2701 .language = language,
2702 };
2703
2704 array_push(&self->negated_fields, 0);
2705
2706 // Parse all of the S-expressions in the given string.
2707 Stream stream = stream_new(source, source_len);
2708 stream_skip_whitespace(&stream);
2709 while (stream.input < stream.end) {
2710 uint32_t pattern_index = self->patterns.size;
2711 uint32_t start_step_index = self->steps.size;
2712 uint32_t start_predicate_step_index = self->predicate_steps.size;
2713 array_push(&self->patterns, ((QueryPattern) {
2714 .step = (Slice) {.offset = start_step_index},
2715 .predicate_step = (Slice) {.offset = start_predicate_step_index},
2716 .start_byte = stream_offset(&stream),
2717 .is_non_local = false,
2718 }));
2719 CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
2720 *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
2721 array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
2722
2723 QueryPattern *pattern = array_back(&self->patterns);
2724 pattern->step.length = self->steps.size - start_step_index;
2725 pattern->predicate_step.length = self->predicate_steps.size - start_predicate_step_index;
2726
2727 // If any pattern could not be parsed, then report the error information
2728 // and terminate.
2729 if (*error_type) {
2730 if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
2731 *error_offset = stream_offset(&stream);
2732 capture_quantifiers_delete(&capture_quantifiers);
2733 ts_query_delete(self);
2734 return NULL;
2735 }
2736
2737 // Maintain a list of capture quantifiers for each pattern
2738 array_push(&self->capture_quantifiers, capture_quantifiers);
2739
2740 // Maintain a map that can look up patterns for a given root symbol.
2741 uint16_t wildcard_root_alternative_index = NONE;
2742 for (;;) {
2743 QueryStep *step = &self->steps.contents[start_step_index];
2744
2745 // If a pattern has a wildcard at its root, but it has a non-wildcard child,
2746 // then optimize the matching process by skipping matching the wildcard.
2747 // Later, during the matching process, the query cursor will check that
2748 // there is a parent node, and capture it if necessary.
2749 if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
2750 QueryStep *second_step = &self->steps.contents[start_step_index + 1];
2751 if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) {
2752 wildcard_root_alternative_index = step->alternative_index;
2753 start_step_index += 1;
2754 step = second_step;
2755 }
2756 }
2757
2758 // Determine whether the pattern has a single root node. This affects
2759 // decisions about whether or not to start matching the pattern when
2760 // a query cursor has a range restriction or when immediately within an
2761 // error node.
2762 uint32_t start_depth = step->depth;
2763 bool is_rooted = start_depth == 0;
2764 for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
2765 QueryStep *child_step = &self->steps.contents[step_index];
2766 if (child_step->is_dead_end) break;
2767 if (child_step->depth == start_depth) {
2768 is_rooted = false;
2769 break;
2770 }
2771 }
2772
2773 ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
2774 .step_index = start_step_index,
2775 .pattern_index = pattern_index,
2776 .is_rooted = is_rooted
2777 });
2778 if (step->symbol == WILDCARD_SYMBOL) {
2779 self->wildcard_root_pattern_count++;
2780 }
2781
2782 // If there are alternatives or options at the root of the pattern,
2783 // then add multiple entries to the pattern map.
2784 if (step->alternative_index != NONE) {
2785 start_step_index = step->alternative_index;
2786 } else if (wildcard_root_alternative_index != NONE) {
2787 start_step_index = wildcard_root_alternative_index;
2788 wildcard_root_alternative_index = NONE;
2789 } else {
2790 break;
2791 }
2792 }
2793 }
2794
2795 if (!ts_query__analyze_patterns(self, error_offset)) {
2796 *error_type = TSQueryErrorStructure;
2797 ts_query_delete(self);
2798 return NULL;
2799 }
2800
2801 array_delete(&self->string_buffer);
2802 return self;
2803}
2804
2805void ts_query_delete(TSQuery *self) {
2806 if (self) {
2807 array_delete(&self->steps);
2808 array_delete(&self->pattern_map);
2809 array_delete(&self->predicate_steps);
2810 array_delete(&self->patterns);
2811 array_delete(&self->step_offsets);
2812 array_delete(&self->string_buffer);
2813 array_delete(&self->negated_fields);
2814 array_delete(&self->repeat_symbols_with_rootless_patterns);
2815 symbol_table_delete(&self->captures);
2816 symbol_table_delete(&self->predicate_values);
2817 for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) {
2818 CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index);
2819 capture_quantifiers_delete(capture_quantifiers);
2820 }
2821 array_delete(&self->capture_quantifiers);
2822 ts_free(self);
2823 }
2824}
2825
2826uint32_t ts_query_pattern_count(const TSQuery *self) {
2827 return self->patterns.size;
2828}
2829
2830uint32_t ts_query_capture_count(const TSQuery *self) {
2831 return self->captures.slices.size;
2832}
2833
2834uint32_t ts_query_string_count(const TSQuery *self) {
2835 return self->predicate_values.slices.size;
2836}
2837
2838const char *ts_query_capture_name_for_id(
2839 const TSQuery *self,
2840 uint32_t index,
2841 uint32_t *length
2842) {
2843 return symbol_table_name_for_id(&self->captures, index, length);
2844}
2845
2846TSQuantifier ts_query_capture_quantifier_for_id(
2847 const TSQuery *self,
2848 uint32_t pattern_index,
2849 uint32_t capture_index
2850) {
2851 CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
2852 return capture_quantifier_for_id(capture_quantifiers, capture_index);
2853}
2854
2855const char *ts_query_string_value_for_id(
2856 const TSQuery *self,
2857 uint32_t index,
2858 uint32_t *length
2859) {
2860 return symbol_table_name_for_id(&self->predicate_values, index, length);
2861}
2862
2863const TSQueryPredicateStep *ts_query_predicates_for_pattern(
2864 const TSQuery *self,
2865 uint32_t pattern_index,
2866 uint32_t *step_count
2867) {
2868 Slice slice = self->patterns.contents[pattern_index].predicate_step;
2869 *step_count = slice.length;
2870 if (self->predicate_steps.contents == NULL) {
2871 return NULL;
2872 }
2873 return &self->predicate_steps.contents[slice.offset];
2874}
2875
2876uint32_t ts_query_start_byte_for_pattern(
2877 const TSQuery *self,
2878 uint32_t pattern_index
2879) {
2880 return self->patterns.contents[pattern_index].start_byte;
2881}
2882
2883bool ts_query_is_pattern_rooted(
2884 const TSQuery *self,
2885 uint32_t pattern_index
2886) {
2887 for (unsigned i = 0; i < self->pattern_map.size; i++) {
2888 PatternEntry *entry = &self->pattern_map.contents[i];
2889 if (entry->pattern_index == pattern_index) {
2890 if (!entry->is_rooted) return false;
2891 }
2892 }
2893 return true;
2894}
2895
2896bool ts_query_is_pattern_non_local(
2897 const TSQuery *self,
2898 uint32_t pattern_index
2899) {
2900 if (pattern_index < self->patterns.size) {
2901 return self->patterns.contents[pattern_index].is_non_local;
2902 } else {
2903 return false;
2904 }
2905}
2906
2907bool ts_query_is_pattern_guaranteed_at_step(
2908 const TSQuery *self,
2909 uint32_t byte_offset
2910) {
2911 uint32_t step_index = UINT32_MAX;
2912 for (unsigned i = 0; i < self->step_offsets.size; i++) {
2913 StepOffset *step_offset = &self->step_offsets.contents[i];
2914 if (step_offset->byte_offset > byte_offset) break;
2915 step_index = step_offset->step_index;
2916 }
2917 if (step_index < self->steps.size) {
2918 return self->steps.contents[step_index].root_pattern_guaranteed;
2919 } else {
2920 return false;
2921 }
2922}
2923
2924bool ts_query__step_is_fallible(
2925 const TSQuery *self,
2926 uint16_t step_index
2927) {
2928 assert((uint32_t)step_index + 1 < self->steps.size);
2929 QueryStep *step = &self->steps.contents[step_index];
2930 QueryStep *next_step = &self->steps.contents[step_index + 1];
2931 return (
2932 next_step->depth != PATTERN_DONE_MARKER &&
2933 next_step->depth > step->depth &&
2934 !next_step->parent_pattern_guaranteed
2935 );
2936}
2937
2938void ts_query_disable_capture(
2939 TSQuery *self,
2940 const char *name,
2941 uint32_t length
2942) {
2943 // Remove capture information for any pattern step that previously
2944 // captured with the given name.
2945 int id = symbol_table_id_for_name(&self->captures, name, length);
2946 if (id != -1) {
2947 for (unsigned i = 0; i < self->steps.size; i++) {
2948 QueryStep *step = &self->steps.contents[i];
2949 query_step__remove_capture(step, id);
2950 }
2951 }
2952}
2953
2954void ts_query_disable_pattern(
2955 TSQuery *self,
2956 uint32_t pattern_index
2957) {
2958 // Remove the given pattern from the pattern map. Its steps will still
2959 // be in the `steps` array, but they will never be read.
2960 for (unsigned i = 0; i < self->pattern_map.size; i++) {
2961 PatternEntry *pattern = &self->pattern_map.contents[i];
2962 if (pattern->pattern_index == pattern_index) {
2963 array_erase(&self->pattern_map, i);
2964 i--;
2965 }
2966 }
2967}
2968
2969/***************
2970 * QueryCursor
2971 ***************/
2972
2973TSQueryCursor *ts_query_cursor_new(void) {
2974 TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
2975 *self = (TSQueryCursor) {
2976 .did_exceed_match_limit = false,
2977 .ascending = false,
2978 .halted = false,
2979 .states = array_new(),
2980 .finished_states = array_new(),
2981 .capture_list_pool = capture_list_pool_new(),
2982 .start_byte = 0,
2983 .end_byte = UINT32_MAX,
2984 .start_point = {0, 0},
2985 .end_point = POINT_MAX,
2986 .max_start_depth = UINT32_MAX,
2987 };
2988 array_reserve(&self->states, 8);
2989 array_reserve(&self->finished_states, 8);
2990 return self;
2991}
2992
2993void ts_query_cursor_delete(TSQueryCursor *self) {
2994 array_delete(&self->states);
2995 array_delete(&self->finished_states);
2996 ts_tree_cursor_delete(&self->cursor);
2997 capture_list_pool_delete(&self->capture_list_pool);
2998 ts_free(self);
2999}
3000
3001bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
3002 return self->did_exceed_match_limit;
3003}
3004
3005uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
3006 return self->capture_list_pool.max_capture_list_count;
3007}
3008
3009void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
3010 self->capture_list_pool.max_capture_list_count = limit;
3011}
3012
// Debug logging: LOG prints to stderr when DEBUG_EXECUTE_QUERY is defined,
// and expands to nothing otherwise.
#if defined(DEBUG_EXECUTE_QUERY)
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#else
#define LOG(...)
#endif
3018
3019void ts_query_cursor_exec(
3020 TSQueryCursor *self,
3021 const TSQuery *query,
3022 TSNode node
3023) {
3024 if (query) {
3025 LOG("query steps:\n");
3026 for (unsigned i = 0; i < query->steps.size; i++) {
3027 QueryStep *step = &query->steps.contents[i];
3028 LOG(" %u: {", i);
3029 if (step->depth == PATTERN_DONE_MARKER) {
3030 LOG("DONE");
3031 } else if (step->is_dead_end) {
3032 LOG("dead_end");
3033 } else if (step->is_pass_through) {
3034 LOG("pass_through");
3035 } else if (step->symbol != WILDCARD_SYMBOL) {
3036 LOG("symbol: %s", query->language->symbol_names[step->symbol]);
3037 } else {
3038 LOG("symbol: *");
3039 }
3040 if (step->field) {
3041 LOG(", field: %s", query->language->field_names[step->field]);
3042 }
3043 if (step->alternative_index != NONE) {
3044 LOG(", alternative: %u", step->alternative_index);
3045 }
3046 LOG("},\n");
3047 }
3048 }
3049
3050 array_clear(&self->states);
3051 array_clear(&self->finished_states);
3052 ts_tree_cursor_reset(&self->cursor, node);
3053 capture_list_pool_reset(&self->capture_list_pool);
3054 self->on_visible_node = true;
3055 self->next_state_id = 0;
3056 self->depth = 0;
3057 self->ascending = false;
3058 self->halted = false;
3059 self->query = query;
3060 self->did_exceed_match_limit = false;
3061}
3062
3063void ts_query_cursor_set_byte_range(
3064 TSQueryCursor *self,
3065 uint32_t start_byte,
3066 uint32_t end_byte
3067) {
3068 if (end_byte == 0) {
3069 end_byte = UINT32_MAX;
3070 }
3071 self->start_byte = start_byte;
3072 self->end_byte = end_byte;
3073}
3074
3075void ts_query_cursor_set_point_range(
3076 TSQueryCursor *self,
3077 TSPoint start_point,
3078 TSPoint end_point
3079) {
3080 if (end_point.row == 0 && end_point.column == 0) {
3081 end_point = POINT_MAX;
3082 }
3083 self->start_point = start_point;
3084 self->end_point = end_point;
3085}
3086
3087// Search through all of the in-progress states, and find the captured
3088// node that occurs earliest in the document.
3089static bool ts_query_cursor__first_in_progress_capture(
3090 TSQueryCursor *self,
3091 uint32_t *state_index,
3092 uint32_t *byte_offset,
3093 uint32_t *pattern_index,
3094 bool *root_pattern_guaranteed
3095) {
3096 bool result = false;
3097 *state_index = UINT32_MAX;
3098 *byte_offset = UINT32_MAX;
3099 *pattern_index = UINT32_MAX;
3100 for (unsigned i = 0; i < self->states.size; i++) {
3101 QueryState *state = &self->states.contents[i];
3102 if (state->dead) continue;
3103
3104 const CaptureList *captures = capture_list_pool_get(
3105 &self->capture_list_pool,
3106 state->capture_list_id
3107 );
3108 if (state->consumed_capture_count >= captures->size) {
3109 continue;
3110 }
3111
3112 TSNode node = captures->contents[state->consumed_capture_count].node;
3113 if (
3114 ts_node_end_byte(node) <= self->start_byte ||
3115 point_lte(ts_node_end_point(node), self->start_point)
3116 ) {
3117 state->consumed_capture_count++;
3118 i--;
3119 continue;
3120 }
3121
3122 uint32_t node_start_byte = ts_node_start_byte(node);
3123 if (
3124 !result ||
3125 node_start_byte < *byte_offset ||
3126 (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
3127 ) {
3128 QueryStep *step = &self->query->steps.contents[state->step_index];
3129 if (root_pattern_guaranteed) {
3130 *root_pattern_guaranteed = step->root_pattern_guaranteed;
3131 } else if (step->root_pattern_guaranteed) {
3132 continue;
3133 }
3134
3135 result = true;
3136 *state_index = i;
3137 *byte_offset = node_start_byte;
3138 *pattern_index = state->pattern_index;
3139 }
3140 }
3141 return result;
3142}
3143
3144// Determine which node is first in a depth-first traversal
3145int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
3146 if (left.id != right.id) {
3147 uint32_t left_start = ts_node_start_byte(left);
3148 uint32_t right_start = ts_node_start_byte(right);
3149 if (left_start < right_start) return -1;
3150 if (left_start > right_start) return 1;
3151 uint32_t left_node_count = ts_node_end_byte(left);
3152 uint32_t right_node_count = ts_node_end_byte(right);
3153 if (left_node_count > right_node_count) return -1;
3154 if (left_node_count < right_node_count) return 1;
3155 }
3156 return 0;
3157}
3158
3159// Determine if either state contains a superset of the other state's captures.
3160void ts_query_cursor__compare_captures(
3161 TSQueryCursor *self,
3162 QueryState *left_state,
3163 QueryState *right_state,
3164 bool *left_contains_right,
3165 bool *right_contains_left
3166) {
3167 const CaptureList *left_captures = capture_list_pool_get(
3168 &self->capture_list_pool,
3169 left_state->capture_list_id
3170 );
3171 const CaptureList *right_captures = capture_list_pool_get(
3172 &self->capture_list_pool,
3173 right_state->capture_list_id
3174 );
3175 *left_contains_right = true;
3176 *right_contains_left = true;
3177 unsigned i = 0, j = 0;
3178 for (;;) {
3179 if (i < left_captures->size) {
3180 if (j < right_captures->size) {
3181 TSQueryCapture *left = &left_captures->contents[i];
3182 TSQueryCapture *right = &right_captures->contents[j];
3183 if (left->node.id == right->node.id && left->index == right->index) {
3184 i++;
3185 j++;
3186 } else {
3187 switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
3188 case -1:
3189 *right_contains_left = false;
3190 i++;
3191 break;
3192 case 1:
3193 *left_contains_right = false;
3194 j++;
3195 break;
3196 default:
3197 *right_contains_left = false;
3198 *left_contains_right = false;
3199 i++;
3200 j++;
3201 break;
3202 }
3203 }
3204 } else {
3205 *right_contains_left = false;
3206 break;
3207 }
3208 } else {
3209 if (j < right_captures->size) {
3210 *left_contains_right = false;
3211 }
3212 break;
3213 }
3214 }
3215}
3216
3217static void ts_query_cursor__add_state(
3218 TSQueryCursor *self,
3219 const PatternEntry *pattern
3220) {
3221 QueryStep *step = &self->query->steps.contents[pattern->step_index];
3222 uint32_t start_depth = self->depth - step->depth;
3223
3224 // Keep the states array in ascending order of start_depth and pattern_index,
3225 // so that it can be processed more efficiently elsewhere. Usually, there is
3226 // no work to do here because of two facts:
3227 // * States with lower start_depth are naturally added first due to the
3228 // order in which nodes are visited.
3229 // * Earlier patterns are naturally added first because of the ordering of the
3230 // pattern_map data structure that's used to initiate matches.
3231 //
3232 // This loop is only needed in cases where two conditions hold:
3233 // * A pattern consists of more than one sibling node, so that its states
3234 // remain in progress after exiting the node that started the match.
3235 // * The first node in the pattern matches against multiple nodes at the
3236 // same depth.
3237 //
3238 // An example of this is the pattern '((comment)* (function))'. If multiple
3239 // `comment` nodes appear in a row, then we may initiate a new state for this
3240 // pattern while another state for the same pattern is already in progress.
3241 // If there are multiple patterns like this in a query, then this loop will
3242 // need to execute in order to keep the states ordered by pattern_index.
3243 uint32_t index = self->states.size;
3244 while (index > 0) {
3245 QueryState *prev_state = &self->states.contents[index - 1];
3246 if (prev_state->start_depth < start_depth) break;
3247 if (prev_state->start_depth == start_depth) {
3248 // Avoid inserting an unnecessary duplicate state, which would be
3249 // immediately pruned by the longest-match criteria.
3250 if (
3251 prev_state->pattern_index == pattern->pattern_index &&
3252 prev_state->step_index == pattern->step_index
3253 ) return;
3254 if (prev_state->pattern_index <= pattern->pattern_index) break;
3255 }
3256 index--;
3257 }
3258
3259 LOG(
3260 " start state. pattern:%u, step:%u\n",
3261 pattern->pattern_index,
3262 pattern->step_index
3263 );
3264 array_insert(&self->states, index, ((QueryState) {
3265 .id = UINT32_MAX,
3266 .capture_list_id = NONE,
3267 .step_index = pattern->step_index,
3268 .pattern_index = pattern->pattern_index,
3269 .start_depth = start_depth,
3270 .consumed_capture_count = 0,
3271 .seeking_immediate_match = true,
3272 .has_in_progress_alternatives = false,
3273 .needs_parent = step->depth == 1,
3274 .dead = false,
3275 }));
3276}
3277
3278// Acquire a capture list for this state. If there are no capture lists left in the
3279// pool, this will steal the capture list from another existing state, and mark that
3280// other state as 'dead'.
3281static CaptureList *ts_query_cursor__prepare_to_capture(
3282 TSQueryCursor *self,
3283 QueryState *state,
3284 unsigned state_index_to_preserve
3285) {
3286 if (state->capture_list_id == NONE) {
3287 state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
3288
3289 // If there are no capture lists left in the pool, then terminate whichever
3290 // state has captured the earliest node in the document, and steal its
3291 // capture list.
3292 if (state->capture_list_id == NONE) {
3293 self->did_exceed_match_limit = true;
3294 uint32_t state_index, byte_offset, pattern_index;
3295 if (
3296 ts_query_cursor__first_in_progress_capture(
3297 self,
3298 &state_index,
3299 &byte_offset,
3300 &pattern_index,
3301 NULL
3302 ) &&
3303 state_index != state_index_to_preserve
3304 ) {
3305 LOG(
3306 " abandon state. index:%u, pattern:%u, offset:%u.\n",
3307 state_index, pattern_index, byte_offset
3308 );
3309 QueryState *other_state = &self->states.contents[state_index];
3310 state->capture_list_id = other_state->capture_list_id;
3311 other_state->capture_list_id = NONE;
3312 other_state->dead = true;
3313 CaptureList *list = capture_list_pool_get_mut(
3314 &self->capture_list_pool,
3315 state->capture_list_id
3316 );
3317 array_clear(list);
3318 return list;
3319 } else {
3320 LOG(" ran out of capture lists");
3321 return NULL;
3322 }
3323 }
3324 }
3325 return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
3326}
3327
3328static void ts_query_cursor__capture(
3329 TSQueryCursor *self,
3330 QueryState *state,
3331 QueryStep *step,
3332 TSNode node
3333) {
3334 if (state->dead) return;
3335 CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
3336 if (!capture_list) {
3337 state->dead = true;
3338 return;
3339 }
3340
3341 for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
3342 uint16_t capture_id = step->capture_ids[j];
3343 if (step->capture_ids[j] == NONE) break;
3344 array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
3345 LOG(
3346 " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
3347 ts_node_type(node),
3348 state->pattern_index,
3349 capture_id,
3350 capture_list->size
3351 );
3352 }
3353}
3354
3355// Duplicate the given state and insert the newly-created state immediately after
3356// the given state in the `states` array. Ensures that the given state reference is
3357// still valid, even if the states array is reallocated.
3358static QueryState *ts_query_cursor__copy_state(
3359 TSQueryCursor *self,
3360 QueryState **state_ref
3361) {
3362 const QueryState *state = *state_ref;
3363 uint32_t state_index = (uint32_t)(state - self->states.contents);
3364 QueryState copy = *state;
3365 copy.capture_list_id = NONE;
3366
3367 // If the state has captures, copy its capture list.
3368 if (state->capture_list_id != NONE) {
3369 CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
3370 if (!new_captures) return NULL;
3371 const CaptureList *old_captures = capture_list_pool_get(
3372 &self->capture_list_pool,
3373 state->capture_list_id
3374 );
3375 array_push_all(new_captures, old_captures);
3376 }
3377
3378 array_insert(&self->states, state_index + 1, copy);
3379 *state_ref = &self->states.contents[state_index];
3380 return &self->states.contents[state_index + 1];
3381}
3382
3383static inline bool ts_query_cursor__should_descend(
3384 TSQueryCursor *self,
3385 bool node_intersects_range
3386) {
3387
3388 if (node_intersects_range && self->depth < self->max_start_depth) {
3389 return true;
3390 }
3391
3392 // If there are in-progress matches whose remaining steps occur
3393 // deeper in the tree, then descend.
3394 for (unsigned i = 0; i < self->states.size; i++) {
3395 QueryState *state = &self->states.contents[i];;
3396 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3397 if (
3398 next_step->depth != PATTERN_DONE_MARKER &&
3399 state->start_depth + next_step->depth > self->depth
3400 ) {
3401 return true;
3402 }
3403 }
3404
3405 if (self->depth >= self->max_start_depth) {
3406 return false;
3407 }
3408
3409 // If the current node is hidden, then a non-rooted pattern might match
3410 // one if its roots inside of this node, and match another of its roots
3411 // as part of a sibling node, so we may need to descend.
3412 if (!self->on_visible_node) {
3413 // Descending into a repetition node outside of the range can be
3414 // expensive, because these nodes can have many visible children.
3415 // Avoid descending into repetition nodes unless we have already
3416 // determined that this query can match rootless patterns inside
3417 // of this type of repetition node.
3418 Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
3419 if (ts_subtree_is_repetition(subtree)) {
3420 bool exists;
3421 uint32_t index;
3422 array_search_sorted_by(
3423 &self->query->repeat_symbols_with_rootless_patterns,,
3424 ts_subtree_symbol(subtree),
3425 &index,
3426 &exists
3427 );
3428 return exists;
3429 }
3430
3431 return true;
3432 }
3433
3434 return false;
3435}
3436
3437// Walk the tree, processing patterns until at least one pattern finishes,
3438// If one or more patterns finish, return `true` and store their states in the
3439// `finished_states` array. Multiple patterns can finish on the same node. If
3440// there are no more matches, return `false`.
3441static inline bool ts_query_cursor__advance(
3442 TSQueryCursor *self,
3443 bool stop_on_definite_step
3444) {
3445 bool did_match = false;
3446 for (;;) {
3447 if (self->halted) {
3448 while (self->states.size > 0) {
3449 QueryState state = array_pop(&self->states);
3450 capture_list_pool_release(
3451 &self->capture_list_pool,
3452 state.capture_list_id
3453 );
3454 }
3455 }
3456
3457 if (did_match || self->halted) return did_match;
3458
3459 // Exit the current node.
3460 if (self->ascending) {
3461 if (self->on_visible_node) {
3462 LOG(
3463 "leave node. depth:%u, type:%s\n",
3464 self->depth,
3465 ts_node_type(ts_tree_cursor_current_node(&self->cursor))
3466 );
3467
3468 // After leaving a node, remove any states that cannot make further progress.
3469 uint32_t deleted_count = 0;
3470 for (unsigned i = 0, n = self->states.size; i < n; i++) {
3471 QueryState *state = &self->states.contents[i];
3472 QueryStep *step = &self->query->steps.contents[state->step_index];
3473
3474 // If a state completed its pattern inside of this node, but was deferred from finishing
3475 // in order to search for longer matches, mark it as finished.
3476 if (
3477 step->depth == PATTERN_DONE_MARKER &&
3478 (state->start_depth > self->depth || self->depth == 0)
3479 ) {
3480 LOG(" finish pattern %u\n", state->pattern_index);
3481 array_push(&self->finished_states, *state);
3482 did_match = true;
3483 deleted_count++;
3484 }
3485
3486 // If a state needed to match something within this node, then remove that state
3487 // as it has failed to match.
3488 else if (
3489 step->depth != PATTERN_DONE_MARKER &&
3490 (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth
3491 ) {
3492 LOG(
3493 " failed to match. pattern:%u, step:%u\n",
3494 state->pattern_index,
3495 state->step_index
3496 );
3497 capture_list_pool_release(
3498 &self->capture_list_pool,
3499 state->capture_list_id
3500 );
3501 deleted_count++;
3502 }
3503
3504 else if (deleted_count > 0) {
3505 self->states.contents[i - deleted_count] = *state;
3506 }
3507 }
3508 self->states.size -= deleted_count;
3509 }
3510
3511 // Leave this node by stepping to its next sibling or to its parent.
3512 switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
3513 case TreeCursorStepVisible:
3514 if (!self->on_visible_node) {
3515 self->depth++;
3516 self->on_visible_node = true;
3517 }
3518 self->ascending = false;
3519 break;
3520 case TreeCursorStepHidden:
3521 if (self->on_visible_node) {
3522 self->depth--;
3523 self->on_visible_node = false;
3524 }
3525 self->ascending = false;
3526 break;
3527 default:
3528 if (ts_tree_cursor_goto_parent(&self->cursor)) {
3529 self->depth--;
3530 } else {
3531 LOG("halt at root\n");
3532 self->halted = true;
3533 }
3534 }
3535 }
3536
3537 // Enter a new node.
3538 else {
3539 // Get the properties of the current node.
3540 TSNode node = ts_tree_cursor_current_node(&self->cursor);
3541 TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
3542 bool parent_precedes_range = !ts_node_is_null(parent_node) && (
3543 ts_node_end_byte(parent_node) <= self->start_byte ||
3544 point_lte(ts_node_end_point(parent_node), self->start_point)
3545 );
3546 bool parent_follows_range = !ts_node_is_null(parent_node) && (
3547 ts_node_start_byte(parent_node) >= self->end_byte ||
3548 point_gte(ts_node_start_point(parent_node), self->end_point)
3549 );
3550 bool node_precedes_range = parent_precedes_range || (
3551 ts_node_end_byte(node) <= self->start_byte ||
3552 point_lte(ts_node_end_point(node), self->start_point)
3553 );
3554 bool node_follows_range = parent_follows_range || (
3555 ts_node_start_byte(node) >= self->end_byte ||
3556 point_gte(ts_node_start_point(node), self->end_point)
3557 );
3558 bool parent_intersects_range = !parent_precedes_range && !parent_follows_range;
3559 bool node_intersects_range = !node_precedes_range && !node_follows_range;
3560
3561 if (self->on_visible_node) {
3562 TSSymbol symbol = ts_node_symbol(node);
3563 bool is_named = ts_node_is_named(node);
3564 bool has_later_siblings;
3565 bool has_later_named_siblings;
3566 bool can_have_later_siblings_with_this_field;
3567 TSFieldId field_id = 0;
3568 TSSymbol supertypes[8] = {0};
3569 unsigned supertype_count = 8;
3570 ts_tree_cursor_current_status(
3571 &self->cursor,
3572 &field_id,
3573 &has_later_siblings,
3574 &has_later_named_siblings,
3575 &can_have_later_siblings_with_this_field,
3576 supertypes,
3577 &supertype_count
3578 );
3579 LOG(
3580 "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
3581 self->depth,
3582 ts_node_type(node),
3583 ts_language_field_name_for_id(self->query->language, field_id),
3584 ts_node_start_point(node).row,
3585 self->states.size,
3586 self->finished_states.size
3587 );
3588
3589 bool node_is_error = symbol == ts_builtin_sym_error;
3590 bool parent_is_error =
3591 !ts_node_is_null(parent_node) &&
3592 ts_node_symbol(parent_node) == ts_builtin_sym_error;
3593
3594 // Add new states for any patterns whose root node is a wildcard.
3595 if (!node_is_error) {
3596 for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
3597 PatternEntry *pattern = &self->query->pattern_map.contents[i];
3598
3599 // If this node matches the first step of the pattern, then add a new
3600 // state at the start of this pattern.
3601 QueryStep *step = &self->query->steps.contents[pattern->step_index];
3602 uint32_t start_depth = self->depth - step->depth;
3603 if (
3604 (pattern->is_rooted ?
3605 node_intersects_range :
3606 (parent_intersects_range && !parent_is_error)) &&
3607 (!step->field || field_id == step->field) &&
3608 (!step->supertype_symbol || supertype_count > 0) &&
3609 (start_depth <= self->max_start_depth)
3610 ) {
3611 ts_query_cursor__add_state(self, pattern);
3612 }
3613 }
3614 }
3615
3616 // Add new states for any patterns whose root node matches this node.
3617 unsigned i;
3618 if (ts_query__pattern_map_search(self->query, symbol, &i)) {
3619 PatternEntry *pattern = &self->query->pattern_map.contents[i];
3620
3621 QueryStep *step = &self->query->steps.contents[pattern->step_index];
3622 uint32_t start_depth = self->depth - step->depth;
3623 do {
3624 // If this node matches the first step of the pattern, then add a new
3625 // state at the start of this pattern.
3626 if (
3627 (pattern->is_rooted ?
3628 node_intersects_range :
3629 (parent_intersects_range && !parent_is_error)) &&
3630 (!step->field || field_id == step->field) &&
3631 (start_depth <= self->max_start_depth)
3632 ) {
3633 ts_query_cursor__add_state(self, pattern);
3634 }
3635
3636 // Advance to the next pattern whose root node matches this node.
3637 i++;
3638 if (i == self->query->pattern_map.size) break;
3639 pattern = &self->query->pattern_map.contents[i];
3640 step = &self->query->steps.contents[pattern->step_index];
3641 } while (step->symbol == symbol);
3642 }
3643
3644 // Update all of the in-progress states with current node.
3645 for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) {
3646 QueryState *state = &self->states.contents[j];
3647 QueryStep *step = &self->query->steps.contents[state->step_index];
3648 state->has_in_progress_alternatives = false;
3649 copy_count = 0;
3650
3651 // Check that the node matches all of the criteria for the next
3652 // step of the pattern.
3653 if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
3654
3655 // Determine if this node matches this step of the pattern, and also
3656 // if this node can have later siblings that match this step of the
3657 // pattern.
3658 bool node_does_match = false;
3659 if (step->symbol == WILDCARD_SYMBOL) {
3660 node_does_match = !node_is_error && (is_named || !step->is_named);
3661 } else {
3662 node_does_match = symbol == step->symbol;
3663 }
3664 bool later_sibling_can_match = has_later_siblings;
3665 if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
3666 later_sibling_can_match = false;
3667 }
3668 if (step->is_last_child && has_later_named_siblings) {
3669 node_does_match = false;
3670 }
3671 if (step->supertype_symbol) {
3672 bool has_supertype = false;
3673 for (unsigned k = 0; k < supertype_count; k++) {
3674 if (supertypes[k] == step->supertype_symbol) {
3675 has_supertype = true;
3676 break;
3677 }
3678 }
3679 if (!has_supertype) node_does_match = false;
3680 }
3681 if (step->field) {
3682 if (step->field == field_id) {
3683 if (!can_have_later_siblings_with_this_field) {
3684 later_sibling_can_match = false;
3685 }
3686 } else {
3687 node_does_match = false;
3688 }
3689 }
3690
3691 if (step->negated_field_list_id) {
3692 TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
3693 for (;;) {
3694 TSFieldId negated_field_id = *negated_field_ids;
3695 if (negated_field_id) {
3696 negated_field_ids++;
3697 if (ts_node_child_by_field_id(node, negated_field_id).id) {
3698 node_does_match = false;
3699 break;
3700 }
3701 } else {
3702 break;
3703 }
3704 }
3705 }
3706
3707 // Remove states immediately if it is ever clear that they cannot match.
3708 if (!node_does_match) {
3709 if (!later_sibling_can_match) {
3710 LOG(
3711 " discard state. pattern:%u, step:%u\n",
3712 state->pattern_index,
3713 state->step_index
3714 );
3715 capture_list_pool_release(
3716 &self->capture_list_pool,
3717 state->capture_list_id
3718 );
3719 array_erase(&self->states, j);
3720 j--;
3721 }
3722 continue;
3723 }
3724
3725 // Some patterns can match their root node in multiple ways, capturing different
3726 // children. If this pattern step could match later children within the same
3727 // parent, then this query state cannot simply be updated in place. It must be
3728 // split into two states: one that matches this node, and one which skips over
3729 // this node, to preserve the possibility of matching later siblings.
3730 if (later_sibling_can_match && (
3731 step->contains_captures ||
3732 ts_query__step_is_fallible(self->query, state->step_index)
3733 )) {
3734 if (ts_query_cursor__copy_state(self, &state)) {
3735 LOG(
3736 " split state for capture. pattern:%u, step:%u\n",
3737 state->pattern_index,
3738 state->step_index
3739 );
3740 copy_count++;
3741 }
3742 }
3743
3744 // If this pattern started with a wildcard, such that the pattern map
3745 // actually points to the *second* step of the pattern, then check
3746 // that the node has a parent, and capture the parent node if necessary.
3747 if (state->needs_parent) {
3748 TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
3749 if (ts_node_is_null(parent)) {
3750 LOG(" missing parent node\n");
3751 state->dead = true;
3752 } else {
3753 state->needs_parent = false;
3754 QueryStep *skipped_wildcard_step = step;
3755 do {
3756 skipped_wildcard_step--;
3757 } while (
3758 skipped_wildcard_step->is_dead_end ||
3759 skipped_wildcard_step->is_pass_through ||
3760 skipped_wildcard_step->depth > 0
3761 );
3762 if (skipped_wildcard_step->capture_ids[0] != NONE) {
3763 LOG(" capture wildcard parent\n");
3764 ts_query_cursor__capture(
3765 self,
3766 state,
3767 skipped_wildcard_step,
3768 parent
3769 );
3770 }
3771 }
3772 }
3773
3774 // If the current node is captured in this pattern, add it to the capture list.
3775 if (step->capture_ids[0] != NONE) {
3776 ts_query_cursor__capture(self, state, step, node);
3777 }
3778
3779 if (state->dead) {
3780 array_erase(&self->states, j);
3781 j--;
3782 continue;
3783 }
3784
3785 // Advance this state to the next step of its pattern.
3786 state->step_index++;
3787 state->seeking_immediate_match = false;
3788 LOG(
3789 " advance state. pattern:%u, step:%u\n",
3790 state->pattern_index,
3791 state->step_index
3792 );
3793
3794 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3795 if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
3796
3797 // If this state's next step has an alternative step, then copy the state in order
3798 // to pursue both alternatives. The alternative step itself may have an alternative,
3799 // so this is an interactive process.
3800 unsigned end_index = j + 1;
3801 for (unsigned k = j; k < end_index; k++) {
3802 QueryState *child_state = &self->states.contents[k];
3803 QueryStep *child_step = &self->query->steps.contents[child_state->step_index];
3804 if (child_step->alternative_index != NONE) {
3805 // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
3806 // via its alternative index. When a state reaches a dead-end step, it jumps straight
3807 // to the step's alternative.
3808 if (child_step->is_dead_end) {
3809 child_state->step_index = child_step->alternative_index;
3810 k--;
3811 continue;
3812 }
3813
3814 // A "pass-through" step exists only to add a branch into the step sequence,
3815 // via its alternative_index. When a state reaches a pass-through step, it splits
3816 // in order to process the alternative step, and then it advances to the next step.
3817 if (child_step->is_pass_through) {
3818 child_state->step_index++;
3819 k--;
3820 }
3821
3822 QueryState *copy = ts_query_cursor__copy_state(self, &child_state);
3823 if (copy) {
3824 LOG(
3825 " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
3826 copy->pattern_index,
3827 copy->step_index,
3828 next_step->alternative_index,
3829 next_step->alternative_is_immediate,
3830 capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
3831 );
3832 end_index++;
3833 copy_count++;
3834 copy->step_index = child_step->alternative_index;
3835 if (child_step->alternative_is_immediate) {
3836 copy->seeking_immediate_match = true;
3837 }
3838 }
3839 }
3840 }
3841 }
3842
3843 for (unsigned j = 0; j < self->states.size; j++) {
3844 QueryState *state = &self->states.contents[j];
3845 if (state->dead) {
3846 array_erase(&self->states, j);
3847 j--;
3848 continue;
3849 }
3850
3851 // Enfore the longest-match criteria. When a query pattern contains optional or
3852 // repeated nodes, this is necessary to avoid multiple redundant states, where
3853 // one state has a strict subset of another state's captures.
3854 bool did_remove = false;
3855 for (unsigned k = j + 1; k < self->states.size; k++) {
3856 QueryState *other_state = &self->states.contents[k];
3857
3858 // Query states are kept in ascending order of start_depth and pattern_index.
3859 // Since the longest-match criteria is only used for deduping matches of the same
3860 // pattern and root node, we only need to perform pairwise comparisons within a
3861 // small slice of the states array.
3862 if (
3863 other_state->start_depth != state->start_depth ||
3864 other_state->pattern_index != state->pattern_index
3865 ) break;
3866
3867 bool left_contains_right, right_contains_left;
3868 ts_query_cursor__compare_captures(
3869 self,
3870 state,
3871 other_state,
3872 &left_contains_right,
3873 &right_contains_left
3874 );
3875 if (left_contains_right) {
3876 if (state->step_index == other_state->step_index) {
3877 LOG(
3878 " drop shorter state. pattern: %u, step_index: %u\n",
3879 state->pattern_index,
3880 state->step_index
3881 );
3882 capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
3883 array_erase(&self->states, k);
3884 k--;
3885 continue;
3886 }
3887 other_state->has_in_progress_alternatives = true;
3888 }
3889 if (right_contains_left) {
3890 if (state->step_index == other_state->step_index) {
3891 LOG(
3892 " drop shorter state. pattern: %u, step_index: %u\n",
3893 state->pattern_index,
3894 state->step_index
3895 );
3896 capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3897 array_erase(&self->states, j);
3898 j--;
3899 did_remove = true;
3900 break;
3901 }
3902 state->has_in_progress_alternatives = true;
3903 }
3904 }
3905
3906 // If the state is at the end of its pattern, remove it from the list
3907 // of in-progress states and add it to the list of finished states.
3908 if (!did_remove) {
3909 LOG(
3910 " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
3911 state->pattern_index,
3912 state->start_depth,
3913 state->step_index,
3914 capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
3915 );
3916 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3917 if (next_step->depth == PATTERN_DONE_MARKER) {
3918 if (state->has_in_progress_alternatives) {
3919 LOG(" defer finishing pattern %u\n", state->pattern_index);
3920 } else {
3921 LOG(" finish pattern %u\n", state->pattern_index);
3922 array_push(&self->finished_states, *state);
3923 array_erase(&self->states, (uint32_t)(state - self->states.contents));
3924 did_match = true;
3925 j--;
3926 }
3927 }
3928 }
3929 }
3930 }
3931
3932 if (ts_query_cursor__should_descend(self, node_intersects_range)) {
3933 switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) {
3934 case TreeCursorStepVisible:
3935 self->depth++;
3936 self->on_visible_node = true;
3937 continue;
3938 case TreeCursorStepHidden:
3939 self->on_visible_node = false;
3940 continue;
3941 default:
3942 break;
3943 }
3944 }
3945
3946 self->ascending = true;
3947 }
3948 }
3949}
3950
3951bool ts_query_cursor_next_match(
3952 TSQueryCursor *self,
3953 TSQueryMatch *match
3954) {
3955 if (self->finished_states.size == 0) {
3956 if (!ts_query_cursor__advance(self, false)) {
3957 return false;
3958 }
3959 }
3960
3961 QueryState *state = &self->finished_states.contents[0];
3962 if (state->id == UINT32_MAX) state->id = self->next_state_id++;
3963 match->id = state->id;
3964 match->pattern_index = state->pattern_index;
3965 const CaptureList *captures = capture_list_pool_get(
3966 &self->capture_list_pool,
3967 state->capture_list_id
3968 );
3969 match->captures = captures->contents;
3970 match->capture_count = captures->size;
3971 capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3972 array_erase(&self->finished_states, 0);
3973 return true;
3974}
3975
3976void ts_query_cursor_remove_match(
3977 TSQueryCursor *self,
3978 uint32_t match_id
3979) {
3980 for (unsigned i = 0; i < self->finished_states.size; i++) {
3981 const QueryState *state = &self->finished_states.contents[i];
3982 if (state->id == match_id) {
3983 capture_list_pool_release(
3984 &self->capture_list_pool,
3985 state->capture_list_id
3986 );
3987 array_erase(&self->finished_states, i);
3988 return;
3989 }
3990 }
3991
3992 // Remove unfinished query states as well to prevent future
3993 // captures for a match being removed.
3994 for (unsigned i = 0; i < self->states.size; i++) {
3995 const QueryState *state = &self->states.contents[i];
3996 if (state->id == match_id) {
3997 capture_list_pool_release(
3998 &self->capture_list_pool,
3999 state->capture_list_id
4000 );
4001 array_erase(&self->states, i);
4002 return;
4003 }
4004 }
4005}
4006
4007bool ts_query_cursor_next_capture(
4008 TSQueryCursor *self,
4009 TSQueryMatch *match,
4010 uint32_t *capture_index
4011) {
4012 // The goal here is to return captures in order, even though they may not
4013 // be discovered in order, because patterns can overlap. Search for matches
4014 // until there is a finished capture that is before any unfinished capture.
4015 for (;;) {
4016 // First, find the earliest capture in an unfinished match.
4017 uint32_t first_unfinished_capture_byte;
4018 uint32_t first_unfinished_pattern_index;
4019 uint32_t first_unfinished_state_index;
4020 bool first_unfinished_state_is_definite = false;
4021 ts_query_cursor__first_in_progress_capture(
4022 self,
4023 &first_unfinished_state_index,
4024 &first_unfinished_capture_byte,
4025 &first_unfinished_pattern_index,
4026 &first_unfinished_state_is_definite
4027 );
4028
4029 // Then find the earliest capture in a finished match. It must occur
4030 // before the first capture in an *unfinished* match.
4031 QueryState *first_finished_state = NULL;
4032 uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
4033 uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
4034 for (unsigned i = 0; i < self->finished_states.size;) {
4035 QueryState *state = &self->finished_states.contents[i];
4036 const CaptureList *captures = capture_list_pool_get(
4037 &self->capture_list_pool,
4038 state->capture_list_id
4039 );
4040
4041 // Remove states whose captures are all consumed.
4042 if (state->consumed_capture_count >= captures->size) {
4043 capture_list_pool_release(
4044 &self->capture_list_pool,
4045 state->capture_list_id
4046 );
4047 array_erase(&self->finished_states, i);
4048 continue;
4049 }
4050
4051 // Skip captures that precede the cursor's start byte.
4052 TSNode node = captures->contents[state->consumed_capture_count].node;
4053 if (ts_node_end_byte(node) <= self->start_byte) {
4054 state->consumed_capture_count++;
4055 continue;
4056 }
4057
4058 uint32_t node_start_byte = ts_node_start_byte(node);
4059 if (
4060 node_start_byte < first_finished_capture_byte ||
4061 (
4062 node_start_byte == first_finished_capture_byte &&
4063 state->pattern_index < first_finished_pattern_index
4064 )
4065 ) {
4066 first_finished_state = state;
4067 first_finished_capture_byte = node_start_byte;
4068 first_finished_pattern_index = state->pattern_index;
4069 }
4070 i++;
4071 }
4072
4073 // If there is finished capture that is clearly before any unfinished
4074 // capture, then return its match, and its capture index. Internally
4075 // record the fact that the capture has been 'consumed'.
4076 QueryState *state;
4077 if (first_finished_state) {
4078 state = first_finished_state;
4079 } else if (first_unfinished_state_is_definite) {
4080 state = &self->states.contents[first_unfinished_state_index];
4081 } else {
4082 state = NULL;
4083 }
4084
4085 if (state) {
4086 if (state->id == UINT32_MAX) state->id = self->next_state_id++;
4087 match->id = state->id;
4088 match->pattern_index = state->pattern_index;
4089 const CaptureList *captures = capture_list_pool_get(
4090 &self->capture_list_pool,
4091 state->capture_list_id
4092 );
4093 match->captures = captures->contents;
4094 match->capture_count = captures->size;
4095 *capture_index = state->consumed_capture_count;
4096 state->consumed_capture_count++;
4097 return true;
4098 }
4099
4100 if (capture_list_pool_is_empty(&self->capture_list_pool)) {
4101 LOG(
4102 " abandon state. index:%u, pattern:%u, offset:%u.\n",
4103 first_unfinished_state_index,
4104 first_unfinished_pattern_index,
4105 first_unfinished_capture_byte
4106 );
4107 capture_list_pool_release(
4108 &self->capture_list_pool,
4109 self->states.contents[first_unfinished_state_index].capture_list_id
4110 );
4111 array_erase(&self->states, first_unfinished_state_index);
4112 }
4113
4114 // If there are no finished matches that are ready to be returned, then
4115 // continue finding more matches.
4116 if (
4117 !ts_query_cursor__advance(self, true) &&
4118 self->finished_states.size == 0
4119 ) return false;
4120 }
4121}
4122
// Store the maximum start depth for this cursor. Elsewhere in this file the
// value is compared against the start depth of candidate pattern states and
// against the current traversal depth when deciding whether to descend.
void ts_query_cursor_set_max_start_depth(
  TSQueryCursor *self,
  uint32_t max_start_depth
) {
  self->max_start_depth = max_start_depth;
}
4129
4130#undef LOG
diff --git a/vendor/tree-sitter/lib/src/reduce_action.h b/vendor/tree-sitter/lib/src/reduce_action.h
new file mode 100644
index 0000000..72aff08
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/reduce_action.h
@@ -0,0 +1,34 @@
1#ifndef TREE_SITTER_REDUCE_ACTION_H_
2#define TREE_SITTER_REDUCE_ACTION_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include "./array.h"
9#include "tree_sitter/api.h"
10
11typedef struct {
12 uint32_t count;
13 TSSymbol symbol;
14 int dynamic_precedence;
15 unsigned short production_id;
16} ReduceAction;
17
18typedef Array(ReduceAction) ReduceActionSet;
19
20static inline void ts_reduce_action_set_add(ReduceActionSet *self,
21 ReduceAction new_action) {
22 for (uint32_t i = 0; i < self->size; i++) {
23 ReduceAction action = self->contents[i];
24 if (action.symbol == new_action.symbol && action.count == new_action.count)
25 return;
26 }
27 array_push(self, new_action);
28}
29
30#ifdef __cplusplus
31}
32#endif
33
34#endif // TREE_SITTER_REDUCE_ACTION_H_
diff --git a/vendor/tree-sitter/lib/src/reusable_node.h b/vendor/tree-sitter/lib/src/reusable_node.h
new file mode 100644
index 0000000..63fe3c1
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/reusable_node.h
@@ -0,0 +1,95 @@
1#include "./subtree.h"
2
3typedef struct {
4 Subtree tree;
5 uint32_t child_index;
6 uint32_t byte_offset;
7} StackEntry;
8
9typedef struct {
10 Array(StackEntry) stack;
11 Subtree last_external_token;
12} ReusableNode;
13
14static inline ReusableNode reusable_node_new(void) {
15 return (ReusableNode) {array_new(), NULL_SUBTREE};
16}
17
18static inline void reusable_node_clear(ReusableNode *self) {
19 array_clear(&self->stack);
20 self->last_external_token = NULL_SUBTREE;
21}
22
23static inline Subtree reusable_node_tree(ReusableNode *self) {
24 return self->stack.size > 0
25 ? self->stack.contents[self->stack.size - 1].tree
26 : NULL_SUBTREE;
27}
28
29static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
30 return self->stack.size > 0
31 ? self->stack.contents[self->stack.size - 1].byte_offset
32 : UINT32_MAX;
33}
34
35static inline void reusable_node_delete(ReusableNode *self) {
36 array_delete(&self->stack);
37}
38
39static inline void reusable_node_advance(ReusableNode *self) {
40 StackEntry last_entry = *array_back(&self->stack);
41 uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);
42 if (ts_subtree_has_external_tokens(last_entry.tree)) {
43 self->last_external_token = ts_subtree_last_external_token(last_entry.tree);
44 }
45
46 Subtree tree;
47 uint32_t next_index;
48 do {
49 StackEntry popped_entry = array_pop(&self->stack);
50 next_index = popped_entry.child_index + 1;
51 if (self->stack.size == 0) return;
52 tree = array_back(&self->stack)->tree;
53 } while (ts_subtree_child_count(tree) <= next_index);
54
55 array_push(&self->stack, ((StackEntry) {
56 .tree = ts_subtree_children(tree)[next_index],
57 .child_index = next_index,
58 .byte_offset = byte_offset,
59 }));
60}
61
62static inline bool reusable_node_descend(ReusableNode *self) {
63 StackEntry last_entry = *array_back(&self->stack);
64 if (ts_subtree_child_count(last_entry.tree) > 0) {
65 array_push(&self->stack, ((StackEntry) {
66 .tree = ts_subtree_children(last_entry.tree)[0],
67 .child_index = 0,
68 .byte_offset = last_entry.byte_offset,
69 }));
70 return true;
71 } else {
72 return false;
73 }
74}
75
76static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
77 while (reusable_node_descend(self)) {}
78 reusable_node_advance(self);
79}
80
81static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
82 reusable_node_clear(self);
83 array_push(&self->stack, ((StackEntry) {
84 .tree = tree,
85 .child_index = 0,
86 .byte_offset = 0,
87 }));
88
89 // Never reuse the root node, because it has a non-standard internal structure
90 // due to transformations that are applied when it is accepted: adding the EOF
91 // child and any extra children.
92 if (!reusable_node_descend(self)) {
93 reusable_node_clear(self);
94 }
95}
diff --git a/vendor/tree-sitter/lib/src/stack.c b/vendor/tree-sitter/lib/src/stack.c
new file mode 100644
index 0000000..3484635
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/stack.c
@@ -0,0 +1,897 @@
1#include "./alloc.h"
2#include "./language.h"
3#include "./subtree.h"
4#include "./array.h"
5#include "./stack.h"
6#include "./length.h"
7#include <assert.h>
8#include <stdio.h>
9
// Capacity of StackNode.links; stack_node_add_link silently drops a new link
// once a node already holds this many.
#define MAX_LINK_COUNT 8
// Upper bound on the number of released StackNodes kept for reuse.
#define MAX_NODE_POOL_SIZE 50
// stack__iter stops forking additional iterators once this many are live.
#define MAX_ITERATOR_COUNT 64

// Within this file `inline` is redefined so that the small callback functions
// are force-inlined. It is restored by the `#undef inline` at the end of the
// file.
#if defined(_WIN32) && !defined(__GNUC__)
#define inline __forceinline
#else
#define inline static inline __attribute__((always_inline))
#endif
19
20typedef struct StackNode StackNode;
21
22typedef struct {
23 StackNode *node;
24 Subtree subtree;
25 bool is_pending;
26} StackLink;
27
28struct StackNode {
29 TSStateId state;
30 Length position;
31 StackLink links[MAX_LINK_COUNT];
32 short unsigned int link_count;
33 uint32_t ref_count;
34 unsigned error_cost;
35 unsigned node_count;
36 int dynamic_precedence;
37};
38
39typedef struct {
40 StackNode *node;
41 SubtreeArray subtrees;
42 uint32_t subtree_count;
43 bool is_pending;
44} StackIterator;
45
46typedef Array(StackNode *) StackNodeArray;
47
// Lifecycle state of a stack version (see ts_stack_pause, ts_stack_resume and
// ts_stack_halt).
typedef enum {
  StackStatusActive,  // default status of a newly created version
  StackStatusPaused,  // set by ts_stack_pause; cleared by ts_stack_resume
  StackStatusHalted,  // set by ts_stack_halt
} StackStatus;
53
54typedef struct {
55 StackNode *node;
56 StackSummary *summary;
57 unsigned node_count_at_last_error;
58 Subtree last_external_token;
59 Subtree lookahead_when_paused;
60 StackStatus status;
61} StackHead;
62
63struct Stack {
64 Array(StackHead) heads;
65 StackSliceArray slices;
66 Array(StackIterator) iterators;
67 StackNodeArray node_pool;
68 StackNode *base_node;
69 SubtreePool *subtree_pool;
70};
71
72typedef unsigned StackAction;
73enum {
74 StackActionNone,
75 StackActionStop = 1,
76 StackActionPop = 2,
77};
78
79typedef StackAction (*StackCallback)(void *, const StackIterator *);
80
81static void stack_node_retain(StackNode *self) {
82 if (!self)
83 return;
84 assert(self->ref_count > 0);
85 self->ref_count++;
86 assert(self->ref_count != 0);
87}
88
89static void stack_node_release(
90 StackNode *self,
91 StackNodeArray *pool,
92 SubtreePool *subtree_pool
93) {
94recur:
95 assert(self->ref_count != 0);
96 self->ref_count--;
97 if (self->ref_count > 0) return;
98
99 StackNode *first_predecessor = NULL;
100 if (self->link_count > 0) {
101 for (unsigned i = self->link_count - 1; i > 0; i--) {
102 StackLink link = self->links[i];
103 if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
104 stack_node_release(link.node, pool, subtree_pool);
105 }
106 StackLink link = self->links[0];
107 if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
108 first_predecessor = self->links[0].node;
109 }
110
111 if (pool->size < MAX_NODE_POOL_SIZE) {
112 array_push(pool, self);
113 } else {
114 ts_free(self);
115 }
116
117 if (first_predecessor) {
118 self = first_predecessor;
119 goto recur;
120 }
121}
122
123/// Get the number of nodes in the subtree, for the purpose of measuring
124/// how much progress has been made by a given version of the stack.
125static uint32_t stack__subtree_node_count(Subtree subtree) {
126 uint32_t count = ts_subtree_visible_descendant_count(subtree);
127 if (ts_subtree_visible(subtree)) count++;
128
129 // Count intermediate error nodes even though they are not visible,
130 // because a stack version's node count is used to check whether it
131 // has made any progress since the last time it encountered an error.
132 if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++;
133
134 return count;
135}
136
137static StackNode *stack_node_new(
138 StackNode *previous_node,
139 Subtree subtree,
140 bool is_pending,
141 TSStateId state,
142 StackNodeArray *pool
143) {
144 StackNode *node = pool->size > 0
145 ? array_pop(pool)
146 : ts_malloc(sizeof(StackNode));
147 *node = (StackNode) {
148 .ref_count = 1,
149 .link_count = 0,
150 .state = state
151 };
152
153 if (previous_node) {
154 node->link_count = 1;
155 node->links[0] = (StackLink) {
156 .node = previous_node,
157 .subtree = subtree,
158 .is_pending = is_pending,
159 };
160
161 node->position = previous_node->position;
162 node->error_cost = previous_node->error_cost;
163 node->dynamic_precedence = previous_node->dynamic_precedence;
164 node->node_count = previous_node->node_count;
165
166 if (subtree.ptr) {
167 node->error_cost += ts_subtree_error_cost(subtree);
168 node->position = length_add(node->position, ts_subtree_total_size(subtree));
169 node->node_count += stack__subtree_node_count(subtree);
170 node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
171 }
172 } else {
173 node->position = length_zero();
174 node->error_cost = 0;
175 }
176
177 return node;
178}
179
180static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
181 if (left.ptr == right.ptr) return true;
182 if (!left.ptr || !right.ptr) return false;
183
184 // Symbols must match
185 if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;
186
187 // If both have errors, don't bother keeping both.
188 if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true;
189
190 return (
191 ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes &&
192 ts_subtree_size(left).bytes == ts_subtree_size(right).bytes &&
193 ts_subtree_child_count(left) == ts_subtree_child_count(right) &&
194 ts_subtree_extra(left) == ts_subtree_extra(right) &&
195 ts_subtree_external_scanner_state_eq(left, right)
196 );
197}
198
199static void stack_node_add_link(
200 StackNode *self,
201 StackLink link,
202 SubtreePool *subtree_pool
203) {
204 if (link.node == self) return;
205
206 for (int i = 0; i < self->link_count; i++) {
207 StackLink *existing_link = &self->links[i];
208 if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
209 // In general, we preserve ambiguities until they are removed from the stack
210 // during a pop operation where multiple paths lead to the same node. But in
211 // the special case where two links directly connect the same pair of nodes,
212 // we can safely remove the ambiguity ahead of time without changing behavior.
213 if (existing_link->node == link.node) {
214 if (
215 ts_subtree_dynamic_precedence(link.subtree) >
216 ts_subtree_dynamic_precedence(existing_link->subtree)
217 ) {
218 ts_subtree_retain(link.subtree);
219 ts_subtree_release(subtree_pool, existing_link->subtree);
220 existing_link->subtree = link.subtree;
221 self->dynamic_precedence =
222 link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
223 }
224 return;
225 }
226
227 // If the previous nodes are mergeable, merge them recursively.
228 if (
229 existing_link->node->state == link.node->state &&
230 existing_link->node->position.bytes == link.node->position.bytes
231 ) {
232 for (int j = 0; j < link.node->link_count; j++) {
233 stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
234 }
235 int32_t dynamic_precedence = link.node->dynamic_precedence;
236 if (link.subtree.ptr) {
237 dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
238 }
239 if (dynamic_precedence > self->dynamic_precedence) {
240 self->dynamic_precedence = dynamic_precedence;
241 }
242 return;
243 }
244 }
245 }
246
247 if (self->link_count == MAX_LINK_COUNT) return;
248
249 stack_node_retain(link.node);
250 unsigned node_count = link.node->node_count;
251 int dynamic_precedence = link.node->dynamic_precedence;
252 self->links[self->link_count++] = link;
253
254 if (link.subtree.ptr) {
255 ts_subtree_retain(link.subtree);
256 node_count += stack__subtree_node_count(link.subtree);
257 dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
258 }
259
260 if (node_count > self->node_count) self->node_count = node_count;
261 if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
262}
263
264static void stack_head_delete(
265 StackHead *self,
266 StackNodeArray *pool,
267 SubtreePool *subtree_pool
268) {
269 if (self->node) {
270 if (self->last_external_token.ptr) {
271 ts_subtree_release(subtree_pool, self->last_external_token);
272 }
273 if (self->lookahead_when_paused.ptr) {
274 ts_subtree_release(subtree_pool, self->lookahead_when_paused);
275 }
276 if (self->summary) {
277 array_delete(self->summary);
278 ts_free(self->summary);
279 }
280 stack_node_release(self->node, pool, subtree_pool);
281 }
282}
283
284static StackVersion ts_stack__add_version(
285 Stack *self,
286 StackVersion original_version,
287 StackNode *node
288) {
289 StackHead head = {
290 .node = node,
291 .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error,
292 .last_external_token = self->heads.contents[original_version].last_external_token,
293 .status = StackStatusActive,
294 .lookahead_when_paused = NULL_SUBTREE,
295 };
296 array_push(&self->heads, head);
297 stack_node_retain(node);
298 if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token);
299 return (StackVersion)(self->heads.size - 1);
300}
301
302static void ts_stack__add_slice(
303 Stack *self,
304 StackVersion original_version,
305 StackNode *node,
306 SubtreeArray *subtrees
307) {
308 for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
309 StackVersion version = self->slices.contents[i].version;
310 if (self->heads.contents[version].node == node) {
311 StackSlice slice = {*subtrees, version};
312 array_insert(&self->slices, i + 1, slice);
313 return;
314 }
315 }
316
317 StackVersion version = ts_stack__add_version(self, original_version, node);
318 StackSlice slice = { *subtrees, version };
319 array_push(&self->slices, slice);
320}
321
322static StackSliceArray stack__iter(
323 Stack *self,
324 StackVersion version,
325 StackCallback callback,
326 void *payload,
327 int goal_subtree_count
328) {
329 array_clear(&self->slices);
330 array_clear(&self->iterators);
331
332 StackHead *head = array_get(&self->heads, version);
333 StackIterator new_iterator = {
334 .node = head->node,
335 .subtrees = array_new(),
336 .subtree_count = 0,
337 .is_pending = true,
338 };
339
340 bool include_subtrees = false;
341 if (goal_subtree_count >= 0) {
342 include_subtrees = true;
343 array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
344 }
345
346 array_push(&self->iterators, new_iterator);
347
348 while (self->iterators.size > 0) {
349 for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
350 StackIterator *iterator = &self->iterators.contents[i];
351 StackNode *node = iterator->node;
352
353 StackAction action = callback(payload, iterator);
354 bool should_pop = action & StackActionPop;
355 bool should_stop = action & StackActionStop || node->link_count == 0;
356
357 if (should_pop) {
358 SubtreeArray subtrees = iterator->subtrees;
359 if (!should_stop) {
360 ts_subtree_array_copy(subtrees, &subtrees);
361 }
362 ts_subtree_array_reverse(&subtrees);
363 ts_stack__add_slice(
364 self,
365 version,
366 node,
367 &subtrees
368 );
369 }
370
371 if (should_stop) {
372 if (!should_pop) {
373 ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
374 }
375 array_erase(&self->iterators, i);
376 i--, size--;
377 continue;
378 }
379
380 for (uint32_t j = 1; j <= node->link_count; j++) {
381 StackIterator *next_iterator;
382 StackLink link;
383 if (j == node->link_count) {
384 link = node->links[0];
385 next_iterator = &self->iterators.contents[i];
386 } else {
387 if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
388 link = node->links[j];
389 StackIterator current_iterator = self->iterators.contents[i];
390 array_push(&self->iterators, current_iterator);
391 next_iterator = array_back(&self->iterators);
392 ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
393 }
394
395 next_iterator->node = link.node;
396 if (link.subtree.ptr) {
397 if (include_subtrees) {
398 array_push(&next_iterator->subtrees, link.subtree);
399 ts_subtree_retain(link.subtree);
400 }
401
402 if (!ts_subtree_extra(link.subtree)) {
403 next_iterator->subtree_count++;
404 if (!link.is_pending) {
405 next_iterator->is_pending = false;
406 }
407 }
408 } else {
409 next_iterator->subtree_count++;
410 next_iterator->is_pending = false;
411 }
412 }
413 }
414 }
415
416 return self->slices;
417}
418
419Stack *ts_stack_new(SubtreePool *subtree_pool) {
420 Stack *self = ts_calloc(1, sizeof(Stack));
421
422 array_init(&self->heads);
423 array_init(&self->slices);
424 array_init(&self->iterators);
425 array_init(&self->node_pool);
426 array_reserve(&self->heads, 4);
427 array_reserve(&self->slices, 4);
428 array_reserve(&self->iterators, 4);
429 array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE);
430
431 self->subtree_pool = subtree_pool;
432 self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool);
433 ts_stack_clear(self);
434
435 return self;
436}
437
438void ts_stack_delete(Stack *self) {
439 if (self->slices.contents)
440 array_delete(&self->slices);
441 if (self->iterators.contents)
442 array_delete(&self->iterators);
443 stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
444 for (uint32_t i = 0; i < self->heads.size; i++) {
445 stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
446 }
447 array_clear(&self->heads);
448 if (self->node_pool.contents) {
449 for (uint32_t i = 0; i < self->node_pool.size; i++)
450 ts_free(self->node_pool.contents[i]);
451 array_delete(&self->node_pool);
452 }
453 array_delete(&self->heads);
454 ts_free(self);
455}
456
457uint32_t ts_stack_version_count(const Stack *self) {
458 return self->heads.size;
459}
460
461TSStateId ts_stack_state(const Stack *self, StackVersion version) {
462 return array_get(&self->heads, version)->node->state;
463}
464
465Length ts_stack_position(const Stack *self, StackVersion version) {
466 return array_get(&self->heads, version)->node->position;
467}
468
469Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
470 return array_get(&self->heads, version)->last_external_token;
471}
472
473void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
474 StackHead *head = array_get(&self->heads, version);
475 if (token.ptr) ts_subtree_retain(token);
476 if (head->last_external_token.ptr) ts_subtree_release(self->subtree_pool, head->last_external_token);
477 head->last_external_token = token;
478}
479
480unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
481 StackHead *head = array_get(&self->heads, version);
482 unsigned result = head->node->error_cost;
483 if (
484 head->status == StackStatusPaused ||
485 (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) {
486 result += ERROR_COST_PER_RECOVERY;
487 }
488 return result;
489}
490
491unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
492 StackHead *head = array_get(&self->heads, version);
493 if (head->node->node_count < head->node_count_at_last_error) {
494 head->node_count_at_last_error = head->node->node_count;
495 }
496 return head->node->node_count - head->node_count_at_last_error;
497}
498
499void ts_stack_push(
500 Stack *self,
501 StackVersion version,
502 Subtree subtree,
503 bool pending,
504 TSStateId state
505) {
506 StackHead *head = array_get(&self->heads, version);
507 StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
508 if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count;
509 head->node = new_node;
510}
511
512inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
513 unsigned *goal_subtree_count = payload;
514 if (iterator->subtree_count == *goal_subtree_count) {
515 return StackActionPop | StackActionStop;
516 } else {
517 return StackActionNone;
518 }
519}
520
521StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
522 return stack__iter(self, version, pop_count_callback, &count, (int)count);
523}
524
525inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
526 (void)payload;
527 if (iterator->subtree_count >= 1) {
528 if (iterator->is_pending) {
529 return StackActionPop | StackActionStop;
530 } else {
531 return StackActionStop;
532 }
533 } else {
534 return StackActionNone;
535 }
536}
537
538StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
539 StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0);
540 if (pop.size > 0) {
541 ts_stack_renumber_version(self, pop.contents[0].version, version);
542 pop.contents[0].version = version;
543 }
544 return pop;
545}
546
547inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
548 if (iterator->subtrees.size > 0) {
549 bool *found_error = payload;
550 if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) {
551 *found_error = true;
552 return StackActionPop | StackActionStop;
553 } else {
554 return StackActionStop;
555 }
556 } else {
557 return StackActionNone;
558 }
559}
560
561SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
562 StackNode *node = array_get(&self->heads, version)->node;
563 for (unsigned i = 0; i < node->link_count; i++) {
564 if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) {
565 bool found_error = false;
566 StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
567 if (pop.size > 0) {
568 assert(pop.size == 1);
569 ts_stack_renumber_version(self, pop.contents[0].version, version);
570 return pop.contents[0].subtrees;
571 }
572 break;
573 }
574 }
575 return (SubtreeArray) {.size = 0};
576}
577
578inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
579 (void)payload;
580 return iterator->node->link_count == 0 ? StackActionPop : StackActionNone;
581}
582
583StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
584 return stack__iter(self, version, pop_all_callback, NULL, 0);
585}
586
587typedef struct {
588 StackSummary *summary;
589 unsigned max_depth;
590} SummarizeStackSession;
591
592inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
593 SummarizeStackSession *session = payload;
594 TSStateId state = iterator->node->state;
595 unsigned depth = iterator->subtree_count;
596 if (depth > session->max_depth) return StackActionStop;
597 for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) {
598 StackSummaryEntry entry = session->summary->contents[i];
599 if (entry.depth < depth) break;
600 if (entry.depth == depth && entry.state == state) return StackActionNone;
601 }
602 array_push(session->summary, ((StackSummaryEntry) {
603 .position = iterator->node->position,
604 .depth = depth,
605 .state = state,
606 }));
607 return StackActionNone;
608}
609
610void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
611 SummarizeStackSession session = {
612 .summary = ts_malloc(sizeof(StackSummary)),
613 .max_depth = max_depth
614 };
615 array_init(session.summary);
616 stack__iter(self, version, summarize_stack_callback, &session, -1);
617 StackHead *head = &self->heads.contents[version];
618 if (head->summary) {
619 array_delete(head->summary);
620 ts_free(head->summary);
621 }
622 head->summary = session.summary;
623}
624
625StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
626 return array_get(&self->heads, version)->summary;
627}
628
629int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
630 return array_get(&self->heads, version)->node->dynamic_precedence;
631}
632
633bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
634 const StackHead *head = array_get(&self->heads, version);
635 const StackNode *node = head->node;
636 if (node->error_cost == 0) return true;
637 while (node) {
638 if (node->link_count > 0) {
639 Subtree subtree = node->links[0].subtree;
640 if (subtree.ptr) {
641 if (ts_subtree_total_bytes(subtree) > 0) {
642 return true;
643 } else if (
644 node->node_count > head->node_count_at_last_error &&
645 ts_subtree_error_cost(subtree) == 0
646 ) {
647 node = node->links[0].node;
648 continue;
649 }
650 }
651 }
652 break;
653 }
654 return false;
655}
656
657void ts_stack_remove_version(Stack *self, StackVersion version) {
658 stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool);
659 array_erase(&self->heads, version);
660}
661
662void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
663 if (v1 == v2) return;
664 assert(v2 < v1);
665 assert((uint32_t)v1 < self->heads.size);
666 StackHead *source_head = &self->heads.contents[v1];
667 StackHead *target_head = &self->heads.contents[v2];
668 if (target_head->summary && !source_head->summary) {
669 source_head->summary = target_head->summary;
670 target_head->summary = NULL;
671 }
672 stack_head_delete(target_head, &self->node_pool, self->subtree_pool);
673 *target_head = *source_head;
674 array_erase(&self->heads, v1);
675}
676
677void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
678 StackHead temporary_head = self->heads.contents[v1];
679 self->heads.contents[v1] = self->heads.contents[v2];
680 self->heads.contents[v2] = temporary_head;
681}
682
683StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
684 assert(version < self->heads.size);
685 array_push(&self->heads, self->heads.contents[version]);
686 StackHead *head = array_back(&self->heads);
687 stack_node_retain(head->node);
688 if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token);
689 head->summary = NULL;
690 return self->heads.size - 1;
691}
692
693bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
694 if (!ts_stack_can_merge(self, version1, version2)) return false;
695 StackHead *head1 = &self->heads.contents[version1];
696 StackHead *head2 = &self->heads.contents[version2];
697 for (uint32_t i = 0; i < head2->node->link_count; i++) {
698 stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool);
699 }
700 if (head1->node->state == ERROR_STATE) {
701 head1->node_count_at_last_error = head1->node->node_count;
702 }
703 ts_stack_remove_version(self, version2);
704 return true;
705}
706
707bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
708 StackHead *head1 = &self->heads.contents[version1];
709 StackHead *head2 = &self->heads.contents[version2];
710 return
711 head1->status == StackStatusActive &&
712 head2->status == StackStatusActive &&
713 head1->node->state == head2->node->state &&
714 head1->node->position.bytes == head2->node->position.bytes &&
715 head1->node->error_cost == head2->node->error_cost &&
716 ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token);
717}
718
719void ts_stack_halt(Stack *self, StackVersion version) {
720 array_get(&self->heads, version)->status = StackStatusHalted;
721}
722
723void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) {
724 StackHead *head = array_get(&self->heads, version);
725 head->status = StackStatusPaused;
726 head->lookahead_when_paused = lookahead;
727 head->node_count_at_last_error = head->node->node_count;
728}
729
730bool ts_stack_is_active(const Stack *self, StackVersion version) {
731 return array_get(&self->heads, version)->status == StackStatusActive;
732}
733
734bool ts_stack_is_halted(const Stack *self, StackVersion version) {
735 return array_get(&self->heads, version)->status == StackStatusHalted;
736}
737
738bool ts_stack_is_paused(const Stack *self, StackVersion version) {
739 return array_get(&self->heads, version)->status == StackStatusPaused;
740}
741
742Subtree ts_stack_resume(Stack *self, StackVersion version) {
743 StackHead *head = array_get(&self->heads, version);
744 assert(head->status == StackStatusPaused);
745 Subtree result = head->lookahead_when_paused;
746 head->status = StackStatusActive;
747 head->lookahead_when_paused = NULL_SUBTREE;
748 return result;
749}
750
751void ts_stack_clear(Stack *self) {
752 stack_node_retain(self->base_node);
753 for (uint32_t i = 0; i < self->heads.size; i++) {
754 stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
755 }
756 array_clear(&self->heads);
757 array_push(&self->heads, ((StackHead) {
758 .node = self->base_node,
759 .status = StackStatusActive,
760 .last_external_token = NULL_SUBTREE,
761 .lookahead_when_paused = NULL_SUBTREE,
762 }));
763}
764
765bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
766 array_reserve(&self->iterators, 32);
767 if (!f) f = stderr;
768
769 fprintf(f, "digraph stack {\n");
770 fprintf(f, "rankdir=\"RL\";\n");
771 fprintf(f, "edge [arrowhead=none]\n");
772
773 Array(StackNode *) visited_nodes = array_new();
774
775 array_clear(&self->iterators);
776 for (uint32_t i = 0; i < self->heads.size; i++) {
777 StackHead *head = &self->heads.contents[i];
778 if (head->status == StackStatusHalted) continue;
779
780 fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
781 fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);
782
783 if (head->status == StackStatusPaused) {
784 fprintf(f, "color=red ");
785 }
786 fprintf(f,
787 "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
788 i,
789 ts_stack_node_count_since_error(self, i),
790 ts_stack_error_cost(self, i)
791 );
792
793 if (head->summary) {
794 fprintf(f, "\nsummary:");
795 for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state);
796 }
797
798 if (head->last_external_token.ptr) {
799 const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
800 const char *data = ts_external_scanner_state_data(state);
801 fprintf(f, "\nexternal_scanner_state:");
802 for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]);
803 }
804
805 fprintf(f, "\"]\n");
806 array_push(&self->iterators, ((StackIterator) {
807 .node = head->node
808 }));
809 }
810
811 bool all_iterators_done = false;
812 while (!all_iterators_done) {
813 all_iterators_done = true;
814
815 for (uint32_t i = 0; i < self->iterators.size; i++) {
816 StackIterator iterator = self->iterators.contents[i];
817 StackNode *node = iterator.node;
818
819 for (uint32_t j = 0; j < visited_nodes.size; j++) {
820 if (visited_nodes.contents[j] == node) {
821 node = NULL;
822 break;
823 }
824 }
825
826 if (!node) continue;
827 all_iterators_done = false;
828
829 fprintf(f, "node_%p [", (void *)node);
830 if (node->state == ERROR_STATE) {
831 fprintf(f, "label=\"?\"");
832 } else if (
833 node->link_count == 1 &&
834 node->links[0].subtree.ptr &&
835 ts_subtree_extra(node->links[0].subtree)
836 ) {
837 fprintf(f, "shape=point margin=0 label=\"\"");
838 } else {
839 fprintf(f, "label=\"%d\"", node->state);
840 }
841
842 fprintf(
843 f,
844 " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
845 node->position.extent.row + 1,
846 node->position.extent.column,
847 node->node_count,
848 node->error_cost,
849 node->dynamic_precedence
850 );
851
852 for (int j = 0; j < node->link_count; j++) {
853 StackLink link = node->links[j];
854 fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
855 if (link.is_pending) fprintf(f, "style=dashed ");
856 if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");
857
858 if (!link.subtree.ptr) {
859 fprintf(f, "color=red");
860 } else {
861 fprintf(f, "label=\"");
862 bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
863 if (quoted) fprintf(f, "'");
864 ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree));
865 if (quoted) fprintf(f, "'");
866 fprintf(f, "\"");
867 fprintf(
868 f,
869 "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"",
870 ts_subtree_error_cost(link.subtree),
871 ts_subtree_dynamic_precedence(link.subtree)
872 );
873 }
874
875 fprintf(f, "];\n");
876
877 StackIterator *next_iterator;
878 if (j == 0) {
879 next_iterator = &self->iterators.contents[i];
880 } else {
881 array_push(&self->iterators, iterator);
882 next_iterator = array_back(&self->iterators);
883 }
884 next_iterator->node = link.node;
885 }
886
887 array_push(&visited_nodes, node);
888 }
889 }
890
891 fprintf(f, "}\n");
892
893 array_delete(&visited_nodes);
894 return true;
895}
896
897#undef inline
diff --git a/vendor/tree-sitter/lib/src/stack.h b/vendor/tree-sitter/lib/src/stack.h
new file mode 100644
index 0000000..86abbc9
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/stack.h
@@ -0,0 +1,133 @@
1#ifndef TREE_SITTER_PARSE_STACK_H_
2#define TREE_SITTER_PARSE_STACK_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include "./array.h"
9#include "./subtree.h"
10#include "./error_costs.h"
11#include <stdio.h>
12
13typedef struct Stack Stack;
14
15typedef unsigned StackVersion;
16#define STACK_VERSION_NONE ((StackVersion)-1)
17
18typedef struct {
19 SubtreeArray subtrees;
20 StackVersion version;
21} StackSlice;
22typedef Array(StackSlice) StackSliceArray;
23
24typedef struct {
25 Length position;
26 unsigned depth;
27 TSStateId state;
28} StackSummaryEntry;
29typedef Array(StackSummaryEntry) StackSummary;
30
31// Create a stack.
32Stack *ts_stack_new(SubtreePool *);
33
34// Release the memory reserved for a given stack.
35void ts_stack_delete(Stack *);
36
37// Get the stack's current number of versions.
38uint32_t ts_stack_version_count(const Stack *);
39
40// Get the state at the top of the given version of the stack. If the stack is
41// empty, this returns the initial state, 0.
42TSStateId ts_stack_state(const Stack *, StackVersion);
43
44// Get the last external token associated with a given version of the stack.
45Subtree ts_stack_last_external_token(const Stack *, StackVersion);
46
47// Set the last external token associated with a given version of the stack.
48void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
49
50// Get the position of the given version of the stack within the document.
51Length ts_stack_position(const Stack *, StackVersion);
52
53// Push a tree and state onto the given version of the stack.
54//
55// This transfers ownership of the tree to the Stack. Callers that
56// need to retain ownership of the tree for their own purposes should
57// first retain the tree.
58void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
59
60// Pop the given number of entries from the given version of the stack. This
61// operation can increase the number of stack versions by revealing multiple
62// versions which had previously been merged. It returns an array that
63// specifies the index of each revealed version and the trees that were
64// removed from that version.
65StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
66
67// Remove an error at the top of the given version of the stack.
68SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
69
70// Remove any pending trees from the top of the given version of the stack.
71StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
72
73// Remove all trees from the given version of the stack.
74StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
75
76// Get the maximum number of tree nodes reachable from this version of the stack
77// since the last error was detected.
78unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
79
80int ts_stack_dynamic_precedence(Stack *, StackVersion);
81
82bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
83
84// Compute a summary of all the parse states near the top of the given
85// version of the stack and store the summary for later retrieval.
86void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
87
88// Retrieve a summary of all the parse states near the top of the
89// given version of the stack.
90StackSummary *ts_stack_get_summary(Stack *, StackVersion);
91
92// Get the total cost of all errors on the given version of the stack.
93unsigned ts_stack_error_cost(const Stack *, StackVersion version);
94
95// Merge the given two stack versions if possible, returning true
96// if they were successfully merged and false otherwise.
97bool ts_stack_merge(Stack *, StackVersion, StackVersion);
98
99// Determine whether the given two stack versions can be merged.
100bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
101
102Subtree ts_stack_resume(Stack *, StackVersion);
103
104void ts_stack_pause(Stack *, StackVersion, Subtree);
105
106void ts_stack_halt(Stack *, StackVersion);
107
108bool ts_stack_is_active(const Stack *, StackVersion);
109
110bool ts_stack_is_paused(const Stack *, StackVersion);
111
112bool ts_stack_is_halted(const Stack *, StackVersion);
113
114void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
115
116void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
117
118StackVersion ts_stack_copy_version(Stack *, StackVersion);
119
120// Remove the given version from the stack.
121void ts_stack_remove_version(Stack *, StackVersion);
122
123void ts_stack_clear(Stack *);
124
125bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
126
127typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
128
129#ifdef __cplusplus
130}
131#endif
132
133#endif // TREE_SITTER_PARSE_STACK_H_
diff --git a/vendor/tree-sitter/lib/src/subtree.c b/vendor/tree-sitter/lib/src/subtree.c
new file mode 100644
index 0000000..51bc2ef
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/subtree.c
@@ -0,0 +1,1039 @@
1#include <assert.h>
2#include <ctype.h>
3#include <limits.h>
4#include <stdbool.h>
5#include <string.h>
6#include <stdio.h>
7#include "./alloc.h"
8#include "./atomic.h"
9#include "./subtree.h"
10#include "./length.h"
11#include "./language.h"
12#include "./error_costs.h"
13#include <stddef.h>
14
15typedef struct {
16 Length start;
17 Length old_end;
18 Length new_end;
19} Edit;
20
21#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
22#define TS_MAX_TREE_POOL_SIZE 32
23
24// ExternalScannerState
25
26void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
27 self->length = length;
28 if (length > sizeof(self->short_data)) {
29 self->long_data = ts_malloc(length);
30 memcpy(self->long_data, data, length);
31 } else {
32 memcpy(self->short_data, data, length);
33 }
34}
35
36ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
37 ExternalScannerState result = *self;
38 if (self->length > sizeof(self->short_data)) {
39 result.long_data = ts_malloc(self->length);
40 memcpy(result.long_data, self->long_data, self->length);
41 }
42 return result;
43}
44
45void ts_external_scanner_state_delete(ExternalScannerState *self) {
46 if (self->length > sizeof(self->short_data)) {
47 ts_free(self->long_data);
48 }
49}
50
51const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
52 if (self->length > sizeof(self->short_data)) {
53 return self->long_data;
54 } else {
55 return self->short_data;
56 }
57}
58
59bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) {
60 return
61 self->length == length &&
62 memcmp(ts_external_scanner_state_data(self), buffer, length) == 0;
63}
64
65// SubtreeArray
66
67void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
68 dest->size = self.size;
69 dest->capacity = self.capacity;
70 dest->contents = self.contents;
71 if (self.capacity > 0) {
72 dest->contents = ts_calloc(self.capacity, sizeof(Subtree));
73 memcpy(dest->contents, self.contents, self.size * sizeof(Subtree));
74 for (uint32_t i = 0; i < self.size; i++) {
75 ts_subtree_retain(dest->contents[i]);
76 }
77 }
78}
79
80void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) {
81 for (uint32_t i = 0; i < self->size; i++) {
82 ts_subtree_release(pool, self->contents[i]);
83 }
84 array_clear(self);
85}
86
87void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
88 ts_subtree_array_clear(pool, self);
89 array_delete(self);
90}
91
92void ts_subtree_array_remove_trailing_extras(
93 SubtreeArray *self,
94 SubtreeArray *destination
95) {
96 array_clear(destination);
97 while (self->size > 0) {
98 Subtree last = self->contents[self->size - 1];
99 if (ts_subtree_extra(last)) {
100 self->size--;
101 array_push(destination, last);
102 } else {
103 break;
104 }
105 }
106 ts_subtree_array_reverse(destination);
107}
108
109void ts_subtree_array_reverse(SubtreeArray *self) {
110 for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
111 size_t reverse_index = self->size - 1 - i;
112 Subtree swap = self->contents[i];
113 self->contents[i] = self->contents[reverse_index];
114 self->contents[reverse_index] = swap;
115 }
116}
117
118// SubtreePool
119
120SubtreePool ts_subtree_pool_new(uint32_t capacity) {
121 SubtreePool self = {array_new(), array_new()};
122 array_reserve(&self.free_trees, capacity);
123 return self;
124}
125
126void ts_subtree_pool_delete(SubtreePool *self) {
127 if (self->free_trees.contents) {
128 for (unsigned i = 0; i < self->free_trees.size; i++) {
129 ts_free(self->free_trees.contents[i].ptr);
130 }
131 array_delete(&self->free_trees);
132 }
133 if (self->tree_stack.contents) array_delete(&self->tree_stack);
134}
135
136static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
137 if (self->free_trees.size > 0) {
138 return array_pop(&self->free_trees).ptr;
139 } else {
140 return ts_malloc(sizeof(SubtreeHeapData));
141 }
142}
143
144static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
145 if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) {
146 array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
147 } else {
148 ts_free(tree);
149 }
150}
151
152// Subtree
153
154static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
155 return
156 padding.bytes < TS_MAX_INLINE_TREE_LENGTH &&
157 padding.extent.row < 16 &&
158 padding.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
159 size.extent.row == 0 &&
160 size.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
161 lookahead_bytes < 16;
162}
163
164Subtree ts_subtree_new_leaf(
165 SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
166 uint32_t lookahead_bytes, TSStateId parse_state,
167 bool has_external_tokens, bool depends_on_column,
168 bool is_keyword, const TSLanguage *language
169) {
170 TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
171 bool extra = symbol == ts_builtin_sym_end;
172
173 bool is_inline = (
174 symbol <= UINT8_MAX &&
175 !has_external_tokens &&
176 ts_subtree_can_inline(padding, size, lookahead_bytes)
177 );
178
179 if (is_inline) {
180 return (Subtree) {{
181 .parse_state = parse_state,
182 .symbol = symbol,
183 .padding_bytes = padding.bytes,
184 .padding_rows = padding.extent.row,
185 .padding_columns = padding.extent.column,
186 .size_bytes = size.bytes,
187 .lookahead_bytes = lookahead_bytes,
188 .visible = metadata.visible,
189 .named = metadata.named,
190 .extra = extra,
191 .has_changes = false,
192 .is_missing = false,
193 .is_keyword = is_keyword,
194 .is_inline = true,
195 }};
196 } else {
197 SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
198 *data = (SubtreeHeapData) {
199 .ref_count = 1,
200 .padding = padding,
201 .size = size,
202 .lookahead_bytes = lookahead_bytes,
203 .error_cost = 0,
204 .child_count = 0,
205 .symbol = symbol,
206 .parse_state = parse_state,
207 .visible = metadata.visible,
208 .named = metadata.named,
209 .extra = extra,
210 .fragile_left = false,
211 .fragile_right = false,
212 .has_changes = false,
213 .has_external_tokens = has_external_tokens,
214 .has_external_scanner_state_change = false,
215 .depends_on_column = depends_on_column,
216 .is_missing = false,
217 .is_keyword = is_keyword,
218 {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
219 };
220 return (Subtree) {.ptr = data};
221 }
222}
223
224void ts_subtree_set_symbol(
225 MutableSubtree *self,
226 TSSymbol symbol,
227 const TSLanguage *language
228) {
229 TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
230 if (self->data.is_inline) {
231 assert(symbol < UINT8_MAX);
232 self->data.symbol = symbol;
233 self->data.named = metadata.named;
234 self->data.visible = metadata.visible;
235 } else {
236 self->ptr->symbol = symbol;
237 self->ptr->named = metadata.named;
238 self->ptr->visible = metadata.visible;
239 }
240}
241
242Subtree ts_subtree_new_error(
243 SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
244 uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
245) {
246 Subtree result = ts_subtree_new_leaf(
247 pool, ts_builtin_sym_error, padding, size, bytes_scanned,
248 parse_state, false, false, false, language
249 );
250 SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
251 data->fragile_left = true;
252 data->fragile_right = true;
253 data->lookahead_char = lookahead_char;
254 return result;
255}
256
257// Clone a subtree.
258MutableSubtree ts_subtree_clone(Subtree self) {
259 size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count);
260 Subtree *new_children = ts_malloc(alloc_size);
261 Subtree *old_children = ts_subtree_children(self);
262 memcpy(new_children, old_children, alloc_size);
263 SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count];
264 if (self.ptr->child_count > 0) {
265 for (uint32_t i = 0; i < self.ptr->child_count; i++) {
266 ts_subtree_retain(new_children[i]);
267 }
268 } else if (self.ptr->has_external_tokens) {
269 result->external_scanner_state = ts_external_scanner_state_copy(
270 &self.ptr->external_scanner_state
271 );
272 }
273 result->ref_count = 1;
274 return (MutableSubtree) {.ptr = result};
275}
276
277// Get mutable version of a subtree.
278//
279// This takes ownership of the subtree. If the subtree has only one owner,
280// this will directly convert it into a mutable version. Otherwise, it will
281// perform a copy.
282MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
283 if (self.data.is_inline) return (MutableSubtree) {self.data};
284 if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
285 MutableSubtree result = ts_subtree_clone(self);
286 ts_subtree_release(pool, self);
287 return result;
288}
289
290static void ts_subtree__compress(
291 MutableSubtree self,
292 unsigned count,
293 const TSLanguage *language,
294 MutableSubtreeArray *stack
295) {
296 unsigned initial_stack_size = stack->size;
297
298 MutableSubtree tree = self;
299 TSSymbol symbol = tree.ptr->symbol;
300 for (unsigned i = 0; i < count; i++) {
301 if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
302
303 MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
304 if (
305 child.data.is_inline ||
306 child.ptr->child_count < 2 ||
307 child.ptr->ref_count > 1 ||
308 child.ptr->symbol != symbol
309 ) break;
310
311 MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]);
312 if (
313 grandchild.data.is_inline ||
314 grandchild.ptr->child_count < 2 ||
315 grandchild.ptr->ref_count > 1 ||
316 grandchild.ptr->symbol != symbol
317 ) break;
318
319 ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild);
320 ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1];
321 ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
322 array_push(stack, tree);
323 tree = grandchild;
324 }
325
326 while (stack->size > initial_stack_size) {
327 tree = array_pop(stack);
328 MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
329 MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]);
330 ts_subtree_summarize_children(grandchild, language);
331 ts_subtree_summarize_children(child, language);
332 ts_subtree_summarize_children(tree, language);
333 }
334}
335
336void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
337 array_clear(&pool->tree_stack);
338
339 if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
340 array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
341 }
342
343 while (pool->tree_stack.size > 0) {
344 MutableSubtree tree = array_pop(&pool->tree_stack);
345
346 if (tree.ptr->repeat_depth > 0) {
347 Subtree child1 = ts_subtree_children(tree)[0];
348 Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1];
349 long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
350 if (repeat_delta > 0) {
351 unsigned n = (unsigned)repeat_delta;
352 for (unsigned i = n / 2; i > 0; i /= 2) {
353 ts_subtree__compress(tree, i, language, &pool->tree_stack);
354 n -= i;
355 }
356 }
357 }
358
359 for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
360 Subtree child = ts_subtree_children(tree)[i];
361 if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
362 array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
363 }
364 }
365 }
366}
367
368// Assign all of the node's properties that depend on its children.
369void ts_subtree_summarize_children(
370 MutableSubtree self,
371 const TSLanguage *language
372) {
373 assert(!self.data.is_inline);
374
375 self.ptr->named_child_count = 0;
376 self.ptr->visible_child_count = 0;
377 self.ptr->error_cost = 0;
378 self.ptr->repeat_depth = 0;
379 self.ptr->visible_descendant_count = 0;
380 self.ptr->has_external_tokens = false;
381 self.ptr->depends_on_column = false;
382 self.ptr->has_external_scanner_state_change = false;
383 self.ptr->dynamic_precedence = 0;
384
385 uint32_t structural_index = 0;
386 const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
387 uint32_t lookahead_end_byte = 0;
388
389 const Subtree *children = ts_subtree_children(self);
390 for (uint32_t i = 0; i < self.ptr->child_count; i++) {
391 Subtree child = children[i];
392
393 if (
394 self.ptr->size.extent.row == 0 &&
395 ts_subtree_depends_on_column(child)
396 ) {
397 self.ptr->depends_on_column = true;
398 }
399
400 if (ts_subtree_has_external_scanner_state_change(child)) {
401 self.ptr->has_external_scanner_state_change = true;
402 }
403
404 if (i == 0) {
405 self.ptr->padding = ts_subtree_padding(child);
406 self.ptr->size = ts_subtree_size(child);
407 } else {
408 self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
409 }
410
411 uint32_t child_lookahead_end_byte =
412 self.ptr->padding.bytes +
413 self.ptr->size.bytes +
414 ts_subtree_lookahead_bytes(child);
415 if (child_lookahead_end_byte > lookahead_end_byte) {
416 lookahead_end_byte = child_lookahead_end_byte;
417 }
418
419 if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
420 self.ptr->error_cost += ts_subtree_error_cost(child);
421 }
422
423 uint32_t grandchild_count = ts_subtree_child_count(child);
424 if (
425 self.ptr->symbol == ts_builtin_sym_error ||
426 self.ptr->symbol == ts_builtin_sym_error_repeat
427 ) {
428 if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
429 if (ts_subtree_visible(child)) {
430 self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
431 } else if (grandchild_count > 0) {
432 self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
433 }
434 }
435 }
436
437 self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
438 self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child);
439
440 if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) {
441 self.ptr->visible_descendant_count++;
442 self.ptr->visible_child_count++;
443 if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) {
444 self.ptr->named_child_count++;
445 }
446 } else if (ts_subtree_visible(child)) {
447 self.ptr->visible_descendant_count++;
448 self.ptr->visible_child_count++;
449 if (ts_subtree_named(child)) self.ptr->named_child_count++;
450 } else if (grandchild_count > 0) {
451 self.ptr->visible_child_count += child.ptr->visible_child_count;
452 self.ptr->named_child_count += child.ptr->named_child_count;
453 }
454
455 if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
456
457 if (ts_subtree_is_error(child)) {
458 self.ptr->fragile_left = self.ptr->fragile_right = true;
459 self.ptr->parse_state = TS_TREE_STATE_NONE;
460 }
461
462 if (!ts_subtree_extra(child)) structural_index++;
463 }
464
465 self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
466
467 if (
468 self.ptr->symbol == ts_builtin_sym_error ||
469 self.ptr->symbol == ts_builtin_sym_error_repeat
470 ) {
471 self.ptr->error_cost +=
472 ERROR_COST_PER_RECOVERY +
473 ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
474 ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
475 }
476
477 if (self.ptr->child_count > 0) {
478 Subtree first_child = children[0];
479 Subtree last_child = children[self.ptr->child_count - 1];
480
481 self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
482 self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
483
484 if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
485 if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
486
487 if (
488 self.ptr->child_count >= 2 &&
489 !self.ptr->visible &&
490 !self.ptr->named &&
491 ts_subtree_symbol(first_child) == self.ptr->symbol
492 ) {
493 if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
494 self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
495 } else {
496 self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
497 }
498 }
499 }
500}
501
502// Create a new parent node with the given children.
503//
504// This takes ownership of the children array.
505MutableSubtree ts_subtree_new_node(
506 TSSymbol symbol,
507 SubtreeArray *children,
508 unsigned production_id,
509 const TSLanguage *language
510) {
511 TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
512 bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
513
514 // Allocate the node's data at the end of the array of children.
515 size_t new_byte_size = ts_subtree_alloc_size(children->size);
516 if (children->capacity * sizeof(Subtree) < new_byte_size) {
517 children->contents = ts_realloc(children->contents, new_byte_size);
518 children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree));
519 }
520 SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size];
521
522 *data = (SubtreeHeapData) {
523 .ref_count = 1,
524 .symbol = symbol,
525 .child_count = children->size,
526 .visible = metadata.visible,
527 .named = metadata.named,
528 .has_changes = false,
529 .has_external_scanner_state_change = false,
530 .fragile_left = fragile,
531 .fragile_right = fragile,
532 .is_keyword = false,
533 {{
534 .visible_descendant_count = 0,
535 .production_id = production_id,
536 .first_leaf = {.symbol = 0, .parse_state = 0},
537 }}
538 };
539 MutableSubtree result = {.ptr = data};
540 ts_subtree_summarize_children(result, language);
541 return result;
542}
543
544// Create a new error node containing the given children.
545//
546// This node is treated as 'extra'. Its children are prevented from having
547// having any effect on the parse state.
548Subtree ts_subtree_new_error_node(
549 SubtreeArray *children,
550 bool extra,
551 const TSLanguage *language
552) {
553 MutableSubtree result = ts_subtree_new_node(
554 ts_builtin_sym_error, children, 0, language
555 );
556 result.ptr->extra = extra;
557 return ts_subtree_from_mut(result);
558}
559
560// Create a new 'missing leaf' node.
561//
562// This node is treated as 'extra'. Its children are prevented from having
563// having any effect on the parse state.
564Subtree ts_subtree_new_missing_leaf(
565 SubtreePool *pool,
566 TSSymbol symbol,
567 Length padding,
568 uint32_t lookahead_bytes,
569 const TSLanguage *language
570) {
571 Subtree result = ts_subtree_new_leaf(
572 pool, symbol, padding, length_zero(), lookahead_bytes,
573 0, false, false, false, language
574 );
575 if (result.data.is_inline) {
576 result.data.is_missing = true;
577 } else {
578 ((SubtreeHeapData *)result.ptr)->is_missing = true;
579 }
580 return result;
581}
582
583void ts_subtree_retain(Subtree self) {
584 if (self.data.is_inline) return;
585 assert(self.ptr->ref_count > 0);
586 atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
587 assert(self.ptr->ref_count != 0);
588}
589
590void ts_subtree_release(SubtreePool *pool, Subtree self) {
591 if (self.data.is_inline) return;
592 array_clear(&pool->tree_stack);
593
594 assert(self.ptr->ref_count > 0);
595 if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
596 array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
597 }
598
599 while (pool->tree_stack.size > 0) {
600 MutableSubtree tree = array_pop(&pool->tree_stack);
601 if (tree.ptr->child_count > 0) {
602 Subtree *children = ts_subtree_children(tree);
603 for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
604 Subtree child = children[i];
605 if (child.data.is_inline) continue;
606 assert(child.ptr->ref_count > 0);
607 if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
608 array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
609 }
610 }
611 ts_free(children);
612 } else {
613 if (tree.ptr->has_external_tokens) {
614 ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
615 }
616 ts_subtree_pool_free(pool, tree.ptr);
617 }
618 }
619}
620
621int ts_subtree_compare(Subtree left, Subtree right) {
622 if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) return -1;
623 if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) return 1;
624 if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1;
625 if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1;
626 for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) {
627 Subtree left_child = ts_subtree_children(left)[i];
628 Subtree right_child = ts_subtree_children(right)[i];
629 switch (ts_subtree_compare(left_child, right_child)) {
630 case -1: return -1;
631 case 1: return 1;
632 default: break;
633 }
634 }
635 return 0;
636}
637
638static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
639 if (self->data.is_inline) {
640 self->data.has_changes = true;
641 } else {
642 self->ptr->has_changes = true;
643 }
644}
645
646Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) {
647 typedef struct {
648 Subtree *tree;
649 Edit edit;
650 } EditEntry;
651
652 Array(EditEntry) stack = array_new();
653 array_push(&stack, ((EditEntry) {
654 .tree = &self,
655 .edit = (Edit) {
656 .start = {input_edit->start_byte, input_edit->start_point},
657 .old_end = {input_edit->old_end_byte, input_edit->old_end_point},
658 .new_end = {input_edit->new_end_byte, input_edit->new_end_point},
659 },
660 }));
661
662 while (stack.size) {
663 EditEntry entry = array_pop(&stack);
664 Edit edit = entry.edit;
665 bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
666 bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
667 bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree);
668
669 Length size = ts_subtree_size(*entry.tree);
670 Length padding = ts_subtree_padding(*entry.tree);
671 Length total_size = length_add(padding, size);
672 uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
673 uint32_t end_byte = total_size.bytes + lookahead_bytes;
674 if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
675
676 // If the edit is entirely within the space before this subtree, then shift this
677 // subtree over according to the edit without changing its size.
678 if (edit.old_end.bytes <= padding.bytes) {
679 padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
680 }
681
682 // If the edit starts in the space before this subtree and extends into this subtree,
683 // shrink the subtree's content to compensate for the change in the space before it.
684 else if (edit.start.bytes < padding.bytes) {
685 size = length_saturating_sub(size, length_sub(edit.old_end, padding));
686 padding = edit.new_end;
687 }
688
689 // If the edit is a pure insertion right at the start of the subtree,
690 // shift the subtree over according to the insertion.
691 else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
692 padding = edit.new_end;
693 }
694
695 // If the edit is within this subtree, resize the subtree to reflect the edit.
696 else if (
697 edit.start.bytes < total_size.bytes ||
698 (edit.start.bytes == total_size.bytes && is_pure_insertion)
699 ) {
700 size = length_add(
701 length_sub(edit.new_end, padding),
702 length_saturating_sub(total_size, edit.old_end)
703 );
704 }
705
706 MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
707
708 if (result.data.is_inline) {
709 if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
710 result.data.padding_bytes = padding.bytes;
711 result.data.padding_rows = padding.extent.row;
712 result.data.padding_columns = padding.extent.column;
713 result.data.size_bytes = size.bytes;
714 } else {
715 SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
716 data->ref_count = 1;
717 data->padding = padding;
718 data->size = size;
719 data->lookahead_bytes = lookahead_bytes;
720 data->error_cost = 0;
721 data->child_count = 0;
722 data->symbol = result.data.symbol;
723 data->parse_state = result.data.parse_state;
724 data->visible = result.data.visible;
725 data->named = result.data.named;
726 data->extra = result.data.extra;
727 data->fragile_left = false;
728 data->fragile_right = false;
729 data->has_changes = false;
730 data->has_external_tokens = false;
731 data->depends_on_column = false;
732 data->is_missing = result.data.is_missing;
733 data->is_keyword = result.data.is_keyword;
734 result.ptr = data;
735 }
736 } else {
737 result.ptr->padding = padding;
738 result.ptr->size = size;
739 }
740
741 ts_subtree_set_has_changes(&result);
742 *entry.tree = ts_subtree_from_mut(result);
743
744 Length child_left, child_right = length_zero();
745 for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
746 Subtree *child = &ts_subtree_children(*entry.tree)[i];
747 Length child_size = ts_subtree_total_size(*child);
748 child_left = child_right;
749 child_right = length_add(child_left, child_size);
750
751 // If this child ends before the edit, it is not affected.
752 if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
753
754 // Keep editing child nodes until a node is reached that starts after the edit.
755 // Also, if this node's validity depends on its column position, then continue
756 // invaliditing child nodes until reaching a line break.
757 if ((
758 (child_left.bytes > edit.old_end.bytes) ||
759 (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
760 ) && (
761 !invalidate_first_row ||
762 child_left.extent.row > entry.tree->ptr->padding.extent.row
763 )) {
764 break;
765 }
766
767 // Transform edit into the child's coordinate space.
768 Edit child_edit = {
769 .start = length_saturating_sub(edit.start, child_left),
770 .old_end = length_saturating_sub(edit.old_end, child_left),
771 .new_end = length_saturating_sub(edit.new_end, child_left),
772 };
773
774 // Interpret all inserted text as applying to the *first* child that touches the edit.
775 // Subsequent children are only never have any text inserted into them; they are only
776 // shrunk to compensate for the edit.
777 if (
778 child_right.bytes > edit.start.bytes ||
779 (child_right.bytes == edit.start.bytes && is_pure_insertion)
780 ) {
781 edit.new_end = edit.start;
782 }
783
784 // Children that occur before the edit are not reshaped by the edit.
785 else {
786 child_edit.old_end = child_edit.start;
787 child_edit.new_end = child_edit.start;
788 }
789
790 // Queue processing of this child's subtree.
791 array_push(&stack, ((EditEntry) {
792 .tree = child,
793 .edit = child_edit,
794 }));
795 }
796 }
797
798 array_delete(&stack);
799 return self;
800}
801
802Subtree ts_subtree_last_external_token(Subtree tree) {
803 if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
804 while (tree.ptr->child_count > 0) {
805 for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) {
806 Subtree child = ts_subtree_children(tree)[i];
807 if (ts_subtree_has_external_tokens(child)) {
808 tree = child;
809 break;
810 }
811 }
812 }
813 return tree;
814}
815
// Format the character code `chr` into `str` (capacity `n`) and return the
// value reported by `snprintf`.
//
// -1 is rendered as INVALID; NUL, newline, tab and carriage return are
// rendered as quoted escape sequences; printable ASCII is rendered quoted;
// anything else is rendered as its decimal value.
static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) {
  switch (chr) {
    case -1:
      return snprintf(str, n, "INVALID");
    case '\0':
      return snprintf(str, n, "'\\0'");
    case '\n':
      return snprintf(str, n, "'\\n'");
    case '\t':
      return snprintf(str, n, "'\\t'");
    case '\r':
      return snprintf(str, n, "'\\r'");
    default:
      break;
  }

  // Only consult isprint() for values inside the ASCII range.
  if (chr > 0 && chr < 128 && isprint(chr)) {
    return snprintf(str, n, "'%c'", chr);
  }
  return snprintf(str, n, "%d", chr);
}
832
833static const char *const ROOT_FIELD = "__ROOT__";
834
835static size_t ts_subtree__write_to_string(
836 Subtree self, char *string, size_t limit,
837 const TSLanguage *language, bool include_all,
838 TSSymbol alias_symbol, bool alias_is_named, const char *field_name
839) {
840 if (!self.ptr) return snprintf(string, limit, "(NULL)");
841
842 char *cursor = string;
843 char **writer = (limit > 1) ? &cursor : &string;
844 bool is_root = field_name == ROOT_FIELD;
845 bool is_visible =
846 include_all ||
847 ts_subtree_missing(self) ||
848 (
849 alias_symbol
850 ? alias_is_named
851 : ts_subtree_visible(self) && ts_subtree_named(self)
852 );
853
854 if (is_visible) {
855 if (!is_root) {
856 cursor += snprintf(*writer, limit, " ");
857 if (field_name) {
858 cursor += snprintf(*writer, limit, "%s: ", field_name);
859 }
860 }
861
862 if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
863 cursor += snprintf(*writer, limit, "(UNEXPECTED ");
864 cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
865 } else {
866 TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
867 const char *symbol_name = ts_language_symbol_name(language, symbol);
868 if (ts_subtree_missing(self)) {
869 cursor += snprintf(*writer, limit, "(MISSING ");
870 if (alias_is_named || ts_subtree_named(self)) {
871 cursor += snprintf(*writer, limit, "%s", symbol_name);
872 } else {
873 cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
874 }
875 } else {
876 cursor += snprintf(*writer, limit, "(%s", symbol_name);
877 }
878 }
879 } else if (is_root) {
880 TSSymbol symbol = ts_subtree_symbol(self);
881 const char *symbol_name = ts_language_symbol_name(language, symbol);
882 cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
883 }
884
885 if (ts_subtree_child_count(self)) {
886 const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
887 const TSFieldMapEntry *field_map, *field_map_end;
888 ts_language_field_map(
889 language,
890 self.ptr->production_id,
891 &field_map,
892 &field_map_end
893 );
894
895 uint32_t structural_child_index = 0;
896 for (uint32_t i = 0; i < self.ptr->child_count; i++) {
897 Subtree child = ts_subtree_children(self)[i];
898 if (ts_subtree_extra(child)) {
899 cursor += ts_subtree__write_to_string(
900 child, *writer, limit,
901 language, include_all,
902 0, false, NULL
903 );
904 } else {
905 TSSymbol subtree_alias_symbol = alias_sequence
906 ? alias_sequence[structural_child_index]
907 : 0;
908 bool subtree_alias_is_named = subtree_alias_symbol
909 ? ts_language_symbol_metadata(language, subtree_alias_symbol).named
910 : false;
911
912 const char *child_field_name = is_visible ? NULL : field_name;
913 for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
914 if (!map->inherited && map->child_index == structural_child_index) {
915 child_field_name = language->field_names[map->field_id];
916 break;
917 }
918 }
919
920 cursor += ts_subtree__write_to_string(
921 child, *writer, limit,
922 language, include_all,
923 subtree_alias_symbol, subtree_alias_is_named, child_field_name
924 );
925 structural_child_index++;
926 }
927 }
928 }
929
930 if (is_visible) cursor += snprintf(*writer, limit, ")");
931
932 return cursor - string;
933}
934
935char *ts_subtree_string(
936 Subtree self,
937 const TSLanguage *language,
938 bool include_all
939) {
940 char scratch_string[1];
941 size_t size = ts_subtree__write_to_string(
942 self, scratch_string, 1,
943 language, include_all,
944 0, false, ROOT_FIELD
945 ) + 1;
946 char *result = ts_malloc(size * sizeof(char));
947 ts_subtree__write_to_string(
948 self, result, size,
949 language, include_all,
950 0, false, ROOT_FIELD
951 );
952 return result;
953}
954
955void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
956 const TSLanguage *language, TSSymbol alias_symbol,
957 FILE *f) {
958 TSSymbol subtree_symbol = ts_subtree_symbol(*self);
959 TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
960 uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
961 fprintf(f, "tree_%p [label=\"", (void *)self);
962 ts_language_write_symbol_as_dot_string(language, f, symbol);
963 fprintf(f, "\"");
964
965 if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
966 if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
967
968 fprintf(f, ", tooltip=\""
969 "range: %u - %u\n"
970 "state: %d\n"
971 "error-cost: %u\n"
972 "has-changes: %u\n"
973 "depends-on-column: %u\n"
974 "descendant-count: %u\n"
975 "repeat-depth: %u\n"
976 "lookahead-bytes: %u",
977 start_offset, end_offset,
978 ts_subtree_parse_state(*self),
979 ts_subtree_error_cost(*self),
980 ts_subtree_has_changes(*self),
981 ts_subtree_depends_on_column(*self),
982 ts_subtree_visible_descendant_count(*self),
983 ts_subtree_repeat_depth(*self),
984 ts_subtree_lookahead_bytes(*self)
985 );
986
987 if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) {
988 fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
989 }
990
991 fprintf(f, "\"]\n");
992
993 uint32_t child_start_offset = start_offset;
994 uint32_t child_info_offset =
995 language->max_alias_sequence_length *
996 ts_subtree_production_id(*self);
997 for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
998 const Subtree *child = &ts_subtree_children(*self)[i];
999 TSSymbol subtree_alias_symbol = 0;
1000 if (!ts_subtree_extra(*child) && child_info_offset) {
1001 subtree_alias_symbol = language->alias_sequences[child_info_offset];
1002 child_info_offset++;
1003 }
1004 ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f);
1005 fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i);
1006 child_start_offset += ts_subtree_total_bytes(*child);
1007 }
1008}
1009
1010void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
1011 fprintf(f, "digraph tree {\n");
1012 fprintf(f, "edge [arrowhead=none]\n");
1013 ts_subtree__print_dot_graph(&self, 0, language, 0, f);
1014 fprintf(f, "}\n");
1015}
1016
1017const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) {
1018 static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0};
1019 if (
1020 self.ptr &&
1021 !self.data.is_inline &&
1022 self.ptr->has_external_tokens &&
1023 self.ptr->child_count == 0
1024 ) {
1025 return &self.ptr->external_scanner_state;
1026 } else {
1027 return &empty_state;
1028 }
1029}
1030
1031bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
1032 const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self);
1033 const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other);
1034 return ts_external_scanner_state_eq(
1035 state_self,
1036 ts_external_scanner_state_data(state_other),
1037 state_other->length
1038 );
1039}
diff --git a/vendor/tree-sitter/lib/src/subtree.h b/vendor/tree-sitter/lib/src/subtree.h
new file mode 100644
index 0000000..cac657f
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/subtree.h
@@ -0,0 +1,382 @@
1#ifndef TREE_SITTER_SUBTREE_H_
2#define TREE_SITTER_SUBTREE_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include <limits.h>
9#include <stdbool.h>
10#include <stdio.h>
11#include "./length.h"
12#include "./array.h"
13#include "./error_costs.h"
14#include "./host.h"
15#include "tree_sitter/api.h"
16#include "tree_sitter/parser.h"
17
18#define TS_TREE_STATE_NONE USHRT_MAX
19#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
20
21// The serialized state of an external scanner.
22//
23// Every time an external token subtree is created after a call to an
24// external scanner, the scanner's `serialize` function is called to
25// retrieve a serialized copy of its state. The bytes are then copied
26// onto the subtree itself so that the scanner's state can later be
27// restored using its `deserialize` function.
28//
29// Small byte arrays are stored inline, and long ones are allocated
30// separately on the heap.
31typedef struct {
 // The two members share storage; only one is meaningful for a given state.
32 union {
 // Pointer to separately allocated bytes, for states too long to keep inline.
33 char *long_data;
 // In-place storage for short states (capacity 24 bytes).
34 char short_data[24];
35 };
 // NOTE(review): presumably the number of serialized bytes held — confirm
 // against ts_external_scanner_state_init.
36 uint32_t length;
37} ExternalScannerState;
38
39// A compact representation of a subtree.
40//
41// This representation is used for small leaf nodes that are not
42// errors, and were not created by an external scanner.
43//
44// The idea behind the layout of this struct is that the `is_inline`
45// bit will fall exactly into the same location as the least significant
46// bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
47// Because of alignment, for any valid pointer this will be 0, giving
48// us the opportunity to make use of this bit to signify whether to use
49// the pointer or the inline struct.
50typedef struct SubtreeInlineData SubtreeInlineData;
51
52#define SUBTREE_BITS \
53 bool visible : 1; \
54 bool named : 1; \
55 bool extra : 1; \
56 bool has_changes : 1; \
57 bool is_missing : 1; \
58 bool is_keyword : 1;
59
60#define SUBTREE_SIZE \
61 uint8_t padding_columns; \
62 uint8_t padding_rows : 4; \
63 uint8_t lookahead_bytes : 4; \
64 uint8_t padding_bytes; \
65 uint8_t size_bytes;
66
67#if TS_BIG_ENDIAN
68#if TS_PTR_SIZE == 32
69
70struct SubtreeInlineData {
71 uint16_t parse_state;
72 uint8_t symbol;
73 SUBTREE_BITS
74 bool unused : 1;
75 bool is_inline : 1;
76 SUBTREE_SIZE
77};
78
79#else
80
81struct SubtreeInlineData {
82 SUBTREE_SIZE
83 uint16_t parse_state;
84 uint8_t symbol;
85 SUBTREE_BITS
86 bool unused : 1;
87 bool is_inline : 1;
88};
89
90#endif
91#else
92
93struct SubtreeInlineData {
94 bool is_inline : 1;
95 SUBTREE_BITS
96 uint8_t symbol;
97 uint16_t parse_state;
98 SUBTREE_SIZE
99};
100
101#endif
102
103#undef SUBTREE_BITS
104#undef SUBTREE_SIZE
105
106// A heap-allocated representation of a subtree.
107//
108// This representation is used for parent nodes, external tokens,
109// errors, and other leaf nodes whose data is too large to fit into
110// the inline representation.
111typedef struct {
 // NOTE(review): presumably adjusted by ts_subtree_retain / ts_subtree_release
 // — confirm in subtree.c.
112 volatile uint32_t ref_count;
 // `padding` and `size` are summed by ts_subtree_total_size to give the full
 // extent of the subtree.
113 Length padding;
114 Length size;
115 uint32_t lookahead_bytes;
116 uint32_t error_cost;
 // Number of child subtrees; ts_subtree_children locates them in the
 // `child_count` slots immediately before this struct.
117 uint32_t child_count;
118 TSSymbol symbol;
119 TSStateId parse_state;
120
121 bool visible : 1;
122 bool named : 1;
123 bool extra : 1;
124 bool fragile_left : 1;
125 bool fragile_right : 1;
126 bool has_changes : 1;
127 bool has_external_tokens : 1;
128 bool has_external_scanner_state_change : 1;
129 bool depends_on_column: 1;
130 bool is_missing : 1;
131 bool is_keyword : 1;
132
 // Exactly one of the following members is meaningful, selected by the
 // conditions given in the comment above each member.
133 union {
134 // Non-terminal subtrees (`child_count > 0`)
135 struct {
136 uint32_t visible_child_count;
137 uint32_t named_child_count;
138 uint32_t visible_descendant_count;
139 int32_t dynamic_precedence;
140 uint16_t repeat_depth;
141 uint16_t production_id;
 // Returned by ts_subtree_leaf_symbol / ts_subtree_leaf_parse_state for
 // subtrees that have children.
142 struct {
143 TSSymbol symbol;
144 TSStateId parse_state;
145 } first_leaf;
146 };
147
148 // External terminal subtrees (`child_count == 0 && has_external_tokens`)
149 ExternalScannerState external_scanner_state;
150
151 // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
152 int32_t lookahead_char;
153 };
154} SubtreeHeapData;
155
156// The fundamental building block of a syntax tree.
157typedef union {
158 SubtreeInlineData data;
159 const SubtreeHeapData *ptr;
160} Subtree;
161
162// Like Subtree, but mutable.
163typedef union {
164 SubtreeInlineData data;
165 SubtreeHeapData *ptr;
166} MutableSubtree;
167
168typedef Array(Subtree) SubtreeArray;
169typedef Array(MutableSubtree) MutableSubtreeArray;
170
171typedef struct {
172 MutableSubtreeArray free_trees;
173 MutableSubtreeArray tree_stack;
174} SubtreePool;
175
176void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
177const char *ts_external_scanner_state_data(const ExternalScannerState *);
178bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned);
179void ts_external_scanner_state_delete(ExternalScannerState *self);
180
181void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
182void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
183void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
184void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
185void ts_subtree_array_reverse(SubtreeArray *);
186
187SubtreePool ts_subtree_pool_new(uint32_t capacity);
188void ts_subtree_pool_delete(SubtreePool *);
189
190Subtree ts_subtree_new_leaf(
191 SubtreePool *, TSSymbol, Length, Length, uint32_t,
192 TSStateId, bool, bool, bool, const TSLanguage *
193);
194Subtree ts_subtree_new_error(
195 SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
196);
197MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
198Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
199Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *);
200MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
201void ts_subtree_retain(Subtree);
202void ts_subtree_release(SubtreePool *, Subtree);
203int ts_subtree_compare(Subtree, Subtree);
204void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
205void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
206void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
207void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
208Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
209char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
210void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
211Subtree ts_subtree_last_external_token(Subtree);
212const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
213bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
214
215#define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name)
216
217static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
218static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
219static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
220static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
221static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
222static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
223static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
224static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
225static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
226
227#undef SUBTREE_GET
228
229// Get the size needed to store a heap-allocated subtree with the given
230// number of children.
231static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
232 return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
233}
234
235// Get a subtree's children, which are allocated immediately before the
236// tree's own heap data.
237#define ts_subtree_children(self) \
238 ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
239
240static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
241 if (self->data.is_inline) {
242 self->data.extra = is_extra;
243 } else {
244 self->ptr->extra = is_extra;
245 }
246}
247
248static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
249 if (self.data.is_inline) return self.data.symbol;
250 if (self.ptr->child_count == 0) return self.ptr->symbol;
251 return self.ptr->first_leaf.symbol;
252}
253
254static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
255 if (self.data.is_inline) return self.data.parse_state;
256 if (self.ptr->child_count == 0) return self.ptr->parse_state;
257 return self.ptr->first_leaf.parse_state;
258}
259
260static inline Length ts_subtree_padding(Subtree self) {
261 if (self.data.is_inline) {
262 Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
263 return result;
264 } else {
265 return self.ptr->padding;
266 }
267}
268
269static inline Length ts_subtree_size(Subtree self) {
270 if (self.data.is_inline) {
271 Length result = {self.data.size_bytes, {0, self.data.size_bytes}};
272 return result;
273 } else {
274 return self.ptr->size;
275 }
276}
277
278static inline Length ts_subtree_total_size(Subtree self) {
279 return length_add(ts_subtree_padding(self), ts_subtree_size(self));
280}
281
282static inline uint32_t ts_subtree_total_bytes(Subtree self) {
283 return ts_subtree_total_size(self).bytes;
284}
285
286static inline uint32_t ts_subtree_child_count(Subtree self) {
287 return self.data.is_inline ? 0 : self.ptr->child_count;
288}
289
290static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
291 return self.data.is_inline ? 0 : self.ptr->repeat_depth;
292}
293
294static inline uint32_t ts_subtree_is_repetition(Subtree self) {
295 return self.data.is_inline
296 ? 0
297 : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0;
298}
299
300static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
301 return (self.data.is_inline || self.ptr->child_count == 0)
302 ? 0
303 : self.ptr->visible_descendant_count;
304}
305
306static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
307 if (ts_subtree_child_count(self) > 0) {
308 return self.ptr->visible_child_count;
309 } else {
310 return 0;
311 }
312}
313
314static inline uint32_t ts_subtree_error_cost(Subtree self) {
315 if (ts_subtree_missing(self)) {
316 return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
317 } else {
318 return self.data.is_inline ? 0 : self.ptr->error_cost;
319 }
320}
321
322static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
323 return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
324}
325
326static inline uint16_t ts_subtree_production_id(Subtree self) {
327 if (ts_subtree_child_count(self) > 0) {
328 return self.ptr->production_id;
329 } else {
330 return 0;
331 }
332}
333
334static inline bool ts_subtree_fragile_left(Subtree self) {
335 return self.data.is_inline ? false : self.ptr->fragile_left;
336}
337
338static inline bool ts_subtree_fragile_right(Subtree self) {
339 return self.data.is_inline ? false : self.ptr->fragile_right;
340}
341
342static inline bool ts_subtree_has_external_tokens(Subtree self) {
343 return self.data.is_inline ? false : self.ptr->has_external_tokens;
344}
345
346static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
347 return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change;
348}
349
350static inline bool ts_subtree_depends_on_column(Subtree self) {
351 return self.data.is_inline ? false : self.ptr->depends_on_column;
352}
353
354static inline bool ts_subtree_is_fragile(Subtree self) {
355 return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
356}
357
358static inline bool ts_subtree_is_error(Subtree self) {
359 return ts_subtree_symbol(self) == ts_builtin_sym_error;
360}
361
362static inline bool ts_subtree_is_eof(Subtree self) {
363 return ts_subtree_symbol(self) == ts_builtin_sym_end;
364}
365
366static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
367 Subtree result;
368 result.data = self.data;
369 return result;
370}
371
372static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
373 MutableSubtree result;
374 result.data = self.data;
375 return result;
376}
377
378#ifdef __cplusplus
379}
380#endif
381
382#endif // TREE_SITTER_SUBTREE_H_
diff --git a/vendor/tree-sitter/lib/src/tree.c b/vendor/tree-sitter/lib/src/tree.c
new file mode 100644
index 0000000..784c51f
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/tree.c
@@ -0,0 +1,143 @@
1#include "tree_sitter/api.h"
2#include "./array.h"
3#include "./get_changed_ranges.h"
4#include "./length.h"
5#include "./subtree.h"
6#include "./tree_cursor.h"
7#include "./tree.h"
8
9TSTree *ts_tree_new(
10 Subtree root, const TSLanguage *language,
11 const TSRange *included_ranges, unsigned included_range_count
12) {
13 TSTree *result = ts_malloc(sizeof(TSTree));
14 result->root = root;
15 result->language = language;
16 result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
17 memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
18 result->included_range_count = included_range_count;
19 return result;
20}
21
22TSTree *ts_tree_copy(const TSTree *self) {
23 ts_subtree_retain(self->root);
24 return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
25}
26
27void ts_tree_delete(TSTree *self) {
28 if (!self) return;
29
30 SubtreePool pool = ts_subtree_pool_new(0);
31 ts_subtree_release(&pool, self->root);
32 ts_subtree_pool_delete(&pool);
33 ts_free(self->included_ranges);
34 ts_free(self);
35}
36
37TSNode ts_tree_root_node(const TSTree *self) {
38 return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
39}
40
41TSNode ts_tree_root_node_with_offset(
42 const TSTree *self,
43 uint32_t offset_bytes,
44 TSPoint offset_extent
45) {
46 Length offset = {offset_bytes, offset_extent};
47 return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0);
48}
49
50const TSLanguage *ts_tree_language(const TSTree *self) {
51 return self->language;
52}
53
54void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
55 for (unsigned i = 0; i < self->included_range_count; i++) {
56 TSRange *range = &self->included_ranges[i];
57 if (range->end_byte >= edit->old_end_byte) {
58 if (range->end_byte != UINT32_MAX) {
59 range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
60 range->end_point = point_add(
61 edit->new_end_point,
62 point_sub(range->end_point, edit->old_end_point)
63 );
64 if (range->end_byte < edit->new_end_byte) {
65 range->end_byte = UINT32_MAX;
66 range->end_point = POINT_MAX;
67 }
68 }
69 } else if (range->end_byte > edit->start_byte) {
70 range->end_byte = edit->start_byte;
71 range->end_point = edit->start_point;
72 }
73 if (range->start_byte >= edit->old_end_byte) {
74 range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
75 range->start_point = point_add(
76 edit->new_end_point,
77 point_sub(range->start_point, edit->old_end_point)
78 );
79 if (range->start_byte < edit->new_end_byte) {
80 range->start_byte = UINT32_MAX;
81 range->start_point = POINT_MAX;
82 }
83 } else if (range->start_byte > edit->start_byte) {
84 range->start_byte = edit->start_byte;
85 range->start_point = edit->start_point;
86 }
87 }
88
89 SubtreePool pool = ts_subtree_pool_new(0);
90 self->root = ts_subtree_edit(self->root, edit, &pool);
91 ts_subtree_pool_delete(&pool);
92}
93
94TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
95 *length = self->included_range_count;
96 TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange));
97 memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange));
98 return ranges;
99}
100
101TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
102 TreeCursor cursor1 = {NULL, array_new()};
103 TreeCursor cursor2 = {NULL, array_new()};
104 ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree));
105 ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree));
106
107 TSRangeArray included_range_differences = array_new();
108 ts_range_array_get_changed_ranges(
109 old_tree->included_ranges, old_tree->included_range_count,
110 new_tree->included_ranges, new_tree->included_range_count,
111 &included_range_differences
112 );
113
114 TSRange *result;
115 *length = ts_subtree_get_changed_ranges(
116 &old_tree->root, &new_tree->root, &cursor1, &cursor2,
117 old_tree->language, &included_range_differences, &result
118 );
119
120 array_delete(&included_range_differences);
121 array_delete(&cursor1.stack);
122 array_delete(&cursor2.stack);
123 return result;
124}
125
126#ifdef _WIN32
127
128void ts_tree_print_dot_graph(const TSTree *self, int fd) {
129 (void)self;
130 (void)fd;
131}
132
133#else
134
135#include <unistd.h>
136
137void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
138 FILE *file = fdopen(dup(file_descriptor), "a");
139 ts_subtree_print_dot_graph(self->root, self->language, file);
140 fclose(file);
141}
142
143#endif
diff --git a/vendor/tree-sitter/lib/src/tree.h b/vendor/tree-sitter/lib/src/tree.h
new file mode 100644
index 0000000..f012f88
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/tree.h
@@ -0,0 +1,31 @@
1#ifndef TREE_SITTER_TREE_H_
2#define TREE_SITTER_TREE_H_
3
4#include "./subtree.h"
5
6#ifdef __cplusplus
7extern "C" {
8#endif
9
10typedef struct {
11 const Subtree *child;
12 const Subtree *parent;
13 Length position;
14 TSSymbol alias_symbol;
15} ParentCacheEntry;
16
// A syntax tree: a root subtree together with the language and the included
// ranges supplied when it was created.
17struct TSTree {
 // Root subtree; released through ts_subtree_release in ts_tree_delete.
18 Subtree root;
19 const TSLanguage *language;
 // Private copy of the ranges passed to ts_tree_new; freed in ts_tree_delete.
20 TSRange *included_ranges;
 // Number of entries in `included_ranges`.
21 unsigned included_range_count;
22};
23
24TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
25TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
26
27#ifdef __cplusplus
28}
29#endif
30
31#endif // TREE_SITTER_TREE_H_
diff --git a/vendor/tree-sitter/lib/src/tree_cursor.c b/vendor/tree-sitter/lib/src/tree_cursor.c
new file mode 100644
index 0000000..63d22c8
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/tree_cursor.c
@@ -0,0 +1,712 @@
1#include "tree_sitter/api.h"
2#include "./alloc.h"
3#include "./tree_cursor.h"
4#include "./language.h"
5#include "./tree.h"
6
// State used to step through the children of a single parent subtree.
typedef struct {
  Subtree parent;                   // subtree whose children are visited; `ptr` is NULL when there is nothing to iterate
  const TSTree *tree;
  Length position;                  // position recorded for the child at `child_index`
  uint32_t child_index;             // index of the child the next step will yield
  uint32_t structural_child_index;  // index counting only non-extra children; used to index `alias_sequence`
  uint32_t descendant_index;        // descendant index recorded for the child at `child_index`
  const TSSymbol *alias_sequence;   // alias sequence of the parent's production; may be NULL
} CursorChildIterator;
16
17// CursorChildIterator
18
19static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) {
20 TreeCursorEntry *entry = &self->stack.contents[index];
21 if (index == 0 || ts_subtree_visible(*entry->subtree)) {
22 return true;
23 } else if (!ts_subtree_extra(*entry->subtree)) {
24 TreeCursorEntry *parent_entry = &self->stack.contents[index - 1];
25 return ts_language_alias_at(
26 self->tree->language,
27 parent_entry->subtree->ptr->production_id,
28 entry->structural_child_index
29 );
30 } else {
31 return false;
32 }
33}
34
35static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
36 TreeCursorEntry *last_entry = array_back(&self->stack);
37 if (ts_subtree_child_count(*last_entry->subtree) == 0) {
38 return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL};
39 }
40 const TSSymbol *alias_sequence = ts_language_alias_sequence(
41 self->tree->language,
42 last_entry->subtree->ptr->production_id
43 );
44
45 uint32_t descendant_index = last_entry->descendant_index;
46 if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) {
47 descendant_index += 1;
48 }
49
50 return (CursorChildIterator) {
51 .tree = self->tree,
52 .parent = *last_entry->subtree,
53 .position = last_entry->position,
54 .child_index = 0,
55 .structural_child_index = 0,
56 .descendant_index = descendant_index,
57 .alias_sequence = alias_sequence,
58 };
59}
60
61static inline bool ts_tree_cursor_child_iterator_next(
62 CursorChildIterator *self,
63 TreeCursorEntry *result,
64 bool *visible
65) {
66 if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
67 const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
68 *result = (TreeCursorEntry) {
69 .subtree = child,
70 .position = self->position,
71 .child_index = self->child_index,
72 .structural_child_index = self->structural_child_index,
73 .descendant_index = self->descendant_index,
74 };
75 *visible = ts_subtree_visible(*child);
76 bool extra = ts_subtree_extra(*child);
77 if (!extra) {
78 if (self->alias_sequence) {
79 *visible |= self->alias_sequence[self->structural_child_index];
80 }
81 self->structural_child_index++;
82 }
83
84 self->descendant_index += ts_subtree_visible_descendant_count(*child);
85 if (*visible) {
86 self->descendant_index += 1;
87 }
88
89 self->position = length_add(self->position, ts_subtree_size(*child));
90 self->child_index++;
91
92 if (self->child_index < self->parent.ptr->child_count) {
93 Subtree next_child = ts_subtree_children(self->parent)[self->child_index];
94 self->position = length_add(self->position, ts_subtree_padding(next_child));
95 }
96
97 return true;
98}
99
100// Return a position that, when `b` is added to it, yields `a`. This
101// can only be computed if `b` has zero rows. Otherwise, this function
102// returns `LENGTH_UNDEFINED`, and the caller needs to recompute
103// the position some other way.
104static inline Length length_backtrack(Length a, Length b) {
105 if (length_is_undefined(a) || b.extent.row != 0) {
106 return LENGTH_UNDEFINED;
107 }
108
109 Length result;
110 result.bytes = a.bytes - b.bytes;
111 result.extent.row = a.extent.row;
112 result.extent.column = a.extent.column - b.extent.column;
113 return result;
114}
115
116static inline bool ts_tree_cursor_child_iterator_previous(
117 CursorChildIterator *self,
118 TreeCursorEntry *result,
119 bool *visible
120) {
121 // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into
122 // account unsigned underflow
123 if (!self->parent.ptr || (int8_t)self->child_index == -1) return false;
124 const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
125 *result = (TreeCursorEntry) {
126 .subtree = child,
127 .position = self->position,
128 .child_index = self->child_index,
129 .structural_child_index = self->structural_child_index,
130 };
131 *visible = ts_subtree_visible(*child);
132 bool extra = ts_subtree_extra(*child);
133 if (!extra && self->alias_sequence) {
134 *visible |= self->alias_sequence[self->structural_child_index];
135 self->structural_child_index--;
136 }
137
138 self->position = length_backtrack(self->position, ts_subtree_padding(*child));
139 self->child_index--;
140
141 // unsigned can underflow so compare it to child_count
142 if (self->child_index < self->parent.ptr->child_count) {
143 Subtree previous_child = ts_subtree_children(self->parent)[self->child_index];
144 Length size = ts_subtree_size(previous_child);
145 self->position = length_backtrack(self->position, size);
146 }
147
148 return true;
149}
150
151// TSTreeCursor - lifecycle
152
153TSTreeCursor ts_tree_cursor_new(TSNode node) {
154 TSTreeCursor self = {NULL, NULL, {0, 0}};
155 ts_tree_cursor_init((TreeCursor *)&self, node);
156 return self;
157}
158
159void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
160 ts_tree_cursor_init((TreeCursor *)_self, node);
161}
162
163void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
164 self->tree = node.tree;
165 array_clear(&self->stack);
166 array_push(&self->stack, ((TreeCursorEntry) {
167 .subtree = (const Subtree *)node.id,
168 .position = {
169 ts_node_start_byte(node),
170 ts_node_start_point(node)
171 },
172 .child_index = 0,
173 .structural_child_index = 0,
174 .descendant_index = 0,
175 }));
176}
177
178void ts_tree_cursor_delete(TSTreeCursor *_self) {
179 TreeCursor *self = (TreeCursor *)_self;
180 array_delete(&self->stack);
181}
182
183// TSTreeCursor - walking the tree
184
185TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) {
186 TreeCursor *self = (TreeCursor *)_self;
187 bool visible;
188 TreeCursorEntry entry;
189 CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
190 while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
191 if (visible) {
192 array_push(&self->stack, entry);
193 return TreeCursorStepVisible;
194 }
195 if (ts_subtree_visible_child_count(*entry.subtree) > 0) {
196 array_push(&self->stack, entry);
197 return TreeCursorStepHidden;
198 }
199 }
200 return TreeCursorStepNone;
201}
202
203bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
204 for (;;) {
205 switch (ts_tree_cursor_goto_first_child_internal(self)) {
206 case TreeCursorStepHidden:
207 continue;
208 case TreeCursorStepVisible:
209 return true;
210 default:
211 return false;
212 }
213 }
214 return false;
215}
216
217TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) {
218 TreeCursor *self = (TreeCursor *)_self;
219 bool visible;
220 TreeCursorEntry entry;
221 CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
222 if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone;
223
224 TreeCursorEntry last_entry;
225 TreeCursorStep last_step = TreeCursorStepNone;
226 while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
227 if (visible) {
228 last_entry = entry;
229 last_step = TreeCursorStepVisible;
230 }
231 else if (ts_subtree_visible_child_count(*entry.subtree) > 0) {
232 last_entry = entry;
233 last_step = TreeCursorStepHidden;
234 }
235 }
236 if (last_entry.subtree) {
237 array_push(&self->stack, last_entry);
238 return last_step;
239 }
240
241 return TreeCursorStepNone;
242}
243
244bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) {
245 for (;;) {
246 switch (ts_tree_cursor_goto_last_child_internal(self)) {
247 case TreeCursorStepHidden:
248 continue;
249 case TreeCursorStepVisible:
250 return true;
251 default:
252 return false;
253 }
254 }
255 return false;
256}
257
258static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point(
259 TSTreeCursor *_self,
260 uint32_t goal_byte,
261 TSPoint goal_point
262) {
263 TreeCursor *self = (TreeCursor *)_self;
264 uint32_t initial_size = self->stack.size;
265 uint32_t visible_child_index = 0;
266
267 bool did_descend;
268 do {
269 did_descend = false;
270
271 bool visible;
272 TreeCursorEntry entry;
273 CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
274 while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
275 Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree));
276 bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point);
277 uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
278 if (at_goal) {
279 if (visible) {
280 array_push(&self->stack, entry);
281 return visible_child_index;
282 }
283 if (visible_child_count > 0) {
284 array_push(&self->stack, entry);
285 did_descend = true;
286 break;
287 }
288 } else if (visible) {
289 visible_child_index++;
290 } else {
291 visible_child_index += visible_child_count;
292 }
293 }
294 } while (did_descend);
295
296 self->stack.size = initial_size;
297 return -1;
298}
299
300int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) {
301 return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO);
302}
303
304int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) {
305 return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point);
306}
307
// Shared implementation of the next-sibling and previous-sibling moves.
// `advance` is the child-iterator step function to use, which determines the
// direction of the search.
//
// Entries are popped off the stack one at a time, and the remaining children
// of each uncovered parent are scanned in the direction of `advance`. The
// first child that is visible, or hidden but containing visible children, is
// pushed and reported as `TreeCursorStepVisible` or `TreeCursorStepHidden`.
// If nothing is found, the stack size is reset to its initial value and
// `TreeCursorStepNone` is returned.
TreeCursorStep ts_tree_cursor_goto_sibling_internal(
  TSTreeCursor *_self,
  bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) {
  TreeCursor *self = (TreeCursor *)_self;
  uint32_t initial_size = self->stack.size;

  while (self->stack.size > 1) {
    // Resume iterating the parent's children at the entry that was just popped.
    TreeCursorEntry entry = array_pop(&self->stack);
    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
    iterator.child_index = entry.child_index;
    iterator.structural_child_index = entry.structural_child_index;
    iterator.position = entry.position;
    iterator.descendant_index = entry.descendant_index;

    // The first step yields the popped entry itself and moves the iterator
    // past it.
    bool visible = false;
    advance(&iterator, &entry, &visible);
    // A visible entry popped from below the original top of the stack is an
    // ancestor of the starting node; the search does not continue past it.
    if (visible && self->stack.size + 1 < initial_size) break;

    while (advance(&iterator, &entry, &visible)) {
      if (visible) {
        array_push(&self->stack, entry);
        return TreeCursorStepVisible;
      }

      if (ts_subtree_visible_child_count(*entry.subtree)) {
        array_push(&self->stack, entry);
        return TreeCursorStepHidden;
      }
    }
  }

  // Nothing found: undo the pops so the cursor stays where it started.
  self->stack.size = initial_size;
  return TreeCursorStepNone;
}
342
343TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
344 return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next);
345}
346
347bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
348 switch (ts_tree_cursor_goto_next_sibling_internal(self)) {
349 case TreeCursorStepHidden:
350 ts_tree_cursor_goto_first_child(self);
351 return true;
352 case TreeCursorStepVisible:
353 return true;
354 default:
355 return false;
356 }
357}
358
359TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) {
360 // since subtracting across row loses column information, we may have to
361 // restore it
362 TreeCursor *self = (TreeCursor *)_self;
363
364 // for that, save current position before traversing
365 Length position = array_back(&self->stack)->position;
366 TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
367 _self, ts_tree_cursor_child_iterator_previous);
368 if (step == TreeCursorStepNone)
369 return step;
370
371 // if length is already valid, there's no need to recompute it
372 if (!length_is_undefined(array_back(&self->stack)->position))
373 return step;
374
375 // restore position from the parent node
376 const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2];
377 position = parent->position;
378 uint32_t child_index = array_back(&self->stack)->child_index;
379 const Subtree *children = ts_subtree_children((*(parent->subtree)));
380
381 if (child_index > 0) {
382 // skip first child padding since its position should match the position of the parent
383 position = length_add(position, ts_subtree_size(children[0]));
384 for (uint32_t i = 1; i < child_index; ++i) {
385 position = length_add(position, ts_subtree_total_size(children[i]));
386 }
387 position = length_add(position, ts_subtree_padding(children[child_index]));
388 }
389
390 array_back(&self->stack)->position = position;
391
392 return step;
393}
394
395bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) {
396 switch (ts_tree_cursor_goto_previous_sibling_internal(self)) {
397 case TreeCursorStepHidden:
398 ts_tree_cursor_goto_last_child(self);
399 return true;
400 case TreeCursorStepVisible:
401 return true;
402 default:
403 return false;
404 }
405}
406
407bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
408 TreeCursor *self = (TreeCursor *)_self;
409 for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
410 if (ts_tree_cursor_is_entry_visible(self, i)) {
411 self->stack.size = i + 1;
412 return true;
413 }
414 }
415 return false;
416}
417
// Move the cursor to the node with the given descendant index.
//
// The cursor first pops entries until the current entry's range of descendant
// indices contains the goal (stopping at the bottom entry if none does), and
// then repeatedly descends into the child whose range contains the goal until
// it reaches a visible node whose own index equals the goal.
void ts_tree_cursor_goto_descendant(
  TSTreeCursor *_self,
  uint32_t goal_descendant_index
) {
  TreeCursor *self = (TreeCursor *)_self;

  // Ascend to the lowest ancestor that contains the goal node.
  for (;;) {
    uint32_t i = self->stack.size - 1;
    TreeCursorEntry *entry = &self->stack.contents[i];
    // One past the last descendant index covered by this entry: its own index,
    // plus one if the entry itself is visible, plus its visible descendants.
    uint32_t next_descendant_index =
      entry->descendant_index +
      (ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) +
      ts_subtree_visible_descendant_count(*entry->subtree);
    if (
      (entry->descendant_index <= goal_descendant_index) &&
      (next_descendant_index > goal_descendant_index)
    ) {
      break;
    } else if (self->stack.size <= 1) {
      // The goal is outside the bottom entry's range; leave the cursor there.
      return;
    } else {
      self->stack.size--;
    }
  }

  // Descend to the goal node.
  bool did_descend = true;
  do {
    did_descend = false;
    bool visible;
    TreeCursorEntry entry;
    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
    // The first child's index is already past the goal, so the goal is not
    // among the children: stop on the current entry.
    if (iterator.descendant_index > goal_descendant_index) {
      return;
    }

    while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
      // After stepping, the iterator's index is one past the range covered by
      // `entry`, so the first entry for which it exceeds the goal contains it.
      if (iterator.descendant_index > goal_descendant_index) {
        array_push(&self->stack, entry);
        if (visible && entry.descendant_index == goal_descendant_index) {
          return;
        } else {
          did_descend = true;
          break;
        }
      }
    }
  } while (did_descend);
}
468
469uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) {
470 const TreeCursor *self = (const TreeCursor *)_self;
471 TreeCursorEntry *last_entry = array_back(&self->stack);
472 return last_entry->descendant_index;
473}
474
475TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
476 const TreeCursor *self = (const TreeCursor *)_self;
477 TreeCursorEntry *last_entry = array_back(&self->stack);
478 TSSymbol alias_symbol = 0;
479 if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) {
480 TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
481 alias_symbol = ts_language_alias_at(
482 self->tree->language,
483 parent_entry->subtree->ptr->production_id,
484 last_entry->structural_child_index
485 );
486 }
487 return ts_node_new(
488 self->tree,
489 last_entry->subtree,
490 last_entry->position,
491 alias_symbol
492 );
493}
494
// Private - Get various facts about the current node that are needed
// when executing tree queries.
//
// Outputs:
//   field_id                 - field id associated with the current node, or 0
//   has_later_siblings       - whether a visible node follows the current one
//   has_later_named_siblings - whether a named node follows the current one
//   can_have_later_siblings_with_this_field
//                            - whether the field map has an entry with the same
//                              field id at a later structural child index
//   supertypes               - filled with the supertype symbols encountered
//   supertype_count          - on entry, the capacity of `supertypes`; on
//                              return, the number of symbols written
void ts_tree_cursor_current_status(
  const TSTreeCursor *_self,
  TSFieldId *field_id,
  bool *has_later_siblings,
  bool *has_later_named_siblings,
  bool *can_have_later_siblings_with_this_field,
  TSSymbol *supertypes,
  unsigned *supertype_count
) {
  const TreeCursor *self = (const TreeCursor *)_self;
  unsigned max_supertypes = *supertype_count;
  *field_id = 0;
  *supertype_count = 0;
  *has_later_siblings = false;
  *has_later_named_siblings = false;
  *can_have_later_siblings_with_this_field = false;

  // Walk up the tree, visiting the current node and its invisible ancestors,
  // because fields can refer to nodes through invisible *wrapper* nodes.
  for (unsigned i = self->stack.size - 1; i > 0; i--) {
    TreeCursorEntry *entry = &self->stack.contents[i];
    TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];

    const TSSymbol *alias_sequence = ts_language_alias_sequence(
      self->tree->language,
      parent_entry->subtree->ptr->production_id
    );

    // Effective symbol of a child: its alias from the parent's production when
    // it is non-extra and an alias is defined, otherwise its own symbol.
    #define subtree_symbol(subtree, structural_child_index) \
      ((                                                    \
        !ts_subtree_extra(subtree) &&                       \
        alias_sequence &&                                   \
        alias_sequence[structural_child_index]              \
      ) ?                                                   \
        alias_sequence[structural_child_index] :            \
        ts_subtree_symbol(subtree))

    // Stop walking up when a visible ancestor is found.
    TSSymbol entry_symbol = subtree_symbol(
      *entry->subtree,
      entry->structural_child_index
    );
    TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
      self->tree->language,
      entry_symbol
    );
    if (i != self->stack.size - 1 && entry_metadata.visible) break;

    // Record any supertypes
    if (entry_metadata.supertype && *supertype_count < max_supertypes) {
      supertypes[*supertype_count] = entry_symbol;
      (*supertype_count)++;
    }

    // Determine if the current node has later siblings.
    if (!*has_later_siblings) {
      unsigned sibling_count = parent_entry->subtree->ptr->child_count;
      // Structural index of the first sibling after `entry`.
      unsigned structural_child_index = entry->structural_child_index;
      if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
      for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
        Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
        TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
          self->tree->language,
          subtree_symbol(sibling, structural_child_index)
        );
        if (sibling_metadata.visible) {
          *has_later_siblings = true;
          if (*has_later_named_siblings) break;
          if (sibling_metadata.named) {
            *has_later_named_siblings = true;
            break;
          }
        } else if (ts_subtree_visible_child_count(sibling) > 0) {
          // A hidden sibling still counts when it contains visible children.
          *has_later_siblings = true;
          if (*has_later_named_siblings) break;
          if (sibling.ptr->named_child_count > 0) {
            *has_later_named_siblings = true;
            break;
          }
        }
        if (!ts_subtree_extra(sibling)) structural_child_index++;
      }
    }

    #undef subtree_symbol

    if (!ts_subtree_extra(*entry->subtree)) {
      const TSFieldMapEntry *field_map, *field_map_end;
      ts_language_field_map(
        self->tree->language,
        parent_entry->subtree->ptr->production_id,
        &field_map, &field_map_end
      );

      // Look for a field name associated with the current node.
      if (!*field_id) {
        for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
          if (!map->inherited && map->child_index == entry->structural_child_index) {
            *field_id = map->field_id;
            break;
          }
        }
      }

      // Determine if the current node can have later siblings with the same field name.
      if (*field_id) {
        for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
          if (
            map->field_id == *field_id &&
            map->child_index > entry->structural_child_index
          ) {
            *can_have_later_siblings_with_this_field = true;
            break;
          }
        }
      }
    }
  }
}
616
617uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) {
618 const TreeCursor *self = (const TreeCursor *)_self;
619 uint32_t depth = 0;
620 for (unsigned i = 1; i < self->stack.size; i++) {
621 if (ts_tree_cursor_is_entry_visible(self, i)) {
622 depth++;
623 }
624 }
625 return depth;
626}
627
628TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) {
629 const TreeCursor *self = (const TreeCursor *)_self;
630 for (int i = (int)self->stack.size - 2; i >= 0; i--) {
631 TreeCursorEntry *entry = &self->stack.contents[i];
632 bool is_visible = true;
633 TSSymbol alias_symbol = 0;
634 if (i > 0) {
635 TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
636 alias_symbol = ts_language_alias_at(
637 self->tree->language,
638 parent_entry->subtree->ptr->production_id,
639 entry->structural_child_index
640 );
641 is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree);
642 }
643 if (is_visible) {
644 return ts_node_new(
645 self->tree,
646 entry->subtree,
647 entry->position,
648 alias_symbol
649 );
650 }
651 }
652 return ts_node_new(NULL, NULL, length_zero(), 0);
653}
654
655TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
656 const TreeCursor *self = (const TreeCursor *)_self;
657
658 // Walk up the tree, visiting the current node and its invisible ancestors.
659 for (unsigned i = self->stack.size - 1; i > 0; i--) {
660 TreeCursorEntry *entry = &self->stack.contents[i];
661 TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
662
663 // Stop walking up when another visible node is found.
664 if (
665 i != self->stack.size - 1 &&
666 ts_tree_cursor_is_entry_visible(self, i)
667 ) break;
668
669 if (ts_subtree_extra(*entry->subtree)) break;
670
671 const TSFieldMapEntry *field_map, *field_map_end;
672 ts_language_field_map(
673 self->tree->language,
674 parent_entry->subtree->ptr->production_id,
675 &field_map, &field_map_end
676 );
677 for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
678 if (!map->inherited && map->child_index == entry->structural_child_index) {
679 return map->field_id;
680 }
681 }
682 }
683 return 0;
684}
685
686const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
687 TSFieldId id = ts_tree_cursor_current_field_id(_self);
688 if (id) {
689 const TreeCursor *self = (const TreeCursor *)_self;
690 return self->tree->language->field_names[id];
691 } else {
692 return NULL;
693 }
694}
695
696TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
697 const TreeCursor *cursor = (const TreeCursor *)_cursor;
698 TSTreeCursor res = {NULL, NULL, {0, 0}};
699 TreeCursor *copy = (TreeCursor *)&res;
700 copy->tree = cursor->tree;
701 array_init(&copy->stack);
702 array_push_all(&copy->stack, &cursor->stack);
703 return res;
704}
705
706void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) {
707 const TreeCursor *cursor = (const TreeCursor *)_src;
708 TreeCursor *copy = (TreeCursor *)_dst;
709 copy->tree = cursor->tree;
710 array_clear(&copy->stack);
711 array_push_all(&copy->stack, &cursor->stack);
712}
diff --git a/vendor/tree-sitter/lib/src/tree_cursor.h b/vendor/tree-sitter/lib/src/tree_cursor.h
new file mode 100644
index 0000000..6d4c688
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/tree_cursor.h
@@ -0,0 +1,47 @@
1#ifndef TREE_SITTER_TREE_CURSOR_H_
2#define TREE_SITTER_TREE_CURSOR_H_
3
4#include "./subtree.h"
5
// One level of a tree cursor's stack: a subtree plus the bookkeeping the
// cursor keeps for it.
typedef struct {
  const Subtree *subtree;
  Length position;                  // position recorded for the subtree
  uint32_t child_index;             // index among the parent's children
  uint32_t structural_child_index;  // index among the parent's children, not counting extra nodes
  uint32_t descendant_index;
} TreeCursorEntry;
13
// Internal representation of a tree cursor: the tree being walked and a stack
// of entries whose last element is the cursor's current node.
typedef struct {
  const TSTree *tree;
  Array(TreeCursorEntry) stack;
} TreeCursor;
18
// Result of a single internal cursor move.
typedef enum {
  TreeCursorStepNone,     // the cursor did not move
  TreeCursorStepHidden,   // the cursor moved onto a node that is not visible
  TreeCursorStepVisible,  // the cursor moved onto a visible node
} TreeCursorStep;
24
25void ts_tree_cursor_init(TreeCursor *, TSNode);
26void ts_tree_cursor_current_status(
27 const TSTreeCursor *,
28 TSFieldId *,
29 bool *,
30 bool *,
31 bool *,
32 TSSymbol *,
33 unsigned *
34);
35
36TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *);
37TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *);
38
39static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) {
40 const TreeCursor *self = (const TreeCursor *)_self;
41 TreeCursorEntry *last_entry = array_back(&self->stack);
42 return *last_entry->subtree;
43}
44
45TSNode ts_tree_cursor_parent_node(const TSTreeCursor *);
46
47#endif // TREE_SITTER_TREE_CURSOR_H_
diff --git a/vendor/tree-sitter/lib/src/unicode.h b/vendor/tree-sitter/lib/src/unicode.h
new file mode 100644
index 0000000..0fba56a
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode.h
@@ -0,0 +1,50 @@
1#ifndef TREE_SITTER_UNICODE_H_
2#define TREE_SITTER_UNICODE_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
8#include <limits.h>
9#include <stdint.h>
10
11#define U_EXPORT
12#define U_EXPORT2
13#include "unicode/utf8.h"
14#include "unicode/utf16.h"
15
16static const int32_t TS_DECODE_ERROR = U_SENTINEL;
17
// Signature shared by the decoding functions below. Each one reads a single
// unicode code point from `string`, stores it in `*code_point`, and returns
// the number of bytes consumed.
typedef uint32_t (*UnicodeDecodeFunction)(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
);
25
// Decode one code point from a UTF-8 string using `U8_NEXT`, storing it in
// `*code_point`. Returns the offset reached by `U8_NEXT`, i.e. the number of
// bytes consumed.
static inline uint32_t ts_decode_utf8(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t bytes_read = 0;
  U8_NEXT(string, bytes_read, length, *code_point);
  return bytes_read;
}
35
// Decode one code point from a UTF-16 string using `U16_NEXT`, storing it in
// `*code_point`. `U16_NEXT` counts 16-bit units, so the count is doubled to
// return a number of bytes.
// NOTE(review): `length` is forwarded to `U16_NEXT` unchanged — confirm
// whether callers pass it in bytes or in 16-bit units.
static inline uint32_t ts_decode_utf16(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t units_read = 0;
  U16_NEXT(((uint16_t *)string), units_read, length, *code_point);
  return units_read * 2;
}
45
46#ifdef __cplusplus
47}
48#endif
49
50#endif // TREE_SITTER_UNICODE_H_
diff --git a/vendor/tree-sitter/lib/src/unicode/ICU_SHA b/vendor/tree-sitter/lib/src/unicode/ICU_SHA
new file mode 100644
index 0000000..3622283
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/ICU_SHA
@@ -0,0 +1 @@
552b01f61127d30d6589aa4bf99468224979b661
diff --git a/vendor/tree-sitter/lib/src/unicode/LICENSE b/vendor/tree-sitter/lib/src/unicode/LICENSE
new file mode 100644
index 0000000..2e01e36
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/LICENSE
@@ -0,0 +1,414 @@
1COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
2
3Copyright © 1991-2019 Unicode, Inc. All rights reserved.
4Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
5
6Permission is hereby granted, free of charge, to any person obtaining
7a copy of the Unicode data files and any associated documentation
8(the "Data Files") or Unicode software and any associated documentation
9(the "Software") to deal in the Data Files or Software
10without restriction, including without limitation the rights to use,
11copy, modify, merge, publish, distribute, and/or sell copies of
12the Data Files or Software, and to permit persons to whom the Data Files
13or Software are furnished to do so, provided that either
14(a) this copyright and permission notice appear with all copies
15of the Data Files or Software, or
16(b) this copyright and permission notice appear in associated
17Documentation.
18
19THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
20ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
21WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22NONINFRINGEMENT OF THIRD PARTY RIGHTS.
23IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
24NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
25DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
26DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THE DATA FILES OR SOFTWARE.
29
30Except as contained in this notice, the name of a copyright holder
31shall not be used in advertising or otherwise to promote the sale,
32use or other dealings in these Data Files or Software without prior
33written authorization of the copyright holder.
34
35---------------------
36
37Third-Party Software Licenses
38
39This section contains third-party software notices and/or additional
40terms for licensed third-party software components included within ICU
41libraries.
42
431. ICU License - ICU 1.8.1 to ICU 57.1
44
45COPYRIGHT AND PERMISSION NOTICE
46
47Copyright (c) 1995-2016 International Business Machines Corporation and others
48All rights reserved.
49
50Permission is hereby granted, free of charge, to any person obtaining
51a copy of this software and associated documentation files (the
52"Software"), to deal in the Software without restriction, including
53without limitation the rights to use, copy, modify, merge, publish,
54distribute, and/or sell copies of the Software, and to permit persons
55to whom the Software is furnished to do so, provided that the above
56copyright notice(s) and this permission notice appear in all copies of
57the Software and that both the above copyright notice(s) and this
58permission notice appear in supporting documentation.
59
60THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
61EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
62MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
63OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
64HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
65SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
66RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
67CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
68CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
69
70Except as contained in this notice, the name of a copyright holder
71shall not be used in advertising or otherwise to promote the sale, use
72or other dealings in this Software without prior written authorization
73of the copyright holder.
74
75All trademarks and registered trademarks mentioned herein are the
76property of their respective owners.
77
782. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
79
80 # The Google Chrome software developed by Google is licensed under
81 # the BSD license. Other software included in this distribution is
82 # provided under other licenses, as set forth below.
83 #
84 # The BSD License
85 # http://opensource.org/licenses/bsd-license.php
86 # Copyright (C) 2006-2008, Google Inc.
87 #
88 # All rights reserved.
89 #
90 # Redistribution and use in source and binary forms, with or without
91 # modification, are permitted provided that the following conditions are met:
92 #
93 # Redistributions of source code must retain the above copyright notice,
94 # this list of conditions and the following disclaimer.
95 # Redistributions in binary form must reproduce the above
96 # copyright notice, this list of conditions and the following
97 # disclaimer in the documentation and/or other materials provided with
98 # the distribution.
99 # Neither the name of Google Inc. nor the names of its
100 # contributors may be used to endorse or promote products derived from
101 # this software without specific prior written permission.
102 #
103 #
104 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
105 # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
106 # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
107 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
108 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
109 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
110 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
111 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
112 # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
113 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
114 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
115 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
116 #
117 #
118 # The word list in cjdict.txt are generated by combining three word lists
119 # listed below with further processing for compound word breaking. The
120 # frequency is generated with an iterative training against Google web
121 # corpora.
122 #
123 # * Libtabe (Chinese)
124 # - https://sourceforge.net/project/?group_id=1519
125 # - Its license terms and conditions are shown below.
126 #
127 # * IPADIC (Japanese)
128 # - http://chasen.aist-nara.ac.jp/chasen/distribution.html
129 # - Its license terms and conditions are shown below.
130 #
131 # ---------COPYING.libtabe ---- BEGIN--------------------
132 #
133 # /*
134 # * Copyright (c) 1999 TaBE Project.
135 # * Copyright (c) 1999 Pai-Hsiang Hsiao.
136 # * All rights reserved.
137 # *
138 # * Redistribution and use in source and binary forms, with or without
139 # * modification, are permitted provided that the following conditions
140 # * are met:
141 # *
142 # * . Redistributions of source code must retain the above copyright
143 # * notice, this list of conditions and the following disclaimer.
144 # * . Redistributions in binary form must reproduce the above copyright
145 # * notice, this list of conditions and the following disclaimer in
146 # * the documentation and/or other materials provided with the
147 # * distribution.
148 # * . Neither the name of the TaBE Project nor the names of its
149 # * contributors may be used to endorse or promote products derived
150 # * from this software without specific prior written permission.
151 # *
152 # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
153 # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
154 # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
155 # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
156 # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
157 # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
158 # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
159 # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
160 # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
161 # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
162 # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
163 # * OF THE POSSIBILITY OF SUCH DAMAGE.
164 # */
165 #
166 # /*
167 # * Copyright (c) 1999 Computer Systems and Communication Lab,
168 # * Institute of Information Science, Academia
169 # * Sinica. All rights reserved.
170 # *
171 # * Redistribution and use in source and binary forms, with or without
172 # * modification, are permitted provided that the following conditions
173 # * are met:
174 # *
175 # * . Redistributions of source code must retain the above copyright
176 # * notice, this list of conditions and the following disclaimer.
177 # * . Redistributions in binary form must reproduce the above copyright
178 # * notice, this list of conditions and the following disclaimer in
179 # * the documentation and/or other materials provided with the
180 # * distribution.
181 # * . Neither the name of the Computer Systems and Communication Lab
182 # * nor the names of its contributors may be used to endorse or
183 # * promote products derived from this software without specific
184 # * prior written permission.
185 # *
186 # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
187 # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
188 # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
189 # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
190 # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
191 # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
192 # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
193 # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
194 # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
195 # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
196 # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
197 # * OF THE POSSIBILITY OF SUCH DAMAGE.
198 # */
199 #
200 # Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
201 # University of Illinois
202 # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
203 #
204 # ---------------COPYING.libtabe-----END--------------------------------
205 #
206 #
207 # ---------------COPYING.ipadic-----BEGIN-------------------------------
208 #
209 # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
210 # and Technology. All Rights Reserved.
211 #
212 # Use, reproduction, and distribution of this software is permitted.
213 # Any copy of this software, whether in its original form or modified,
214 # must include both the above copyright notice and the following
215 # paragraphs.
216 #
217 # Nara Institute of Science and Technology (NAIST),
218 # the copyright holders, disclaims all warranties with regard to this
219 # software, including all implied warranties of merchantability and
220 # fitness, in no event shall NAIST be liable for
221 # any special, indirect or consequential damages or any damages
222 # whatsoever resulting from loss of use, data or profits, whether in an
223 # action of contract, negligence or other tortuous action, arising out
224 # of or in connection with the use or performance of this software.
225 #
226 # A large portion of the dictionary entries
227 # originate from ICOT Free Software. The following conditions for ICOT
228 # Free Software applies to the current dictionary as well.
229 #
230 # Each User may also freely distribute the Program, whether in its
231 # original form or modified, to any third party or parties, PROVIDED
232 # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
233 # on, or be attached to, the Program, which is distributed substantially
234 # in the same form as set out herein and that such intended
235 # distribution, if actually made, will neither violate or otherwise
236 # contravene any of the laws and regulations of the countries having
237 # jurisdiction over the User or the intended distribution itself.
238 #
239 # NO WARRANTY
240 #
241 # The program was produced on an experimental basis in the course of the
242 # research and development conducted during the project and is provided
243 # to users as so produced on an experimental basis. Accordingly, the
244 # program is provided without any warranty whatsoever, whether express,
245 # implied, statutory or otherwise. The term "warranty" used herein
246 # includes, but is not limited to, any warranty of the quality,
247 # performance, merchantability and fitness for a particular purpose of
248 # the program and the nonexistence of any infringement or violation of
249 # any right of any third party.
250 #
251 # Each user of the program will agree and understand, and be deemed to
252 # have agreed and understood, that there is no warranty whatsoever for
253 # the program and, accordingly, the entire risk arising from or
254 # otherwise connected with the program is assumed by the user.
255 #
256 # Therefore, neither ICOT, the copyright holder, or any other
257 # organization that participated in or was otherwise related to the
258 # development of the program and their respective officials, directors,
259 # officers and other employees shall be held liable for any and all
260 # damages, including, without limitation, general, special, incidental
261 # and consequential damages, arising out of or otherwise in connection
262 # with the use or inability to use the program or any product, material
263 # or result produced or otherwise obtained by using the program,
264 # regardless of whether they have been advised of, or otherwise had
265 # knowledge of, the possibility of such damages at any time during the
266 # project or thereafter. Each user will be deemed to have agreed to the
267 # foregoing by his or her commencement of use of the program. The term
268 # "use" as used herein includes, but is not limited to, the use,
269 # modification, copying and distribution of the program and the
270 # production of secondary products from the program.
271 #
272 # In the case where the program, whether in its original form or
273 # modified, was distributed or delivered to or received by a user from
274 # any person, organization or entity other than ICOT, unless it makes or
275 # grants independently of ICOT any specific warranty to the user in
276 # writing, such person, organization or entity, will also be exempted
277 # from and not be held liable to the user for any such damages as noted
278 # above as far as the program is concerned.
279 #
280 # ---------------COPYING.ipadic-----END----------------------------------
281
2823. Lao Word Break Dictionary Data (laodict.txt)
283
284 # Copyright (c) 2013 International Business Machines Corporation
285 # and others. All Rights Reserved.
286 #
287 # Project: http://code.google.com/p/lao-dictionary/
288 # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
289 # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
290 # (copied below)
291 #
292 # This file is derived from the above dictionary, with slight
293 # modifications.
294 # ----------------------------------------------------------------------
295 # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
296 # All rights reserved.
297 #
298 # Redistribution and use in source and binary forms, with or without
299 # modification,
300 # are permitted provided that the following conditions are met:
301 #
302 #
303 # Redistributions of source code must retain the above copyright notice, this
304 # list of conditions and the following disclaimer. Redistributions in
305 # binary form must reproduce the above copyright notice, this list of
306 # conditions and the following disclaimer in the documentation and/or
307 # other materials provided with the distribution.
308 #
309 #
310 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
311 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
312 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
313 # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
314 # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
315 # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
316 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
317 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
318 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
319 # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
320 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
321 # OF THE POSSIBILITY OF SUCH DAMAGE.
322 # --------------------------------------------------------------------------
323
3244. Burmese Word Break Dictionary Data (burmesedict.txt)
325
326 # Copyright (c) 2014 International Business Machines Corporation
327 # and others. All Rights Reserved.
328 #
329 # This list is part of a project hosted at:
330 # github.com/kanyawtech/myanmar-karen-word-lists
331 #
332 # --------------------------------------------------------------------------
333 # Copyright (c) 2013, LeRoy Benjamin Sharon
334 # All rights reserved.
335 #
336 # Redistribution and use in source and binary forms, with or without
337 # modification, are permitted provided that the following conditions
338 # are met: Redistributions of source code must retain the above
339 # copyright notice, this list of conditions and the following
340 # disclaimer. Redistributions in binary form must reproduce the
341 # above copyright notice, this list of conditions and the following
342 # disclaimer in the documentation and/or other materials provided
343 # with the distribution.
344 #
345 # Neither the name Myanmar Karen Word Lists, nor the names of its
346 # contributors may be used to endorse or promote products derived
347 # from this software without specific prior written permission.
348 #
349 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
350 # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
351 # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
352 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
353 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
354 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
355 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
356 # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
357 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
358 # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
359 # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
360 # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361 # SUCH DAMAGE.
362 # --------------------------------------------------------------------------
363
3645. Time Zone Database
365
366 ICU uses the public domain data and code derived from Time Zone
367Database for its time zone support. The ownership of the TZ database
368is explained in BCP 175: Procedure for Maintaining the Time Zone
369Database section 7.
370
371 # 7. Database Ownership
372 #
373 # The TZ database itself is not an IETF Contribution or an IETF
374 # document. Rather it is a pre-existing and regularly updated work
375 # that is in the public domain, and is intended to remain in the
376 # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
377 # not apply to the TZ Database or contributions that individuals make
378 # to it. Should any claims be made and substantiated against the TZ
379 # Database, the organization that is providing the IANA
380 # Considerations defined in this RFC, under the memorandum of
381 # understanding with the IETF, currently ICANN, may act in accordance
382 # with all competent court orders. No ownership claims will be made
383 # by ICANN or the IETF Trust on the database or the code. Any person
384 # making a contribution to the database or code waives all rights to
385 # future claims in that contribution or in the TZ Database.
386
3876. Google double-conversion
388
389Copyright 2006-2011, the V8 project authors. All rights reserved.
390Redistribution and use in source and binary forms, with or without
391modification, are permitted provided that the following conditions are
392met:
393
394 * Redistributions of source code must retain the above copyright
395 notice, this list of conditions and the following disclaimer.
396 * Redistributions in binary form must reproduce the above
397 copyright notice, this list of conditions and the following
398 disclaimer in the documentation and/or other materials provided
399 with the distribution.
400 * Neither the name of Google Inc. nor the names of its
401 contributors may be used to endorse or promote products derived
402 from this software without specific prior written permission.
403
404THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
405"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
406LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
407A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
408OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
409SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
410LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
411DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
412THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
413(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
414OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/tree-sitter/lib/src/unicode/README.md b/vendor/tree-sitter/lib/src/unicode/README.md
new file mode 100644
index 0000000..623b8e3
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/README.md
@@ -0,0 +1,29 @@
1# ICU Parts
2
3This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).
4
5### License
6
7The license for these files is contained in the `LICENSE` file within this directory.
8
9### Contents
10
11* Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
12 * `utf8.h`
13 * `utf16.h`
14 * `umachine.h`
15* Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed:
16 * `ptypes.h`
17 * `urename.h`
18 * `utf.h`
19* `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
20* `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
21* `README.md` - This text file.
22
23### Updating ICU
24
25To incorporate changes from the upstream `icu` repository:
26
27* Update `ICU_SHA` with the new Git SHA.
28* Update `LICENSE` with the license text from the directory mentioned above.
29* Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.
diff --git a/vendor/tree-sitter/lib/src/unicode/ptypes.h b/vendor/tree-sitter/lib/src/unicode/ptypes.h
new file mode 100644
index 0000000..ac79ad0
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/ptypes.h
@@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.
diff --git a/vendor/tree-sitter/lib/src/unicode/umachine.h b/vendor/tree-sitter/lib/src/unicode/umachine.h
new file mode 100644
index 0000000..9195824
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/umachine.h
@@ -0,0 +1,448 @@
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6* Copyright (C) 1999-2015, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: umachine.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999sep13
16* created by: Markus W. Scherer
17*
18* This file defines basic types and constants for ICU to be
19* platform-independent. umachine.h and utf.h are included into
20* utypes.h to provide all the general definitions for ICU.
21* All of these definitions used to be in utypes.h before
22* the UTF-handling macros made this unmaintainable.
23*/
24
25#ifndef __UMACHINE_H__
26#define __UMACHINE_H__
27
28
29/**
30 * \file
31 * \brief Basic types and constants for UTF
32 *
33 * <h2> Basic types and constants for UTF </h2>
34 * This file defines basic types and constants for utf.h to be
35 * platform-independent. umachine.h and utf.h are included into
36 * utypes.h to provide all the general definitions for ICU.
37 * All of these definitions used to be in utypes.h before
38 * the UTF-handling macros made this unmaintainable.
39 *
40 */
41/*==========================================================================*/
42/* Include platform-dependent definitions */
43/* which are contained in the platform-specific file platform.h */
44/*==========================================================================*/
45
46#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
47
48/*
49 * ANSI C headers:
50 * stddef.h defines wchar_t
51 */
52#include <stddef.h>
53
54/*==========================================================================*/
55/* For C wrappers, we use the symbol U_STABLE. */
56/* This works properly if the includer is C or C++. */
57/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
58/*==========================================================================*/
59
60/**
61 * \def U_CFUNC
62 * This is used in a declaration of a library private ICU C function.
63 * @stable ICU 2.4
64 */
65
66/**
67 * \def U_CDECL_BEGIN
68 * This is used to begin a declaration of a library private ICU C API.
69 * @stable ICU 2.4
70 */
71
72/**
73 * \def U_CDECL_END
74 * This is used to end a declaration of a library private ICU C API
75 * @stable ICU 2.4
76 */
77
78#ifdef __cplusplus
79# define U_CFUNC extern "C"
80# define U_CDECL_BEGIN extern "C" {
81# define U_CDECL_END }
82#else
83# define U_CFUNC extern
84# define U_CDECL_BEGIN
85# define U_CDECL_END
86#endif
87
/* Selects a compiler-specific "deprecated" marker unless the includer has
 * already defined U_ATTRIBUTE_DEPRECATED. Falls back to an empty definition
 * when neither compiler check matches. */
88#ifndef U_ATTRIBUTE_DEPRECATED
89/**
90 * \def U_ATTRIBUTE_DEPRECATED
91 * This is used for GCC specific attributes
92 * @internal
93 */
/* NOTE(review): U_GCC_MAJOR_MINOR is not defined in the visible part of this
 * header, and the vendored ptypes.h is a comment-only stub. An undefined
 * identifier evaluates to 0 inside #if, which would skip this branch --
 * confirm whether the build defines it elsewhere. */
94#if U_GCC_MAJOR_MINOR >= 302
95# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
96/**
97 * \def U_ATTRIBUTE_DEPRECATED
98 * This is used for Visual C++ specific attributes
99 * @internal
100 */
101#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
102# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
103#else
104# define U_ATTRIBUTE_DEPRECATED
105#endif
106#endif
107
108/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
109#define U_CAPI U_CFUNC U_EXPORT
110/** This is used to declare a function as a stable public ICU C API*/
111#define U_STABLE U_CAPI
112/** This is used to declare a function as a draft public ICU C API */
113#define U_DRAFT U_CAPI
114/** This is used to declare a function as a deprecated public ICU C API */
115#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
116/** This is used to declare a function as an obsolete public ICU C API */
117#define U_OBSOLETE U_CAPI
118/** This is used to declare a function as an internal ICU C API */
119#define U_INTERNAL U_CAPI
120
121/**
122 * \def U_OVERRIDE
123 * Defined to the C++11 "override" keyword if available.
124 * Denotes a class or member which is an override of the base class.
125 * May result in an error if it is applied to something not an override.
126 * @internal
127 */
128#ifndef U_OVERRIDE
129#define U_OVERRIDE override
130#endif
131
132/**
133 * \def U_FINAL
134 * Defined to the C++11 "final" keyword if available.
135 * Denotes a class or member which may not be overridden in subclasses.
136 * May result in an error if subclasses attempt to override.
137 * @internal
138 */
139#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
140#define U_FINAL final
141#endif
142
143// Before ICU 65, function-like, multi-statement ICU macros were just defined as
144// series of statements wrapped in { } blocks and the caller could choose to
145// either treat them as if they were actual functions and end the invocation
146// with a trailing ; creating an empty statement after the block or else omit
147// this trailing ; using the knowledge that the macro would expand to { }.
148//
149// But doing so doesn't work well with macros that look like functions and
150// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
151// switches to the standard solution of wrapping such macros in do { } while.
152//
153// This will however break existing code that depends on being able to invoke
154// these macros without a trailing ; so to be able to remain compatible with
155// such code the wrapper is itself defined as macros so that it's possible to
156// build ICU 65 and later with the old macro behaviour, like this:
157//
158// CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
159// runConfigureICU ...
160
161/**
162 * \def UPRV_BLOCK_MACRO_BEGIN
163 * Defined as the "do" keyword by default.
164 * @internal
165 */
166#ifndef UPRV_BLOCK_MACRO_BEGIN
167#define UPRV_BLOCK_MACRO_BEGIN do
168#endif
169
170/**
171 * \def UPRV_BLOCK_MACRO_END
172 * Defined as "while (FALSE)" by default.
173 * @internal
174 */
/* The default body refers to FALSE, which this header defines further down
 * (unless it is already defined). That is sufficient because the macro body
 * is only expanded where the macro is used, not here. */
175#ifndef UPRV_BLOCK_MACRO_END
176#define UPRV_BLOCK_MACRO_END while (FALSE)
177#endif
178
179/*==========================================================================*/
180/* limits for int32_t etc., like in POSIX inttypes.h */
181/*==========================================================================*/
182
183#ifndef INT8_MIN
184/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
185# define INT8_MIN ((int8_t)(-128))
186#endif
187#ifndef INT16_MIN
188/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
189# define INT16_MIN ((int16_t)(-32767-1))
190#endif
191#ifndef INT32_MIN
192/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
193# define INT32_MIN ((int32_t)(-2147483647-1))
194#endif
195
196#ifndef INT8_MAX
197/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
198# define INT8_MAX ((int8_t)(127))
199#endif
200#ifndef INT16_MAX
201/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
202# define INT16_MAX ((int16_t)(32767))
203#endif
204#ifndef INT32_MAX
205/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
206# define INT32_MAX ((int32_t)(2147483647))
207#endif
208
209#ifndef UINT8_MAX
210/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
211# define UINT8_MAX ((uint8_t)(255U))
212#endif
213#ifndef UINT16_MAX
214/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
215# define UINT16_MAX ((uint16_t)(65535U))
216#endif
217#ifndef UINT32_MAX
218/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
219# define UINT32_MAX ((uint32_t)(4294967295U))
220#endif
221
/*
 * 64-bit integer constant helpers. Compilation is aborted with #error when
 * U_INT64_T_UNAVAILABLE is defined; otherwise each helper below is defined
 * only if the name is not already a macro.
 */
222#if defined(U_INT64_T_UNAVAILABLE)
223# error int64_t is required for decimal format and rule-based number format.
224#else
225# ifndef INT64_C
226/**
227 * Provides a platform independent way to specify a signed 64-bit integer constant.
 * The fallback simply pastes the LL suffix onto the literal c.
228 * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
229 * @stable ICU 2.8
230 */
231# define INT64_C(c) c ## LL
232# endif
233# ifndef UINT64_C
234/**
235 * Provides a platform independent way to specify an unsigned 64-bit integer constant.
 * The fallback simply pastes the ULL suffix onto the literal c.
236 * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
237 * @stable ICU 2.8
238 */
239# define UINT64_C(c) c ## ULL
240# endif
241# ifndef U_INT64_MIN
242/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
/* Written as -(2^63-1) - 1 because the literal 9223372036854775808 does not fit in a signed 64-bit type. */
243# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
244# endif
245# ifndef U_INT64_MAX
246/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
247# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
248# endif
249# ifndef U_UINT64_MAX
250/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
251# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
252# endif
253#endif
254
255/*==========================================================================*/
256/* Boolean data type */
257/*==========================================================================*/
258
259/** The ICU boolean type (a typedef of int8_t, not the C/C++ bool type) @stable ICU 2.0 */
260typedef int8_t UBool;
261
/* TRUE and FALSE are the plain constants 1 and 0; each is defined only if the name is not already a macro. */
262#ifndef TRUE
263/** The TRUE value of a UBool @stable ICU 2.0 */
264# define TRUE 1
265#endif
266#ifndef FALSE
267/** The FALSE value of a UBool @stable ICU 2.0 */
268# define FALSE 0
269#endif
270
271
272/*==========================================================================*/
273/* Unicode data types */
274/*==========================================================================*/
275
276/* wchar_t-related definitions -------------------------------------------- */
277
278/*
279 * \def U_WCHAR_IS_UTF16
280 * Defined if wchar_t uses UTF-16.
281 *
282 * @stable ICU 2.0
283 */
284/*
285 * \def U_WCHAR_IS_UTF32
286 * Defined if wchar_t uses UTF-32.
287 *
288 * @stable ICU 2.0
289 */
/*
 * Auto-detection, skipped entirely when either macro is already defined.
 * The branches are tried in order: __STDC_ISO_10646__, then __UCS2__,
 * then __UCS4__ (or __UTF32__ on OS/400), then Darwin-based platforms or
 * Linux-based platforms with a 4-byte wchar_t, then platforms with the
 * Win32 API. Inside the first three branches the macro is only defined
 * when U_SIZEOF_WCHAR_T has the matching size, so it is possible for
 * neither macro to be defined afterwards.
 */
290#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
291# ifdef __STDC_ISO_10646__
292# if (U_SIZEOF_WCHAR_T==2)
293# define U_WCHAR_IS_UTF16
294# elif (U_SIZEOF_WCHAR_T==4)
295# define U_WCHAR_IS_UTF32
296# endif
297# elif defined __UCS2__
298# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
299# define U_WCHAR_IS_UTF16
300# endif
301# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
302# if (U_SIZEOF_WCHAR_T==4)
303# define U_WCHAR_IS_UTF32
304# endif
305# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
306# define U_WCHAR_IS_UTF32
307# elif U_PLATFORM_HAS_WIN32_API
308# define U_WCHAR_IS_UTF16
309# endif
310#endif
311
312/* UChar and UChar32 definitions -------------------------------------------- */
313
314/** Number of bytes in a UChar (one UTF-16 code unit). Fixed at 2 regardless of how UChar is typedef'ed. @stable ICU 2.0 */
315#define U_SIZEOF_UCHAR 2
316
317/**
318 * \def U_CHAR16_IS_TYPEDEF
319 * If 1, then char16_t is a typedef and not a real type (yet)
 * Always defined after this block, as either 1 or 0:
 * 1 for pre-C++11 C++ on AIX and for MSVC older than _MSC_VER 1900, 0 otherwise.
320 * @internal
321 */
322#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
323// for AIX, uchar.h needs to be included
324# include <uchar.h>
325# define U_CHAR16_IS_TYPEDEF 1
326#elif defined(_MSC_VER) && (_MSC_VER < 1900)
327// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
328// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
329# define U_CHAR16_IS_TYPEDEF 1
330#else
331# define U_CHAR16_IS_TYPEDEF 0
332#endif
333
334
335/**
336 * \var UChar
337 *
338 * The base type for UTF-16 code units and pointers.
339 * Unsigned 16-bit integer.
340 * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
341 *
342 * UChar is configurable by defining the macro UCHAR_TYPE
343 * on the preprocessor or compiler command line:
344 * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
345 * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
346 * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
 * UCHAR_TYPE is ignored when any of the U_*_IMPLEMENTATION macros tested below
 * is defined; UChar is then always char16_t.
347 *
348 * The default (UCHAR_TYPE undefined) is UChar=char16_t when compiled as C++
 * and UChar=uint16_t when compiled as C.
349 *
350 * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
351 *
352 * In C, char16_t is a simple typedef of uint_least16_t.
353 * ICU requires uint_least16_t=uint16_t for data memory mapping.
354 * On macOS, char16_t is not available because the uchar.h standard header is missing.
355 *
356 * @stable ICU 4.4
357 */
358
/* While the condition below is 1, the #elif branch is never taken and this block defines nothing. */
359#if 1
360 // #if 1 is normal. UChar defaults to char16_t in C++.
361 // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
362 // The intltest Makefile #defines UCHAR_TYPE=char16_t,
363 // so we only #define it to uint16_t if it is undefined so far.
364#elif !defined(UCHAR_TYPE)
365# define UCHAR_TYPE uint16_t
366#endif
367
368#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
369 defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
370 // Inside the ICU library code, never configurable.
371 typedef char16_t UChar;
372#elif defined(UCHAR_TYPE)
373 typedef UCHAR_TYPE UChar;
374#elif defined(__cplusplus)
375 typedef char16_t UChar;
376#else
377 typedef uint16_t UChar;
378#endif
379
380/**
381 * \var OldUChar
382 * Default ICU 58 definition of UChar.
383 * A base type for UTF-16 code units and pointers.
384 * Unsigned 16-bit integer.
385 *
386 * Define OldUChar to be wchar_t if that is 16 bits wide.
387 * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__
 * if the compiler predefines that macro, and uint16_t otherwise.
388 *
389 * This makes the definition of OldUChar platform-dependent
390 * but allows direct string type compatibility with platforms with
391 * 16-bit wchar_t types.
392 *
393 * This is how UChar was defined in ICU 58, for transition convenience.
394 * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
395 * The current UChar responds to UCHAR_TYPE but OldUChar does not.
396 *
397 * @stable ICU 59
398 */
399#if U_SIZEOF_WCHAR_T==2
400 typedef wchar_t OldUChar;
401#elif defined(__CHAR16_TYPE__)
402 typedef __CHAR16_TYPE__ OldUChar;
403#else
404 typedef uint16_t OldUChar;
405#endif
406
407/**
408 * Define UChar32 as a type for single Unicode code points.
409 * UChar32 is a signed 32-bit integer (same as int32_t).
 * Because the type is signed, negative sentinel values such as
 * U_SENTINEL (-1) can be stored in it.
410 *
411 * The Unicode code point range is 0..0x10ffff.
412 * All other values (negative or >=0x110000) are illegal as Unicode code points.
413 * They may be used as sentinel values to indicate "done", "error"
414 * or similar non-code point conditions.
415 *
416 * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
417 * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
418 * or else to be uint32_t.
419 * That is, the definition of UChar32 was platform-dependent.
420 *
421 * @see U_SENTINEL
422 * @stable ICU 2.4
423 */
424typedef int32_t UChar32;
425
426/**
427 * This value is intended for sentinel values for APIs that
428 * (take or) return single code points (UChar32).
429 * It is outside of the Unicode code point range 0..0x10ffff.
 * The macro expands to the parenthesized int constant (-1); it carries no cast to UChar32.
430 *
431 * For example, a "done" or "error" value in a new API
432 * could be indicated with U_SENTINEL.
433 *
434 * ICU APIs designed before ICU 2.4 usually define service-specific "done"
435 * values, mostly 0xffff.
436 * Those may need to be distinguished from
437 * actual U+ffff text contents by calling functions like
438 * CharacterIterator::hasNext() or UnicodeString::length().
439 *
440 * @return -1
441 * @see UChar32
442 * @stable ICU 2.4
443 */
444#define U_SENTINEL (-1)
445
446#include "unicode/urename.h"
447
448#endif
diff --git a/vendor/tree-sitter/lib/src/unicode/urename.h b/vendor/tree-sitter/lib/src/unicode/urename.h
new file mode 100644
index 0000000..ac79ad0
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/urename.h
@@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.
diff --git a/vendor/tree-sitter/lib/src/unicode/utf.h b/vendor/tree-sitter/lib/src/unicode/utf.h
new file mode 100644
index 0000000..ac79ad0
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/utf.h
@@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.
diff --git a/vendor/tree-sitter/lib/src/unicode/utf16.h b/vendor/tree-sitter/lib/src/unicode/utf16.h
new file mode 100644
index 0000000..9fd7d5c
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/utf16.h
@@ -0,0 +1,733 @@
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 1999-2012, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: utf16.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999sep09
16* created by: Markus W. Scherer
17*/
18
19/**
20 * \file
21 * \brief C API: 16-bit Unicode handling macros
22 *
23 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
24 *
25 * For more information see utf.h and the ICU User Guide Strings chapter
26 * (http://userguide.icu-project.org/strings).
27 *
28 * <em>Usage:</em>
29 * ICU coding guidelines for if() statements should be followed when using these macros.
30 * Compound statements (curly braces {}) must be used for if-else-while...
31 * bodies and all macro statements should be terminated with semicolon.
32 */
33
34#ifndef __UTF16_H__
35#define __UTF16_H__
36
37#include "unicode/umachine.h"
38#ifndef __UTF_H__
39# include "unicode/utf.h"
40#endif
41
42/* single-code point definitions -------------------------------------------- */
43
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 *
 * Implemented directly as a mask test rather than through U_IS_SURROGATE:
 * the utf.h shipped next to this header is an empty placeholder and does
 * not provide U_IS_SURROGATE, so the indirect form cannot be expanded.
 * The mask 0xfffff800 keeps every bit except the low 11, so exactly the
 * values 0xd800..0xdfff compare equal to 0xd800 and are rejected.
 * The expansion is fully parenthesized and evaluates c once.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (((c)&0xfffff800)!=0xd800)
51
52/**
53 * Is this code unit a lead surrogate (U+d800..U+dbff)?
 * The mask 0xfffffc00 drops only the low 10 bits, so bits above bit 15 are
 * tested too: a wider value matches only if it lies in 0xd800..0xdbff.
 * c is evaluated once.
54 * @param c 16-bit code unit
55 * @return TRUE or FALSE
56 * @stable ICU 2.4
57 */
58#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
59
60/**
61 * Is this code unit a trail surrogate (U+dc00..U+dfff)?
 * The mask 0xfffffc00 drops only the low 10 bits, so bits above bit 15 are
 * tested too: a wider value matches only if it lies in 0xdc00..0xdfff.
 * c is evaluated once.
62 * @param c 16-bit code unit
63 * @return TRUE or FALSE
64 * @stable ICU 2.4
65 */
66#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
67
/**
 * Is this code unit a surrogate (U+d800..U+dfff)?
 *
 * Implemented directly as a mask test rather than through U_IS_SURROGATE:
 * the utf.h shipped next to this header is an empty placeholder and does
 * not provide U_IS_SURROGATE, so the indirect form cannot be expanded.
 * The mask 0xfffff800 keeps every bit except the low 11, so exactly the
 * values 0xd800..0xdfff compare equal to 0xd800.
 * c is evaluated once.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
75
76/**
77 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
78 * is it a lead surrogate?
 * Only bit 10 (0x400) is tested: it is clear for 0xd800..0xdbff and set for
 * 0xdc00..0xdfff. The result is meaningless if c is not a surrogate.
79 * @param c 16-bit code unit
80 * @return TRUE or FALSE
81 * @stable ICU 2.4
82 */
83#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
84
85/**
86 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
87 * is it a trail surrogate?
 * Only bit 10 (0x400) is tested: it is set for 0xdc00..0xdfff and clear for
 * 0xd800..0xdbff. The result is meaningless if c is not a surrogate.
88 * @param c 16-bit code unit
89 * @return TRUE or FALSE
90 * @stable ICU 4.2
91 */
92#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
93
94/**
95 * Helper constant for U16_GET_SUPPLEMENTARY.
 * It is (lead<<10)+trail for the first surrogate pair (0xd800, 0xdc00)
 * minus 0x10000, the code point that pair encodes; the value is 0x35fdc00.
 * Subtracting it from (lead<<10)+trail therefore yields the code point.
96 * @internal
97 */
98#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
99
100/**
101 * Get a supplementary code point value (U+10000..U+10ffff)
102 * from its lead and trail surrogates.
103 * The result is undefined if the input values are not
104 * lead and trail surrogates.
 * Both arguments are cast to UChar32 before the arithmetic and each is
 * evaluated exactly once. No validation of the inputs is performed.
105 *
106 * @param lead lead surrogate (U+d800..U+dbff)
107 * @param trail trail surrogate (U+dc00..U+dfff)
108 * @return supplementary code point (U+10000..U+10ffff)
109 * @stable ICU 2.4
110 */
111#define U16_GET_SUPPLEMENTARY(lead, trail) \
112 (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
113
114
115/**
116 * Get the lead surrogate (0xd800..0xdbff) for a
117 * supplementary code point (0x10000..0x10ffff).
 * The constant 0xd7c0 is 0xd800-(0x10000>>10): it folds the subtraction of
 * 0x10000 from the code point into the surrogate base. The result is cast
 * to UChar. The input range is not checked.
118 * @param supplementary 32-bit code point (U+10000..U+10ffff)
119 * @return lead surrogate (U+d800..U+dbff) for supplementary
120 * @stable ICU 2.4
121 */
122#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
123
124/**
125 * Get the trail surrogate (0xdc00..0xdfff) for a
126 * supplementary code point (0x10000..0x10ffff).
 * The low 10 bits of the code point are OR'ed into 0xdc00 and the result is
 * cast to UChar. The input range is not checked.
127 * @param supplementary 32-bit code point (U+10000..U+10ffff)
128 * @return trail surrogate (U+dc00..U+dfff) for supplementary
129 * @stable ICU 2.4
130 */
131#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
132
133/**
134 * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
135 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * c is compared as uint32_t, so every value above 0xffff, including
 * negative values, yields 2.
136 * @param c 32-bit code point
137 * @return 1 or 2
138 * @stable ICU 2.4
139 */
140#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
141
142/**
143 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
 * This is a plain integer literal and matches the larger of the two values
 * that U16_LENGTH can produce.
144 * @return 2
145 * @stable ICU 2.4
146 */
147#define U16_MAX_LENGTH 2
148
149/**
150 * Get a code point from a string at a random-access offset,
151 * without changing the offset.
152 * "Unsafe" macro, assumes well-formed UTF-16.
153 *
154 * The offset may point to either the lead or trail surrogate unit
155 * for a supplementary code point, in which case the macro will read
156 * the adjacent matching surrogate as well.
157 * The result is undefined if the offset points to a single, unpaired surrogate.
158 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * No bounds are checked: for a surrogate at i the macro reads (s)[(i)+1]
 * or (s)[(i)-1] unconditionally. s, i and c are each evaluated more than
 * once, so pass expressions without side effects.
159 *
160 * @param s const UChar * string
161 * @param i string offset
162 * @param c output UChar32 variable
163 * @see U16_GET
164 * @stable ICU 2.4
165 */
166#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
167 (c)=(s)[i]; \
168 if(U16_IS_SURROGATE(c)) { \
169 if(U16_IS_SURROGATE_LEAD(c)) { \
170 (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
171 } else { \
172 (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
173 } \
174 } \
175} UPRV_BLOCK_MACRO_END
176
177/**
178 * Get a code point from a string at a random-access offset,
179 * without changing the offset.
180 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
181 *
182 * The offset may point to either the lead or trail surrogate unit
183 * for a supplementary code point, in which case the macro will read
184 * the adjacent matching surrogate as well.
185 *
186 * The length can be negative for a NUL-terminated string.
 * In that case the test (i)+1!=(length) cannot fail for a non-negative i,
 * so the unit after a lead surrogate is always read; a terminating NUL is
 * not a trail surrogate and leaves c as the unpaired lead.
187 *
188 * If the offset points to a single, unpaired surrogate, then
189 * c is set to that unpaired surrogate.
190 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * The macro declares a block-local uint16_t named __c2, and evaluates
 * s, i and c more than once; pass expressions without side effects.
191 *
192 * @param s const UChar * string
193 * @param start starting string offset (usually 0)
194 * @param i string offset, must be start<=i<length
195 * @param length string length
196 * @param c output UChar32 variable
197 * @see U16_GET_UNSAFE
198 * @stable ICU 2.4
199 */
200#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
201 (c)=(s)[i]; \
202 if(U16_IS_SURROGATE(c)) { \
203 uint16_t __c2; \
204 if(U16_IS_SURROGATE_LEAD(c)) { \
205 if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
206 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
207 } \
208 } else { \
209 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
210 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
211 } \
212 } \
213 } \
214} UPRV_BLOCK_MACRO_END
215
216/**
217 * Get a code point from a string at a random-access offset,
218 * without changing the offset.
219 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
220 *
221 * The offset may point to either the lead or trail surrogate unit
222 * for a supplementary code point, in which case the macro will read
223 * the adjacent matching surrogate as well.
224 *
225 * The length can be negative for a NUL-terminated string.
226 *
227 * If the offset points to a single, unpaired surrogate, then
228 * c is set to U+FFFD.
229 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * Same structure as U16_GET, with an added else branch on each side that
 * stores 0xfffd when no matching partner surrogate is found.
 * The macro declares a block-local uint16_t named __c2, and evaluates
 * s, i and c more than once; pass expressions without side effects.
230 *
231 * @param s const UChar * string
232 * @param start starting string offset (usually 0)
233 * @param i string offset, must be start<=i<length
234 * @param length string length
235 * @param c output UChar32 variable
236 * @see U16_GET_UNSAFE
237 * @stable ICU 60
238 */
239#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
240 (c)=(s)[i]; \
241 if(U16_IS_SURROGATE(c)) { \
242 uint16_t __c2; \
243 if(U16_IS_SURROGATE_LEAD(c)) { \
244 if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
245 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
246 } else { \
247 (c)=0xfffd; \
248 } \
249 } else { \
250 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
251 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
252 } else { \
253 (c)=0xfffd; \
254 } \
255 } \
256 } \
257} UPRV_BLOCK_MACRO_END
258
259/* definitions with forward iteration --------------------------------------- */
260
261/**
262 * Get a code point from a string at a code point boundary offset,
263 * and advance the offset to the next code point boundary.
264 * (Post-incrementing forward iteration.)
265 * "Unsafe" macro, assumes well-formed UTF-16.
266 *
267 * The offset may point to the lead surrogate unit
268 * for a supplementary code point, in which case the macro will read
269 * the following trail surrogate as well.
270 * If the offset points to a trail surrogate, then that itself
271 * will be returned as the code point.
272 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * After a lead surrogate the next unit is consumed without checking that
 * it is a trail surrogate or that it lies inside the string.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
273 *
274 * @param s const UChar * string
275 * @param i string offset
276 * @param c output UChar32 variable
277 * @see U16_NEXT
278 * @stable ICU 2.4
279 */
280#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
281 (c)=(s)[(i)++]; \
282 if(U16_IS_LEAD(c)) { \
283 (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
284 } \
285} UPRV_BLOCK_MACRO_END
286
287/**
288 * Get a code point from a string at a code point boundary offset,
289 * and advance the offset to the next code point boundary.
290 * (Post-incrementing forward iteration.)
291 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
292 *
293 * The length can be negative for a NUL-terminated string.
 * In that case the test (i)!=(length) cannot fail for a non-negative i,
 * so the unit after a lead surrogate is always inspected; a terminating
 * NUL is not a trail surrogate and is therefore not consumed.
294 *
295 * The offset may point to the lead surrogate unit
296 * for a supplementary code point, in which case the macro will read
297 * the following trail surrogate as well.
298 * If the offset points to a trail surrogate or
299 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * i is advanced by one unit, or by two when a complete pair is read.
 * The macro declares a block-local uint16_t named __c2. i must be a
 * modifiable lvalue; s, i and c are evaluated more than once.
300 *
301 * @param s const UChar * string
302 * @param i string offset, must be i<length
303 * @param length string length
304 * @param c output UChar32 variable
305 * @see U16_NEXT_UNSAFE
306 * @stable ICU 2.4
307 */
308#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
309 (c)=(s)[(i)++]; \
310 if(U16_IS_LEAD(c)) { \
311 uint16_t __c2; \
312 if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
313 ++(i); \
314 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
315 } \
316 } \
317} UPRV_BLOCK_MACRO_END
318
319/**
320 * Get a code point from a string at a code point boundary offset,
321 * and advance the offset to the next code point boundary.
322 * (Post-incrementing forward iteration.)
323 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
324 *
325 * The length can be negative for a NUL-terminated string.
326 *
327 * The offset may point to the lead surrogate unit
328 * for a supplementary code point, in which case the macro will read
329 * the following trail surrogate as well.
330 * If the offset points to a trail surrogate or
331 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
 *
 * An unpaired surrogate consumes exactly one code unit; a complete pair
 * consumes two. The macro declares a block-local uint16_t named __c2.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
332 *
333 * @param s const UChar * string
334 * @param i string offset, must be i<length
335 * @param length string length
336 * @param c output UChar32 variable
337 * @see U16_NEXT_UNSAFE
338 * @stable ICU 60
339 */
340#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
341 (c)=(s)[(i)++]; \
342 if(U16_IS_SURROGATE(c)) { \
343 uint16_t __c2; \
344 if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
345 ++(i); \
346 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
347 } else { \
348 (c)=0xfffd; \
349 } \
350 } \
351} UPRV_BLOCK_MACRO_END
352
353/**
354 * Append a code point to a string, overwriting 1 or 2 code units.
355 * The offset points to the current end of the string contents
356 * and is advanced (post-increment).
357 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
358 * Otherwise, the result is undefined.
 *
 * Values up to 0xffff are stored as one unit; anything larger is split
 * into a lead unit ((c)>>10)+0xd7c0 and a trail unit ((c)&0x3ff)|0xdc00.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
359 *
360 * @param s UChar * string buffer (written to)
361 * @param i string offset
362 * @param c code point to append
363 * @see U16_APPEND
364 * @stable ICU 2.4
365 */
366#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
367 if((uint32_t)(c)<=0xffff) { \
368 (s)[(i)++]=(uint16_t)(c); \
369 } else { \
370 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
371 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
372 } \
373} UPRV_BLOCK_MACRO_END
374
375/**
376 * Append a code point to a string, overwriting 1 or 2 code units.
377 * The offset points to the current end of the string contents
378 * and is advanced (post-increment).
379 * "Safe" macro, checks for a valid code point.
380 * If a surrogate pair is written, checks for sufficient space in the string.
381 * If the code point is not valid or a trail surrogate does not fit,
382 * then isError is set to TRUE.
 *
 * The validity check is only a range check: every value up to 0xffff,
 * including the surrogate values 0xd800..0xdfff, is stored as a single unit.
 * The single-unit branch does not test capacity; the caller must ensure
 * i<capacity. On failure nothing is written and i is not advanced.
 * isError is never reset to FALSE by this macro.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
383 *
384 * @param s UChar * string buffer (written to)
385 * @param i string offset, must be i<capacity
386 * @param capacity size of the string buffer
387 * @param c code point to append
388 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
389 * @see U16_APPEND_UNSAFE
390 * @stable ICU 2.4
391 */
392#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
393 if((uint32_t)(c)<=0xffff) { \
394 (s)[(i)++]=(uint16_t)(c); \
395 } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
396 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
397 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
398 } else /* c>0x10ffff or not enough space */ { \
399 (isError)=TRUE; \
400 } \
401} UPRV_BLOCK_MACRO_END
402
403/**
404 * Advance the string offset from one code point boundary to the next.
405 * (Post-incrementing iteration.)
406 * "Unsafe" macro, assumes well-formed UTF-16.
 * After a lead surrogate the offset is advanced by two units without
 * looking at the second unit. i must be a modifiable lvalue.
407 *
408 * @param s const UChar * string
409 * @param i string offset
410 * @see U16_FWD_1
411 * @stable ICU 2.4
412 */
413#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
414 if(U16_IS_LEAD((s)[(i)++])) { \
415 ++(i); \
416 } \
417} UPRV_BLOCK_MACRO_END
418
419/**
420 * Advance the string offset from one code point boundary to the next.
421 * (Post-incrementing iteration.)
422 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
423 *
424 * The length can be negative for a NUL-terminated string.
 *
 * The offset always advances by one unit; it advances by a second unit
 * only when the first was a lead surrogate, the new offset differs from
 * length, and the unit there is a trail surrogate.
 * i must be a modifiable lvalue; s and i are evaluated more than once.
425 *
426 * @param s const UChar * string
427 * @param i string offset, must be i<length
428 * @param length string length
429 * @see U16_FWD_1_UNSAFE
430 * @stable ICU 2.4
431 */
432#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
433 if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
434 ++(i); \
435 } \
436} UPRV_BLOCK_MACRO_END
437
438/**
439 * Advance the string offset from one code point boundary to the n-th next one,
440 * i.e., move forward by n code points.
441 * (Post-incrementing iteration.)
442 * "Unsafe" macro, assumes well-formed UTF-16.
 * n is copied once into a block-local int32_t named __N; a value of n that
 * is zero or negative leaves the offset unchanged. No string boundary is
 * checked.
443 *
444 * @param s const UChar * string
445 * @param i string offset
446 * @param n number of code points to skip
447 * @see U16_FWD_N
448 * @stable ICU 2.4
449 */
450#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
451 int32_t __N=(n); \
452 while(__N>0) { \
453 U16_FWD_1_UNSAFE(s, i); \
454 --__N; \
455 } \
456} UPRV_BLOCK_MACRO_END
457
458/**
459 * Advance the string offset from one code point boundary to the n-th next one,
460 * i.e., move forward by n code points.
461 * (Post-incrementing iteration.)
462 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
463 *
464 * The length can be negative for a NUL-terminated string.
 *
 * The loop stops early when i reaches length or, for a negative length,
 * when the unit at i is NUL; fewer than n code points may then have been
 * skipped. n is copied once into a block-local int32_t named __N.
465 *
466 * @param s const UChar * string
467 * @param i int32_t string offset, must be i<length
468 * @param length int32_t string length
469 * @param n number of code points to skip
470 * @see U16_FWD_N_UNSAFE
471 * @stable ICU 2.4
472 */
473#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
474 int32_t __N=(n); \
475 while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
476 U16_FWD_1(s, i, length); \
477 --__N; \
478 } \
479} UPRV_BLOCK_MACRO_END
480
481/**
482 * Adjust a random-access offset to a code point boundary
483 * at the start of a code point.
484 * If the offset points to the trail surrogate of a surrogate pair,
485 * then the offset is decremented.
486 * Otherwise, it is not modified.
487 * "Unsafe" macro, assumes well-formed UTF-16.
 * Only the unit at i is inspected: any trail surrogate causes the
 * decrement, without checking the preceding unit or the string start.
488 *
489 * @param s const UChar * string
490 * @param i string offset
491 * @see U16_SET_CP_START
492 * @stable ICU 2.4
493 */
494#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
495 if(U16_IS_TRAIL((s)[i])) { \
496 --(i); \
497 } \
498} UPRV_BLOCK_MACRO_END
499
500/**
501 * Adjust a random-access offset to a code point boundary
502 * at the start of a code point.
503 * If the offset points to the trail surrogate of a surrogate pair,
504 * then the offset is decremented.
505 * Otherwise, it is not modified.
506 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * The unit at i is always read, so i must index a readable unit.
 * The unit at i-1 is read only when i>start.
507 *
508 * @param s const UChar * string
509 * @param start starting string offset (usually 0)
510 * @param i string offset, must be start<=i
511 * @see U16_SET_CP_START_UNSAFE
512 * @stable ICU 2.4
513 */
514#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
515 if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
516 --(i); \
517 } \
518} UPRV_BLOCK_MACRO_END
519
520/* definitions with backward iteration -------------------------------------- */
521
522/**
523 * Move the string offset from one code point boundary to the previous one
524 * and get the code point between them.
525 * (Pre-decrementing backward iteration.)
526 * "Unsafe" macro, assumes well-formed UTF-16.
527 *
528 * The input offset may be the same as the string length.
529 * If the offset is behind a trail surrogate unit
530 * for a supplementary code point, then the macro will read
531 * the preceding lead surrogate as well.
532 * If the offset is behind a lead surrogate, then that itself
533 * will be returned as the code point.
534 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * After a trail surrogate the preceding unit is consumed without checking
 * that it is a lead surrogate or that it lies inside the string.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
535 *
536 * @param s const UChar * string
537 * @param i string offset
538 * @param c output UChar32 variable
539 * @see U16_PREV
540 * @stable ICU 2.4
541 */
542#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
543 (c)=(s)[--(i)]; \
544 if(U16_IS_TRAIL(c)) { \
545 (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
546 } \
547} UPRV_BLOCK_MACRO_END
548
549/**
550 * Move the string offset from one code point boundary to the previous one
551 * and get the code point between them.
552 * (Pre-decrementing backward iteration.)
553 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
554 *
555 * The input offset may be the same as the string length.
556 * If the offset is behind a trail surrogate unit
557 * for a supplementary code point, then the macro will read
558 * the preceding lead surrogate as well.
559 * If the offset is behind a lead surrogate or behind a single, unpaired
560 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * i is decremented by one unit, or by two when a complete pair is read;
 * the unit before a trail surrogate is read only when the decremented
 * offset is still greater than start.
 * The macro declares a block-local uint16_t named __c2. i must be a
 * modifiable lvalue; s, i and c are evaluated more than once.
561 *
562 * @param s const UChar * string
563 * @param start starting string offset (usually 0)
564 * @param i string offset, must be start<i
565 * @param c output UChar32 variable
566 * @see U16_PREV_UNSAFE
567 * @stable ICU 2.4
568 */
569#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
570 (c)=(s)[--(i)]; \
571 if(U16_IS_TRAIL(c)) { \
572 uint16_t __c2; \
573 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
574 --(i); \
575 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
576 } \
577 } \
578} UPRV_BLOCK_MACRO_END
579
580/**
581 * Move the string offset from one code point boundary to the previous one
582 * and get the code point between them.
583 * (Pre-decrementing backward iteration.)
584 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
585 *
586 * The input offset may be the same as the string length.
587 * If the offset is behind a trail surrogate unit
588 * for a supplementary code point, then the macro will read
589 * the preceding lead surrogate as well.
590 * If the offset is behind a lead surrogate or behind a single, unpaired
591 * trail surrogate, then c is set to U+FFFD.
 *
 * An unpaired surrogate consumes exactly one code unit; a complete pair
 * consumes two. The macro declares a block-local uint16_t named __c2.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
592 *
593 * @param s const UChar * string
594 * @param start starting string offset (usually 0)
595 * @param i string offset, must be start<i
596 * @param c output UChar32 variable
597 * @see U16_PREV_UNSAFE
598 * @stable ICU 60
599 */
600#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
601 (c)=(s)[--(i)]; \
602 if(U16_IS_SURROGATE(c)) { \
603 uint16_t __c2; \
604 if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
605 --(i); \
606 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
607 } else { \
608 (c)=0xfffd; \
609 } \
610 } \
611} UPRV_BLOCK_MACRO_END
612
613/**
614 * Move the string offset from one code point boundary to the previous one.
615 * (Pre-decrementing backward iteration.)
616 * The input offset may be the same as the string length.
617 * "Unsafe" macro, assumes well-formed UTF-16.
 * After stepping onto a trail surrogate the offset is decremented a second
 * time without looking at the preceding unit or the string start.
 * i must be a modifiable lvalue.
618 *
619 * @param s const UChar * string
620 * @param i string offset
621 * @see U16_BACK_1
622 * @stable ICU 2.4
623 */
624#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
625 if(U16_IS_TRAIL((s)[--(i)])) { \
626 --(i); \
627 } \
628} UPRV_BLOCK_MACRO_END
629
630/**
631 * Move the string offset from one code point boundary to the previous one.
632 * (Pre-decrementing backward iteration.)
633 * The input offset may be the same as the string length.
634 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * The offset always moves back by one unit; it moves back a second unit
 * only when the first was a trail surrogate, the new offset is still
 * greater than start, and the unit before it is a lead surrogate.
 * i must be a modifiable lvalue; s and i are evaluated more than once.
635 *
636 * @param s const UChar * string
637 * @param start starting string offset (usually 0)
638 * @param i string offset, must be start<i
639 * @see U16_BACK_1_UNSAFE
640 * @stable ICU 2.4
641 */
642#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
643 if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
644 --(i); \
645 } \
646} UPRV_BLOCK_MACRO_END
647
648/**
649 * Move the string offset from one code point boundary to the n-th one before it,
650 * i.e., move backward by n code points.
651 * (Pre-decrementing backward iteration.)
652 * The input offset may be the same as the string length.
653 * "Unsafe" macro, assumes well-formed UTF-16.
 * n is copied once into a block-local int32_t named __N; a value of n that
 * is zero or negative leaves the offset unchanged. The string start is
 * not checked.
654 *
655 * @param s const UChar * string
656 * @param i string offset
657 * @param n number of code points to skip
658 * @see U16_BACK_N
659 * @stable ICU 2.4
660 */
661#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
662 int32_t __N=(n); \
663 while(__N>0) { \
664 U16_BACK_1_UNSAFE(s, i); \
665 --__N; \
666 } \
667} UPRV_BLOCK_MACRO_END
668
669/**
670 * Move the string offset from one code point boundary to the n-th one before it,
671 * i.e., move backward by n code points.
672 * (Pre-decrementing backward iteration.)
673 * The input offset may be the same as the string length.
674 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * The loop stops early when i reaches start; fewer than n code points may
 * then have been skipped. n is copied once into a block-local int32_t
 * named __N.
675 *
676 * @param s const UChar * string
677 * @param start start of string
678 * @param i string offset, must be start<i
679 * @param n number of code points to skip
680 * @see U16_BACK_N_UNSAFE
681 * @stable ICU 2.4
682 */
683#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
684 int32_t __N=(n); \
685 while(__N>0 && (i)>(start)) { \
686 U16_BACK_1(s, start, i); \
687 --__N; \
688 } \
689} UPRV_BLOCK_MACRO_END
690
691/**
692 * Adjust a random-access offset to a code point boundary after a code point.
693 * If the offset is behind the lead surrogate of a surrogate pair,
694 * then the offset is incremented.
695 * Otherwise, it is not modified.
696 * The input offset may be the same as the string length.
697 * "Unsafe" macro, assumes well-formed UTF-16.
 * Only the unit at i-1 is inspected: any lead surrogate there causes the
 * increment, without checking the unit at i. The unit at i-1 is read
 * unconditionally, so i must be greater than the start of the string.
698 *
699 * @param s const UChar * string
700 * @param i string offset
701 * @see U16_SET_CP_LIMIT
702 * @stable ICU 2.4
703 */
704#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
705 if(U16_IS_LEAD((s)[(i)-1])) { \
706 ++(i); \
707 } \
708} UPRV_BLOCK_MACRO_END
709
710/**
711 * Adjust a random-access offset to a code point boundary after a code point.
712 * If the offset is behind the lead surrogate of a surrogate pair,
713 * then the offset is incremented.
714 * Otherwise, it is not modified.
715 * The input offset may be the same as the string length.
716 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
717 *
718 * The length can be negative for a NUL-terminated string.
 *
 * No unit is read unless start<i and either i<length or length<0, so
 * i==start and i==length (for a non-negative length) are left untouched
 * without touching the string.
 * s, start, i and length may be evaluated more than once.
719 *
720 * @param s const UChar * string
721 * @param start int32_t starting string offset (usually 0)
722 * @param i int32_t string offset, start<=i<=length
723 * @param length int32_t string length
724 * @see U16_SET_CP_LIMIT_UNSAFE
725 * @stable ICU 2.4
726 */
727#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
728 if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
729 ++(i); \
730 } \
731} UPRV_BLOCK_MACRO_END
732
733#endif
diff --git a/vendor/tree-sitter/lib/src/unicode/utf8.h b/vendor/tree-sitter/lib/src/unicode/utf8.h
new file mode 100644
index 0000000..bb00130
--- /dev/null
+++ b/vendor/tree-sitter/lib/src/unicode/utf8.h
@@ -0,0 +1,881 @@
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 1999-2015, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: utf8.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999sep13
16* created by: Markus W. Scherer
17*/
18
19/**
20 * \file
21 * \brief C API: 8-bit Unicode handling macros
22 *
23 * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
24 *
25 * For more information see utf.h and the ICU User Guide Strings chapter
26 * (http://userguide.icu-project.org/strings).
27 *
28 * <em>Usage:</em>
29 * ICU coding guidelines for if() statements should be followed when using these macros.
30 * Compound statements (curly braces {}) must be used for if-else-while...
31 * bodies and all macro statements should be terminated with semicolon.
32 */
33
34#ifndef __UTF8_H__
35#define __UTF8_H__
36
37#include "unicode/umachine.h"
38#ifndef __UTF_H__
39# include "unicode/utf.h"
40#endif
41
42/* internal definitions ----------------------------------------------------- */
43
44/**
45 * Counts the trail bytes for a UTF-8 lead byte.
46 * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
47 * leadByte might be evaluated multiple times.
48 *
49 * This is internal since it is not meant to be called directly by external clients;
50 * however it is called by public macros in this file and thus must remain stable.
51 *
52 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
53 * @internal
54 */
/* Non-lead bytes yield 0; a lead byte yields 1, plus 1 if >=0xe0, plus 1 if >=0xf0. */
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (!U8_IS_LEAD(leadByte) ? 0 : \
        1+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
58
59/**
60 * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
61 * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
62 * leadByte might be evaluated multiple times.
63 *
64 * This is internal since it is not meant to be called directly by external clients;
65 * however it is called by public macros in this file and thus must remain stable.
66 *
67 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
68 * @internal
69 */
/* Sum of three threshold tests: each threshold crossed (0xc2, 0xe0, 0xf0) adds one trail byte. */
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    (((uint8_t)(leadByte)>=0xf0)+ \
     ((uint8_t)(leadByte)>=0xe0)+ \
     ((uint8_t)(leadByte)>=0xc2))
72
73/**
74 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
75 *
76 * This is internal since it is not meant to be called directly by external clients;
77 * however it is called by public macros in this file and thus must remain stable.
78 * @internal
79 */
/* Keeps the low (6-countTrailBytes) bits of leadByte in place; leadByte must be a modifiable lvalue. */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) \
    ((leadByte)&=(0x3f>>(countTrailBytes)))
81
82/**
83 * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
84 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
85 * Lead byte E0..EF bits 3..0 are used as byte index,
86 * first trail byte bits 7..5 are used as bit index into that byte.
87 * @see U8_IS_VALID_LEAD3_AND_T1
88 * @internal
89 */
90#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" /* 16 entries indexed by lead&0xf: entry 0 (E0) has only bit 5 set, entry 0xD (ED) only bit 4, all others bits 4 and 5; U8_IS_VALID_LEAD3_AND_T1 tests bit (t1>>5), so bit 4 stands for trail 80..9F and bit 5 for A0..BF */
91
92/**
93 * Internal 3-byte UTF-8 validity check.
94 * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
95 * @internal
96 */
/* Picks the table byte by the low nibble of lead and tests the bit numbered by the top three bits of t1. */
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) \
    ((1<<((uint8_t)(t1)>>5))&U8_LEAD3_T1_BITS[(lead)&0xf])
98
99/**
100 * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
101 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
102 * First trail byte bits 7..4 are used as byte index,
103 * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
104 * @see U8_IS_VALID_LEAD4_AND_T1
105 * @internal
106 */
107#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" /* 16 entries indexed by t1>>4: entry 8 (trail 80..8F) has bits 1..4 set, entries 9..B (trail 90..BF) have bits 0..3 set, all others are 0; U8_IS_VALID_LEAD4_AND_T1 tests bit (lead&7) */
108
109/**
110 * Internal 4-byte UTF-8 validity check.
111 * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
112 * @internal
113 */
/* Picks the table byte by the high nibble of t1 and tests the bit numbered by the low three bits of lead. */
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) \
    ((1<<((lead)&7))&U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4])
115
116/**
117 * Function for handling "next code point" with error-checking.
118 *
119 * This is internal since it is not meant to be called directly by external clients;
120 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
121 * file and thus must remain stable, and should not be hidden when other internal
122 * functions are hidden (otherwise public macros would fail to compile).
123 * @internal
124 */
125U_STABLE UChar32 U_EXPORT2
126utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
127
128/**
129 * Function for handling "append code point" with error-checking.
130 *
131 * This is internal since it is not meant to be called directly by external clients;
132 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
133 * file and thus must remain stable, and should not be hidden when other internal
134 * functions are hidden (otherwise public macros would fail to compile).
135 * @internal
136 */
137U_STABLE int32_t U_EXPORT2
138utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
139
140/**
141 * Function for handling "previous code point" with error-checking.
142 *
143 * This is internal since it is not meant to be called directly by external clients;
144 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
145 * file and thus must remain stable, and should not be hidden when other internal
146 * functions are hidden (otherwise public macros would fail to compile).
147 * @internal
148 */
149U_STABLE UChar32 U_EXPORT2
150utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
151
152/**
153 * Function for handling "skip backward one code point" with error-checking.
154 *
155 * This is internal since it is not meant to be called directly by external clients;
156 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
157 * file and thus must remain stable, and should not be hidden when other internal
158 * functions are hidden (otherwise public macros would fail to compile).
159 * @internal
160 */
161U_STABLE int32_t U_EXPORT2
162utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
163
164/* single-code point definitions -------------------------------------------- */
165
166/**
167 * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
168 * @param c 8-bit code unit (byte)
169 * @return TRUE or FALSE
170 * @stable ICU 2.4
171 */
/* True exactly when bit 7 of c is clear. */
#define U8_IS_SINGLE(c) (!((c)&0x80))
173
174/**
175 * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
176 * @param c 8-bit code unit (byte)
177 * @return TRUE or FALSE
178 * @stable ICU 2.4
179 */
/* Single unsigned range check: (c-0xc2) wraps for bytes below 0xc2, so only 0xc2..0xf4 land in 0..0x32 (0x33 = 0xf4-0xc2+1). */
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<0x33)
182
183/**
184 * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
185 * @param c 8-bit code unit (byte)
186 * @return TRUE or FALSE
187 * @stable ICU 2.4
188 */
/* Reinterpreted as signed, bytes 0x80..0xbf are -128..-65, i.e. at most -0x41. */
#define U8_IS_TRAIL(c) ((int8_t)(c)<=-0x41)
190
191/**
192 * How many code units (bytes) are used for the UTF-8 encoding
193 * of this Unicode code point?
194 * @param c 32-bit code point
195 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
196 * @stable ICU 2.4
197 */
/* Flat chain of range tests, lowest range first; surrogates and values above 0x10ffff give 0. */
#define U8_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : \
     (uint32_t)(c)<=0x7ff ? 2 : \
     (uint32_t)(c)<=0xd7ff ? 3 : \
     ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff) ? 0 : \
     (uint32_t)(c)<=0xffff ? 3 : 4)
208
209/**
210 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
211 * @return 4
212 * @stable ICU 2.4
213 */
214#define U8_MAX_LENGTH 4 /* matches the largest value U8_LENGTH yields and the most bytes U8_APPEND writes for one code point */
215
216/**
217 * Get a code point from a string at a random-access offset,
218 * without changing the offset.
219 * The offset may point to either the lead byte or one of the trail bytes
220 * for a code point, in which case the macro will read all of the bytes
221 * for the code point.
222 * The result is undefined if the offset points to an illegal UTF-8
223 * byte sequence.
224 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
225 *
226 * @param s const uint8_t * string
227 * @param i string offset
228 * @param c output UChar32 variable
229 * @see U8_GET
230 * @stable ICU 2.4
231 */
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Work on a private copy so the caller's offset is left untouched. */ \
    int32_t _u8_get_unsafe_pos; \
    _u8_get_unsafe_pos=(int32_t)(i); \
    /* Back up to the start of the sequence, then decode forward from there. */ \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_pos); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_pos, c); \
} UPRV_BLOCK_MACRO_END
237
238/**
239 * Get a code point from a string at a random-access offset,
240 * without changing the offset.
241 * The offset may point to either the lead byte or one of the trail bytes
242 * for a code point, in which case the macro will read all of the bytes
243 * for the code point.
244 *
245 * The length can be negative for a NUL-terminated string.
246 *
247 * If the offset points to an illegal UTF-8 byte sequence, then
248 * c is set to a negative value.
249 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
250 *
251 * @param s const uint8_t * string
252 * @param start int32_t starting string offset
253 * @param i int32_t string offset, must be start<=i<length
254 * @param length int32_t string length
255 * @param c output UChar32 variable, set to <0 in case of an error
256 * @see U8_GET_UNSAFE
257 * @stable ICU 2.4
258 */
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Work on a private copy so the caller's offset is left untouched. */ \
    int32_t _u8_get_pos; \
    _u8_get_pos=(i); \
    /* Back up to the start of the sequence, then decode forward with error checking. */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
264
265/**
266 * Get a code point from a string at a random-access offset,
267 * without changing the offset.
268 * The offset may point to either the lead byte or one of the trail bytes
269 * for a code point, in which case the macro will read all of the bytes
270 * for the code point.
271 *
272 * The length can be negative for a NUL-terminated string.
273 *
274 * If the offset points to an illegal UTF-8 byte sequence, then
275 * c is set to U+FFFD.
276 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
277 *
278 * This macro does not distinguish between a real U+FFFD in the text
279 * and U+FFFD returned for an ill-formed sequence.
280 * Use U8_GET() if that distinction is important.
281 *
282 * @param s const uint8_t * string
283 * @param start int32_t starting string offset
284 * @param i int32_t string offset, must be start<=i<length
285 * @param length int32_t string length
286 * @param c output UChar32 variable, set to U+FFFD in case of an error
287 * @see U8_GET
288 * @stable ICU 51
289 */
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Work on a private copy so the caller's offset is left untouched. */ \
    int32_t _u8_get_pos; \
    _u8_get_pos=(i); \
    /* Same as U8_GET, but decoding goes through U8_NEXT_OR_FFFD. */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT_OR_FFFD(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
295
296/* definitions with forward iteration --------------------------------------- */
297
298/**
299 * Get a code point from a string at a code point boundary offset,
300 * and advance the offset to the next code point boundary.
301 * (Post-incrementing forward iteration.)
302 * "Unsafe" macro, assumes well-formed UTF-8.
303 *
304 * The offset may point to the lead byte of a multi-byte sequence,
305 * in which case the macro will read the whole sequence.
306 * The result is undefined if the offset points to a trail byte
307 * or an illegal UTF-8 sequence.
308 *
309 * @param s const uint8_t * string
310 * @param i string offset
311 * @param c output UChar32 variable
312 * @see U8_NEXT
313 * @stable ICU 2.4
314 */
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(U8_IS_SINGLE(c)) { \
        /* ASCII: the byte already is the code point */ \
    } else if((c)<0xe0) { \
        /* two bytes: 5 payload bits from the lead, 6 from the trail */ \
        (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
    } else if((c)<0xf0) { \
        /* three bytes: the lead is not masked because the cast to UChar drops the bits shifted above bit 15 */ \
        (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
        (i)+=2; \
    } else { \
        /* four bytes: 3 payload bits from the lead, 6 from each trail */ \
        (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
        (i)+=3; \
    } \
} UPRV_BLOCK_MACRO_END
330
331/**
332 * Get a code point from a string at a code point boundary offset,
333 * and advance the offset to the next code point boundary.
334 * (Post-incrementing forward iteration.)
335 * "Safe" macro, checks for illegal sequences and for string boundaries.
336 *
337 * The length can be negative for a NUL-terminated string.
338 *
339 * The offset may point to the lead byte of a multi-byte sequence,
340 * in which case the macro will read the whole sequence.
341 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
342 * c is set to a negative value.
343 *
344 * @param s const uint8_t * string
345 * @param i int32_t string offset, must be i<length
346 * @param length int32_t string length
347 * @param c output UChar32 variable, set to <0 in case of an error
348 * @see U8_NEXT_UNSAFE
349 * @stable ICU 2.4
350 */
/* Thin wrapper: the shared decoder with U_SENTINEL as the substitute for ill-formed input. */
#define U8_NEXT(s, i, length, c) \
    U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
352
353/**
354 * Get a code point from a string at a code point boundary offset,
355 * and advance the offset to the next code point boundary.
356 * (Post-incrementing forward iteration.)
357 * "Safe" macro, checks for illegal sequences and for string boundaries.
358 *
359 * The length can be negative for a NUL-terminated string.
360 *
361 * The offset may point to the lead byte of a multi-byte sequence,
362 * in which case the macro will read the whole sequence.
363 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
364 * c is set to U+FFFD.
365 *
366 * This macro does not distinguish between a real U+FFFD in the text
367 * and U+FFFD returned for an ill-formed sequence.
368 * Use U8_NEXT() if that distinction is important.
369 *
370 * @param s const uint8_t * string
371 * @param i int32_t string offset, must be i<length
372 * @param length int32_t string length
373 * @param c output UChar32 variable, set to U+FFFD in case of an error
374 * @see U8_NEXT
375 * @stable ICU 51
376 */
/* Thin wrapper: the shared decoder with 0xfffd as the substitute for ill-formed input. */
#define U8_NEXT_OR_FFFD(s, i, length, c) \
    U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
378
379/** @internal */
380#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { /* shared body of U8_NEXT and U8_NEXT_OR_FFFD; sub is the value stored in c for ill-formed input */ \
381 (c)=(uint8_t)(s)[(i)++]; /* read the first byte as unsigned and step past it unconditionally */ \
382 if(!U8_IS_SINGLE(c)) { /* ASCII needs no further work */ \
383 uint8_t __t = 0; /* holds the trail byte currently being examined */ \
384 if((i)!=(length) && /* the first byte was the last one: nothing left to read (never equal for a negative length) */ \
385 /* fetch/validate/assemble all but last trail byte */ \
386 ((c)>=0xe0 ? \
387 ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
388 U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && /* c keeps its low 4 bits; table lookup validates lead + first trail */ \
389 (__t&=0x3f, 1) /* reduce the first trail byte to its 6 payload bits; the ", 1" keeps the && chain going */ \
390 : /* U+10000..U+10FFFF */ \
391 ((c)-=0xf0)<=4 && /* only leads F0..F4 pass; c is now the lead's low bits */ \
392 U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && /* table lookup validates lead + first trail */ \
393 ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && /* merge first trail, advance, and stop if the input ends here */ \
394 (__t=(s)[i]-0x80)<=0x3f) && /* uint8_t wraparound: only bytes 80..BF map into 0..3F */ \
395 /* valid second-to-last trail byte */ \
396 ((c)=((c)<<6)|__t, ++(i)!=(length)) /* merge it, advance, and stop if the input ends here */ \
397 : /* U+0080..U+07FF */ \
398 (c)>=0xc2 && ((c)&=0x1f, 1)) && /* rejects first bytes 80..C1; keeps the lead's 5 payload bits */ \
399 /* last trail byte */ \
400 (__t=(s)[i]-0x80)<=0x3f && /* same 80..BF check as above */ \
401 ((c)=((c)<<6)|__t, ++(i), 1)) { /* merge the last trail byte and step past it; c now holds the code point */ \
402 } else { \
403 (c)=(sub); /* ill-formed: i stays just behind the bytes that were accepted before the failure */ \
404 } \
405 } \
406} UPRV_BLOCK_MACRO_END
407
408/**
409 * Append a code point to a string, overwriting 1 to 4 bytes.
410 * The offset points to the current end of the string contents
411 * and is advanced (post-increment).
412 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
413 * Otherwise, the result is undefined.
414 *
415 * @param s uint8_t * string buffer (written to, so it must not be const)
416 * @param i string offset
417 * @param c code point to append
418 * @see U8_APPEND
419 * @stable ICU 2.4
420 */
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __cp=(c); \
    if(__cp<=0x7f) { \
        /* one byte */ \
        (s)[(i)++]=(uint8_t)__cp; \
    } else if(__cp<=0x7ff) { \
        /* two bytes: lead 110xxxxx + one trail */ \
        (s)[(i)++]=(uint8_t)((__cp>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__cp&0x3f)|0x80); \
    } else if(__cp<=0xffff) { \
        /* three bytes: lead 1110xxxx + two trails */ \
        (s)[(i)++]=(uint8_t)((__cp>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__cp>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__cp&0x3f)|0x80); \
    } else { \
        /* four bytes: lead 11110xxx + three trails */ \
        (s)[(i)++]=(uint8_t)((__cp>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__cp>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__cp>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__cp&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
440
441/**
442 * Append a code point to a string, overwriting 1 to 4 bytes.
443 * The offset points to the current end of the string contents
444 * and is advanced (post-increment).
445 * "Safe" macro, checks for a valid code point.
446 * If a non-ASCII code point is written, checks for sufficient space in the string.
447 * If the code point is not valid or trail bytes do not fit,
448 * then isError is set to TRUE.
449 *
450 * @param s uint8_t * string buffer (written to, so it must not be const)
451 * @param i int32_t string offset, must be i<capacity
452 * @param capacity int32_t size of the string buffer
453 * @param c UChar32 code point to append
454 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
455 * @see U8_APPEND_UNSAFE
456 * @stable ICU 2.4
457 */
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __cp=(c); \
    if(__cp<=0x7f) { \
        /* ASCII is stored without a capacity check */ \
        (s)[(i)++]=(uint8_t)__cp; \
    } else if(__cp<=0x7ff && (i)+1<(capacity)) { \
        /* two bytes, and both fit */ \
        (s)[(i)++]=(uint8_t)(0xc0|(__cp>>6)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else if(__cp<=0xffff && !(0xd800<=__cp && __cp<=0xdfff) && (i)+2<(capacity)) { \
        /* three bytes, not a surrogate, and all three fit */ \
        (s)[(i)++]=(uint8_t)(0xe0|(__cp>>12)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else if(__cp>=0x10000 && __cp<=0x10ffff && (i)+3<(capacity)) { \
        /* four bytes, and all four fit */ \
        (s)[(i)++]=(uint8_t)(0xf0|(__cp>>18)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>12)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else { \
        /* surrogate, out-of-range value, or not enough room: nothing is written */ \
        (isError)=TRUE; \
    } \
} UPRV_BLOCK_MACRO_END
478
479/**
480 * Advance the string offset from one code point boundary to the next.
481 * (Post-incrementing iteration.)
482 * "Unsafe" macro, assumes well-formed UTF-8.
483 *
484 * @param s const uint8_t * string
485 * @param i string offset
486 * @see U8_FWD_1
487 * @stable ICU 2.4
488 */
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Skip the trail bytes announced by the byte at i, plus that byte itself. */ \
    (i)+=U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i])+1; \
} UPRV_BLOCK_MACRO_END
492
493/**
494 * Advance the string offset from one code point boundary to the next.
495 * (Post-incrementing iteration.)
496 * "Safe" macro, checks for illegal sequences and for string boundaries.
497 *
498 * The length can be negative for a NUL-terminated string.
499 *
500 * @param s const uint8_t * string
501 * @param i int32_t string offset, must be i<length
502 * @param length int32_t string length
503 * @see U8_FWD_1_UNSAFE
504 * @stable ICU 2.4
505 */
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __lead=(s)[(i)++]; \
    /* Trail bytes are only looked for after a lead byte that is not the last byte. */ \
    if(U8_IS_LEAD(__lead) && (i)!=(length)) { \
        uint8_t __trail1=(s)[i]; \
        if(__lead<0xe0) { \
            /* two-byte sequence: accept one trail byte */ \
            if(U8_IS_TRAIL(__trail1)) { \
                ++(i); \
            } \
        } else if(__lead<0xf0) { \
            /* three-byte sequence: lead + first trail must pair up, then one more trail */ \
            if(U8_IS_VALID_LEAD3_AND_T1(__lead, __trail1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } else /* __lead>=0xf0 */ { \
            /* four-byte sequence: lead + first trail must pair up, then two more trails */ \
            if(U8_IS_VALID_LEAD4_AND_T1(__lead, __trail1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
528
529/**
530 * Advance the string offset from one code point boundary to the n-th next one,
531 * i.e., move forward by n code points.
532 * (Post-incrementing iteration.)
533 * "Unsafe" macro, assumes well-formed UTF-8.
534 *
535 * @param s const uint8_t * string
536 * @param i string offset
537 * @param n number of code points to skip
538 * @see U8_FWD_N
539 * @stable ICU 2.4
540 */
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_fwd_n_left; \
    /* One unchecked forward step per requested code point. */ \
    for(_u8_fwd_n_left=(n); _u8_fwd_n_left>0; --_u8_fwd_n_left) { \
        U8_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
548
549/**
550 * Advance the string offset from one code point boundary to the n-th next one,
551 * i.e., move forward by n code points.
552 * (Post-incrementing iteration.)
553 * "Safe" macro, checks for illegal sequences and for string boundaries.
554 *
555 * The length can be negative for a NUL-terminated string.
556 *
557 * @param s const uint8_t * string
558 * @param i int32_t string offset, must be i<length
559 * @param length int32_t string length
560 * @param n number of code points to skip
561 * @see U8_FWD_N_UNSAFE
562 * @stable ICU 2.4
563 */
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_fwd_n_left; \
    /* Stop early at the end: i reaching length, or a NUL byte when length is negative. */ \
    for(_u8_fwd_n_left=(n); \
            _u8_fwd_n_left>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); \
            --_u8_fwd_n_left) { \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
571
572/**
573 * Adjust a random-access offset to a code point boundary
574 * at the start of a code point.
575 * If the offset points to a UTF-8 trail byte,
576 * then the offset is moved backward to the corresponding lead byte.
577 * Otherwise, it is not modified.
578 * "Unsafe" macro, assumes well-formed UTF-8.
579 *
580 * @param s const uint8_t * string
581 * @param i string offset
582 * @see U8_SET_CP_START
583 * @stable ICU 2.4
584 */
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Walk backward while the byte at i is a trail byte; there is no lower bound check. */ \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END
588
589/**
590 * Adjust a random-access offset to a code point boundary
591 * at the start of a code point.
592 * If the offset points to a UTF-8 trail byte,
593 * then the offset is moved backward to the corresponding lead byte.
594 * Otherwise, it is not modified.
595 *
596 * "Safe" macro, checks for illegal sequences and for string boundaries.
597 * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
598 *
599 * @param s const uint8_t * string
600 * @param start int32_t starting string offset (usually 0)
601 * @param i int32_t string offset, must be start<=i
602 * @see U8_SET_CP_START_UNSAFE
603 * @see U8_TRUNCATE_IF_INCOMPLETE
604 * @stable ICU 2.4
605 */
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(!U8_IS_TRAIL((s)[(i)])) { \
        /* not on a trail byte: the offset is left as it is */ \
    } else { \
        /* on a trail byte: the out-of-line helper computes the new offset */ \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
611
612/**
613 * If the string ends with a UTF-8 byte sequence that is valid so far
614 * but incomplete, then reduce the length of the string to end before
615 * the lead byte of that incomplete sequence.
616 * For example, if the string ends with E1 80, the length is reduced by 2.
617 *
618 * In all other cases (the string ends with a complete sequence, or it is not
619 * possible for any further trail byte to extend the trailing sequence)
620 * the length remains unchanged.
621 *
622 * Useful for processing text split across multiple buffers
623 * (save the incomplete sequence for later)
624 * and for optimizing iteration
625 * (check for string length only once per character).
626 *
627 * "Safe" macro, checks for illegal sequences and for string boundaries.
628 * Unlike U8_SET_CP_START(), this macro never reads s[length].
629 *
630 * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
631 *
632 * @param s const uint8_t * string
633 * @param start int32_t starting string offset (usually 0)
634 * @param length int32_t string length (usually start<=length)
635 * @see U8_SET_CP_START
636 * @stable ICU 61
637 */
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* An empty range has no last byte to inspect. */ \
    if((length)>(start)) { \
        /* (s) is parenthesized at every use so that expression arguments such as p+offset are indexed as a whole. */ \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lead byte with nothing after it: drop it */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead followed by one trail byte: drop both if the pair is valid so far */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                /* two trail bytes: only a 4-byte lead in front of them leaves the sequence incomplete */ \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
661
662/* definitions with backward iteration -------------------------------------- */
663
664/**
665 * Move the string offset from one code point boundary to the previous one
666 * and get the code point between them.
667 * (Pre-decrementing backward iteration.)
668 * "Unsafe" macro, assumes well-formed UTF-8.
669 *
670 * The input offset may be the same as the string length.
671 * If the offset is behind a multi-byte sequence, then the macro will read
672 * the whole sequence.
673 * If the offset is behind a lead byte, then that itself
674 * will be returned as the code point.
675 * The result is undefined if the offset is behind an illegal UTF-8 sequence.
676 *
677 * @param s const uint8_t * string
678 * @param i string offset
679 * @param c output UChar32 variable
680 * @see U8_PREV
681 * @stable ICU 2.4
682 */
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __byte, __trails=1, __bits=6; \
        /* c is the last trail byte: keep its 6 payload bits */ \
        (c)&=0x3f; \
        __byte=(s)[--(i)]; \
        /* Fold in further trail bytes, 6 bits higher each time, until a byte >=0xc0 shows up. */ \
        while(__byte<0xc0) { \
            (c)|=(UChar32)(__byte&0x3f)<<__bits; \
            ++__trails; \
            __bits+=6; \
            __byte=(s)[--(i)]; \
        } \
        /* __byte is the lead byte: strip its length marker bits and place the rest on top */ \
        U8_MASK_LEAD_BYTE(__byte, __trails); \
        (c)|=(UChar32)__byte<<__bits; \
    } \
} UPRV_BLOCK_MACRO_END
704
705/**
706 * Move the string offset from one code point boundary to the previous one
707 * and get the code point between them.
708 * (Pre-decrementing backward iteration.)
709 * "Safe" macro, checks for illegal sequences and for string boundaries.
710 *
711 * The input offset may be the same as the string length.
712 * If the offset is behind a multi-byte sequence, then the macro will read
713 * the whole sequence.
714 * If the offset is behind a lead byte, then that itself
715 * will be returned as the code point.
716 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
717 *
718 * @param s const uint8_t * string
719 * @param start int32_t starting string offset (usually 0)
720 * @param i int32_t string offset, must be start<i
721 * @param c output UChar32 variable, set to <0 in case of an error
722 * @see U8_PREV_UNSAFE
723 * @stable ICU 2.4
724 */
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Step back one byte and read it as unsigned. */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Non-ASCII: the out-of-line helper finishes the job and may move i further back. */ \
        /* The cast is applied to (s) as a whole so expression arguments are converted correctly; */ \
        /* -1 is the strict value this macro passes (U8_PREV_OR_FFFD passes -3). */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
731
732/**
733 * Move the string offset from one code point boundary to the previous one
734 * and get the code point between them.
735 * (Pre-decrementing backward iteration.)
736 * "Safe" macro, checks for illegal sequences and for string boundaries.
737 *
738 * The input offset may be the same as the string length.
739 * If the offset is behind a multi-byte sequence, then the macro will read
740 * the whole sequence.
741 * If the offset is behind a lead byte, then that itself
742 * will be returned as the code point.
743 * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
744 *
745 * This macro does not distinguish between a real U+FFFD in the text
746 * and U+FFFD returned for an ill-formed sequence.
747 * Use U8_PREV() if that distinction is important.
748 *
749 * @param s const uint8_t * string
750 * @param start int32_t starting string offset (usually 0)
751 * @param i int32_t string offset, must be start<i
752 * @param c output UChar32 variable, set to U+FFFD in case of an error
753 * @see U8_PREV
754 * @stable ICU 51
755 */
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Step back one byte and read it as unsigned. */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Non-ASCII: the out-of-line helper finishes the job and may move i further back. */ \
        /* The cast is applied to (s) as a whole so expression arguments are converted correctly; */ \
        /* -3 is the strict value this macro passes (U8_PREV passes -1). */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
762
763/**
764 * Move the string offset from one code point boundary to the previous one.
765 * (Pre-decrementing backward iteration.)
766 * The input offset may be the same as the string length.
767 * "Unsafe" macro, assumes well-formed UTF-8.
768 *
769 * @param s const uint8_t * string
770 * @param i string offset
771 * @see U8_BACK_1
772 * @stable ICU 2.4
773 */
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Always step back at least once, then keep going while sitting on a trail byte. */ \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END
777
778/**
779 * Move the string offset from one code point boundary to the previous one.
780 * (Pre-decrementing backward iteration.)
781 * The input offset may be the same as the string length.
782 * "Safe" macro, checks for illegal sequences and for string boundaries.
783 *
784 * @param s const uint8_t * string
785 * @param start int32_t starting string offset (usually 0)
786 * @param i int32_t string offset, must be start<i
787 * @see U8_BACK_1_UNSAFE
788 * @stable ICU 2.4
789 */
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Step back one byte unconditionally. */ \
    --(i); \
    /* Landing on a trail byte hands the rest of the move to the out-of-line helper. */ \
    if(U8_IS_TRAIL((s)[i])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
795
796/**
797 * Move the string offset from one code point boundary to the n-th one before it,
798 * i.e., move backward by n code points.
799 * (Pre-decrementing backward iteration.)
800 * The input offset may be the same as the string length.
801 * "Unsafe" macro, assumes well-formed UTF-8.
802 *
803 * @param s const uint8_t * string
804 * @param i string offset
805 * @param n number of code points to skip
806 * @see U8_BACK_N
807 * @stable ICU 2.4
808 */
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_back_n_left; \
    /* One unchecked backward step per requested code point. */ \
    for(_u8_back_n_left=(n); _u8_back_n_left>0; --_u8_back_n_left) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
816
817/**
818 * Move the string offset from one code point boundary to the n-th one before it,
819 * i.e., move backward by n code points.
820 * (Pre-decrementing backward iteration.)
821 * The input offset may be the same as the string length.
822 * "Safe" macro, checks for illegal sequences and for string boundaries.
823 *
824 * @param s const uint8_t * string
825 * @param start int32_t index of the start of the string
826 * @param i int32_t string offset, must be start<i
827 * @param n number of code points to skip
828 * @see U8_BACK_N_UNSAFE
829 * @stable ICU 2.4
830 */
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_back_n_left; \
    /* One checked backward step per requested code point; give up once i reaches start. */ \
    for(_u8_back_n_left=(n); _u8_back_n_left>0 && (i)>(start); --_u8_back_n_left) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
838
839/**
840 * Adjust a random-access offset to a code point boundary after a code point.
841 * If the offset is behind a partial multi-byte sequence,
842 * then the offset is incremented to behind the whole sequence.
843 * Otherwise, it is not modified.
844 * The input offset may be the same as the string length.
845 * "Unsafe" macro, assumes well-formed UTF-8.
846 *
847 * @param s const uint8_t * string
848 * @param i string offset
849 * @see U8_SET_CP_LIMIT
850 * @stable ICU 2.4
851 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Back up to the start of the code point before i, then skip forward over all of it. */ \
    U8_BACK_1_UNSAFE(s, i); U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
856
857/**
858 * Adjust a random-access offset to a code point boundary after a code point.
859 * If the offset is behind a partial multi-byte sequence,
860 * then the offset is incremented to behind the whole sequence.
861 * Otherwise, it is not modified.
862 * The input offset may be the same as the string length.
863 * "Safe" macro, checks for illegal sequences and for string boundaries.
864 *
865 * The length can be negative for a NUL-terminated string.
866 *
867 * @param s const uint8_t * string
868 * @param start int32_t starting string offset (usually 0)
869 * @param i int32_t string offset, must be start<=i<=length
870 * @param length int32_t string length
871 * @see U8_SET_CP_LIMIT_UNSAFE
872 * @stable ICU 2.4
873 */
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* Nothing to adjust at the very start or at the very end of the string */ \
    /* (a negative length skips the end check). */ \
    if((start)<(i)) { \
        if((i)<(length) || (length)<0) { \
            /* Back up to the start of the code point before i, then skip forward over all of it. */ \
            U8_BACK_1(s, start, i); \
            U8_FWD_1(s, i, length); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
880
881#endif