diff options
Diffstat (limited to 'vendor/tree-sitter')
43 files changed, 16703 insertions, 0 deletions
diff --git a/vendor/tree-sitter/LICENSE b/vendor/tree-sitter/LICENSE new file mode 100644 index 0000000..3f67411 --- /dev/null +++ b/vendor/tree-sitter/LICENSE | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | The MIT License (MIT) | ||
| 2 | |||
| 3 | Copyright (c) 2018-2023 Max Brunsfeld | ||
| 4 | |||
| 5 | Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 6 | of this software and associated documentation files (the "Software"), to deal | ||
| 7 | in the Software without restriction, including without limitation the rights | ||
| 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 9 | copies of the Software, and to permit persons to whom the Software is | ||
| 10 | furnished to do so, subject to the following conditions: | ||
| 11 | |||
| 12 | The above copyright notice and this permission notice shall be included in all | ||
| 13 | copies or substantial portions of the Software. | ||
| 14 | |||
| 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| 21 | SOFTWARE. | ||
diff --git a/vendor/tree-sitter/Makefile b/vendor/tree-sitter/Makefile new file mode 100644 index 0000000..eb4075f --- /dev/null +++ b/vendor/tree-sitter/Makefile | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | VERSION := 0.20.10 | ||
| 2 | |||
| 3 | # install directory layout | ||
| 4 | PREFIX ?= /usr/local | ||
| 5 | INCLUDEDIR ?= $(PREFIX)/include | ||
| 6 | LIBDIR ?= $(PREFIX)/lib | ||
| 7 | PCLIBDIR ?= $(LIBDIR)/pkgconfig | ||
| 8 | |||
| 9 | # collect sources | ||
| 10 | ifneq ($(AMALGAMATED),1) | ||
| 11 | SRC := $(wildcard lib/src/*.c) | ||
| 12 | # do not double-include amalgamation | ||
| 13 | SRC := $(filter-out lib/src/lib.c,$(SRC)) | ||
| 14 | else | ||
| 15 | # use amalgamated build | ||
| 16 | SRC := lib/src/lib.c | ||
| 17 | endif | ||
| 18 | OBJ := $(SRC:.c=.o) | ||
| 19 | |||
| 20 | # define default flags, and override to append mandatory flags | ||
| 21 | override CFLAGS := -O3 -std=gnu99 -fPIC -fvisibility=hidden -Wall -Wextra -Wshadow $(CFLAGS) | ||
| 22 | override CFLAGS += -Ilib/src -Ilib/include | ||
| 23 | |||
| 24 | # ABI versioning | ||
| 25 | SONAME_MAJOR := 0 | ||
| 26 | SONAME_MINOR := 0 | ||
| 27 | |||
| 28 | # OS-specific bits | ||
| 29 | ifeq ($(shell uname),Darwin) | ||
| 30 | SOEXT = dylib | ||
| 31 | SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib | ||
| 32 | SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib | ||
| 33 | LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib | ||
| 34 | else | ||
| 35 | SOEXT = so | ||
| 36 | SOEXTVER_MAJOR = so.$(SONAME_MAJOR) | ||
| 37 | SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) | ||
| 38 | LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR) | ||
| 39 | endif | ||
| 40 | ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly)) | ||
| 41 | PCLIBDIR := $(PREFIX)/libdata/pkgconfig | ||
| 42 | endif | ||
| 43 | |||
| 44 | all: libtree-sitter.a libtree-sitter.$(SOEXTVER) | ||
| 45 | |||
| 46 | libtree-sitter.a: $(OBJ) | ||
| 47 | $(AR) rcs $@ $^ | ||
| 48 | |||
| 49 | libtree-sitter.$(SOEXTVER): $(OBJ) | ||
| 50 | $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ | ||
| 51 | ln -sf $@ libtree-sitter.$(SOEXT) | ||
| 52 | ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) | ||
| 53 | ifneq ($(STRIP),) | ||
| 54 | $(STRIP) $@ | ||
| 55 | endif | ||
| 56 | |||
| 57 | install: all | ||
| 58 | sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ | ||
| 59 | -e 's|=$(PREFIX)|=$${prefix}|' \ | ||
| 60 | -e 's|@PREFIX@|$(PREFIX)|' \ | ||
| 61 | tree-sitter.pc.in > tree-sitter.pc | ||
| 62 | |||
| 63 | install -d '$(DESTDIR)$(LIBDIR)' | ||
| 64 | install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/ | ||
| 65 | install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/ | ||
| 66 | ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) | ||
| 67 | ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) | ||
| 68 | |||
| 69 | install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter | ||
| 70 | install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ | ||
| 71 | |||
| 72 | install -d '$(DESTDIR)$(PCLIBDIR)' | ||
| 73 | install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/ | ||
| 74 | |||
| 75 | clean: | ||
| 76 | rm -f lib/src/*.o tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) | ||
| 77 | |||
| 78 | .PHONY: all install clean | ||
diff --git a/vendor/tree-sitter/lib/include/tree_sitter/api.h b/vendor/tree-sitter/lib/include/tree_sitter/api.h new file mode 100644 index 0000000..56093d9 --- /dev/null +++ b/vendor/tree-sitter/lib/include/tree_sitter/api.h | |||
| @@ -0,0 +1,1180 @@ | |||
| 1 | #ifndef TREE_SITTER_API_H_ | ||
| 2 | #define TREE_SITTER_API_H_ | ||
| 3 | |||
| 4 | #if defined(__GNUC__) || defined(__clang__) | ||
| 5 | #pragma GCC visibility push(default) | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #ifdef __cplusplus | ||
| 9 | extern "C" { | ||
| 10 | #endif | ||
| 11 | |||
| 12 | #include <stdlib.h> | ||
| 13 | #include <stdint.h> | ||
| 14 | #include <stdbool.h> | ||
| 15 | |||
| 16 | /****************************/ | ||
| 17 | /* Section - ABI Versioning */ | ||
| 18 | /****************************/ | ||
| 19 | |||
| 20 | /** | ||
| 21 | * The latest ABI version that is supported by the current version of the | ||
| 22 | * library. When Languages are generated by the Tree-sitter CLI, they are | ||
| 23 | * assigned an ABI version number that corresponds to the current CLI version. | ||
| 24 | * The Tree-sitter library is generally backwards-compatible with languages | ||
| 25 | * generated using older CLI versions, but is not forwards-compatible. | ||
| 26 | */ | ||
| 27 | #define TREE_SITTER_LANGUAGE_VERSION 14 | ||
| 28 | |||
| 29 | /** | ||
| 30 | * The earliest ABI version that is supported by the current version of the | ||
| 31 | * library. | ||
| 32 | */ | ||
| 33 | #define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 | ||
| 34 | |||
| 35 | /*******************/ | ||
| 36 | /* Section - Types */ | ||
| 37 | /*******************/ | ||
| 38 | |||
| 39 | typedef uint16_t TSStateId; | ||
| 40 | typedef uint16_t TSSymbol; | ||
| 41 | typedef uint16_t TSFieldId; | ||
| 42 | typedef struct TSLanguage TSLanguage; | ||
| 43 | typedef struct TSParser TSParser; | ||
| 44 | typedef struct TSTree TSTree; | ||
| 45 | typedef struct TSQuery TSQuery; | ||
| 46 | typedef struct TSQueryCursor TSQueryCursor; | ||
| 47 | typedef struct TSLookaheadIterator TSLookaheadIterator; | ||
| 48 | |||
| 49 | typedef enum { | ||
| 50 | TSInputEncodingUTF8, | ||
| 51 | TSInputEncodingUTF16, | ||
| 52 | } TSInputEncoding; | ||
| 53 | |||
| 54 | typedef enum { | ||
| 55 | TSSymbolTypeRegular, | ||
| 56 | TSSymbolTypeAnonymous, | ||
| 57 | TSSymbolTypeAuxiliary, | ||
| 58 | } TSSymbolType; | ||
| 59 | |||
| 60 | typedef struct { | ||
| 61 | uint32_t row; | ||
| 62 | uint32_t column; | ||
| 63 | } TSPoint; | ||
| 64 | |||
| 65 | typedef struct { | ||
| 66 | TSPoint start_point; | ||
| 67 | TSPoint end_point; | ||
| 68 | uint32_t start_byte; | ||
| 69 | uint32_t end_byte; | ||
| 70 | } TSRange; | ||
| 71 | |||
| 72 | typedef struct { | ||
| 73 | void *payload; | ||
| 74 | const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read); | ||
| 75 | TSInputEncoding encoding; | ||
| 76 | } TSInput; | ||
| 77 | |||
| 78 | typedef enum { | ||
| 79 | TSLogTypeParse, | ||
| 80 | TSLogTypeLex, | ||
| 81 | } TSLogType; | ||
| 82 | |||
| 83 | typedef struct { | ||
| 84 | void *payload; | ||
| 85 | void (*log)(void *payload, TSLogType log_type, const char *buffer); | ||
| 86 | } TSLogger; | ||
| 87 | |||
| 88 | typedef struct { | ||
| 89 | uint32_t start_byte; | ||
| 90 | uint32_t old_end_byte; | ||
| 91 | uint32_t new_end_byte; | ||
| 92 | TSPoint start_point; | ||
| 93 | TSPoint old_end_point; | ||
| 94 | TSPoint new_end_point; | ||
| 95 | } TSInputEdit; | ||
| 96 | |||
| 97 | typedef struct { | ||
| 98 | uint32_t context[4]; | ||
| 99 | const void *id; | ||
| 100 | const TSTree *tree; | ||
| 101 | } TSNode; | ||
| 102 | |||
| 103 | typedef struct { | ||
| 104 | const void *tree; | ||
| 105 | const void *id; | ||
| 106 | uint32_t context[2]; | ||
| 107 | } TSTreeCursor; | ||
| 108 | |||
| 109 | typedef struct { | ||
| 110 | TSNode node; | ||
| 111 | uint32_t index; | ||
| 112 | } TSQueryCapture; | ||
| 113 | |||
| 114 | typedef enum { | ||
| 115 | TSQuantifierZero = 0, // must match the array initialization value | ||
| 116 | TSQuantifierZeroOrOne, | ||
| 117 | TSQuantifierZeroOrMore, | ||
| 118 | TSQuantifierOne, | ||
| 119 | TSQuantifierOneOrMore, | ||
| 120 | } TSQuantifier; | ||
| 121 | |||
| 122 | typedef struct { | ||
| 123 | uint32_t id; | ||
| 124 | uint16_t pattern_index; | ||
| 125 | uint16_t capture_count; | ||
| 126 | const TSQueryCapture *captures; | ||
| 127 | } TSQueryMatch; | ||
| 128 | |||
| 129 | typedef enum { | ||
| 130 | TSQueryPredicateStepTypeDone, | ||
| 131 | TSQueryPredicateStepTypeCapture, | ||
| 132 | TSQueryPredicateStepTypeString, | ||
| 133 | } TSQueryPredicateStepType; | ||
| 134 | |||
| 135 | typedef struct { | ||
| 136 | TSQueryPredicateStepType type; | ||
| 137 | uint32_t value_id; | ||
| 138 | } TSQueryPredicateStep; | ||
| 139 | |||
| 140 | typedef enum { | ||
| 141 | TSQueryErrorNone = 0, | ||
| 142 | TSQueryErrorSyntax, | ||
| 143 | TSQueryErrorNodeType, | ||
| 144 | TSQueryErrorField, | ||
| 145 | TSQueryErrorCapture, | ||
| 146 | TSQueryErrorStructure, | ||
| 147 | TSQueryErrorLanguage, | ||
| 148 | } TSQueryError; | ||
| 149 | |||
| 150 | /********************/ | ||
| 151 | /* Section - Parser */ | ||
| 152 | /********************/ | ||
| 153 | |||
| 154 | /** | ||
| 155 | * Create a new parser. | ||
| 156 | */ | ||
| 157 | TSParser *ts_parser_new(void); | ||
| 158 | |||
| 159 | /** | ||
| 160 | * Delete the parser, freeing all of the memory that it used. | ||
| 161 | */ | ||
| 162 | void ts_parser_delete(TSParser *self); | ||
| 163 | |||
| 164 | /** | ||
| 165 | * Get the parser's current language. | ||
| 166 | */ | ||
| 167 | const TSLanguage *ts_parser_language(const TSParser *self); | ||
| 168 | |||
| 169 | /** | ||
| 170 | * Set the language that the parser should use for parsing. | ||
| 171 | * | ||
| 172 | * Returns a boolean indicating whether or not the language was successfully | ||
| 173 | * assigned. True means assignment succeeded. False means there was a version | ||
| 174 | * mismatch: the language was generated with an incompatible version of the | ||
| 175 | * Tree-sitter CLI. Check the language's version using [`ts_language_version`] | ||
| 176 | * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and | ||
| 177 | * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. | ||
| 178 | */ | ||
| 179 | bool ts_parser_set_language(TSParser *self, const TSLanguage *language); | ||
| 180 | |||
| 181 | /** | ||
| 182 | * Set the ranges of text that the parser should include when parsing. | ||
| 183 | * | ||
| 184 | * By default, the parser will always include entire documents. This function | ||
| 185 | * allows you to parse only a *portion* of a document but still return a syntax | ||
| 186 | * tree whose ranges match up with the document as a whole. You can also pass | ||
| 187 | * multiple disjoint ranges. | ||
| 188 | * | ||
| 189 | * The second and third parameters specify the location and length of an array | ||
| 190 | * of ranges. The parser does *not* take ownership of these ranges; it copies | ||
| 191 | * the data, so it doesn't matter how these ranges are allocated. | ||
| 192 | * | ||
| 193 | * If `count` is zero, then the entire document will be parsed. Otherwise, | ||
| 194 | * the given ranges must be ordered from earliest to latest in the document, | ||
| 195 | * and they must not overlap. That is, the following must hold for all: | ||
| 196 | * | ||
| 197 | * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` | ||
| 198 | * | ||
| 199 | * If this requirement is not satisfied, the operation will fail, the ranges | ||
| 200 | * will not be assigned, and this function will return `false`. On success, | ||
| 201 | * this function returns `true`. | ||
| 202 | */ | ||
| 203 | bool ts_parser_set_included_ranges( | ||
| 204 | TSParser *self, | ||
| 205 | const TSRange *ranges, | ||
| 206 | uint32_t count | ||
| 207 | ); | ||
| 208 | |||
| 209 | /** | ||
| 210 | * Get the ranges of text that the parser will include when parsing. | ||
| 211 | * | ||
| 212 | * The returned pointer is owned by the parser. The caller should not free it | ||
| 213 | * or write to it. The length of the array will be written to the given | ||
| 214 | * `count` pointer. | ||
| 215 | */ | ||
| 216 | const TSRange *ts_parser_included_ranges( | ||
| 217 | const TSParser *self, | ||
| 218 | uint32_t *count | ||
| 219 | ); | ||
| 220 | |||
| 221 | /** | ||
| 222 | * Use the parser to parse some source code and create a syntax tree. | ||
| 223 | * | ||
| 224 | * If you are parsing this document for the first time, pass `NULL` for the | ||
| 225 | * `old_tree` parameter. Otherwise, if you have already parsed an earlier | ||
| 226 | * version of this document and the document has since been edited, pass the | ||
| 227 | * previous syntax tree so that the unchanged parts of it can be reused. | ||
| 228 | * This will save time and memory. For this to work correctly, you must have | ||
| 229 | * already edited the old syntax tree using the [`ts_tree_edit`] function in a | ||
| 230 | * way that exactly matches the source code changes. | ||
| 231 | * | ||
| 232 | * The [`TSInput`] parameter lets you specify how to read the text. It has the | ||
| 233 | * following three fields: | ||
| 234 | * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset | ||
| 235 | * and (row, column) position. The function should return a pointer to the | ||
| 236 | * text and write its length to the [`bytes_read`] pointer. The parser does | ||
| 237 | * not take ownership of this buffer; it just borrows it until it has | ||
| 238 | * finished reading it. The function should write a zero value to the | ||
| 239 | * [`bytes_read`] pointer to indicate the end of the document. | ||
| 240 | * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation | ||
| 241 | * of the [`read`] function. | ||
| 242 | * 3. [`encoding`]: An indication of how the text is encoded. Either | ||
| 243 | * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. | ||
| 244 | * | ||
| 245 | * This function returns a syntax tree on success, and `NULL` on failure. There | ||
| 246 | * are three possible reasons for failure: | ||
| 247 | * 1. The parser does not have a language assigned. Check for this using the | ||
| 248 | * [`ts_parser_language`] function. | ||
| 249 | * 2. Parsing was cancelled due to a timeout that was set by an earlier call to | ||
| 250 | * the [`ts_parser_set_timeout_micros`] function. You can resume parsing from | ||
| 251 | * where the parser left off by calling [`ts_parser_parse`] again with the | ||
| 252 | * same arguments. Or you can start parsing from scratch by first calling | ||
| 253 | * [`ts_parser_reset`]. | ||
| 254 | * 3. Parsing was cancelled using a cancellation flag that was set by an | ||
| 255 | * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing | ||
| 256 | * from where the parser left off by calling [`ts_parser_parse`] again with | ||
| 257 | * the same arguments. | ||
| 258 | * | ||
| 259 | * [`read`]: TSInput::read | ||
| 260 | * [`payload`]: TSInput::payload | ||
| 261 | * [`encoding`]: TSInput::encoding | ||
| 262 | * [`bytes_read`]: TSInput::read | ||
| 263 | */ | ||
| 264 | TSTree *ts_parser_parse( | ||
| 265 | TSParser *self, | ||
| 266 | const TSTree *old_tree, | ||
| 267 | TSInput input | ||
| 268 | ); | ||
| 269 | |||
| 270 | /** | ||
| 271 | * Use the parser to parse some source code stored in one contiguous buffer. | ||
| 272 | * The first two parameters are the same as in the [`ts_parser_parse`] function | ||
| 273 | * above. The last two parameters indicate the location of the buffer and its | ||
| 274 | * length in bytes. | ||
| 275 | */ | ||
| 276 | TSTree *ts_parser_parse_string( | ||
| 277 | TSParser *self, | ||
| 278 | const TSTree *old_tree, | ||
| 279 | const char *string, | ||
| 280 | uint32_t length | ||
| 281 | ); | ||
| 282 | |||
| 283 | /** | ||
| 284 | * Use the parser to parse some source code stored in one contiguous buffer with | ||
| 285 | * a given encoding. The first four parameters work the same as in the | ||
| 286 | * [`ts_parser_parse_string`] method above. The final parameter indicates whether | ||
| 287 | * the text is encoded as UTF8 or UTF16. | ||
| 288 | */ | ||
| 289 | TSTree *ts_parser_parse_string_encoding( | ||
| 290 | TSParser *self, | ||
| 291 | const TSTree *old_tree, | ||
| 292 | const char *string, | ||
| 293 | uint32_t length, | ||
| 294 | TSInputEncoding encoding | ||
| 295 | ); | ||
| 296 | |||
| 297 | /** | ||
| 298 | * Instruct the parser to start the next parse from the beginning. | ||
| 299 | * | ||
| 300 | * If the parser previously failed because of a timeout or a cancellation, then | ||
| 301 | * by default, it will resume where it left off on the next call to | ||
| 302 | * [`ts_parser_parse`] or other parsing functions. If you don't want to resume, | ||
| 303 | * and instead intend to use this parser to parse some other document, you must | ||
| 304 | * call [`ts_parser_reset`] first. | ||
| 305 | */ | ||
| 306 | void ts_parser_reset(TSParser *self); | ||
| 307 | |||
| 308 | /** | ||
| 309 | * Set the maximum duration in microseconds that parsing should be allowed to | ||
| 310 | * take before halting. | ||
| 311 | * | ||
| 312 | * If parsing takes longer than this, it will halt early, returning NULL. | ||
| 313 | * See [`ts_parser_parse`] for more information. | ||
| 314 | */ | ||
| 315 | void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros); | ||
| 316 | |||
| 317 | /** | ||
| 318 | * Get the duration in microseconds that parsing is allowed to take. | ||
| 319 | */ | ||
| 320 | uint64_t ts_parser_timeout_micros(const TSParser *self); | ||
| 321 | |||
| 322 | /** | ||
| 323 | * Set the parser's current cancellation flag pointer. | ||
| 324 | * | ||
| 325 | * If a non-null pointer is assigned, then the parser will periodically read | ||
| 326 | * from this pointer during parsing. If it reads a non-zero value, it will | ||
| 327 | * halt early, returning NULL. See [`ts_parser_parse`] for more information. | ||
| 328 | */ | ||
| 329 | void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag); | ||
| 330 | |||
| 331 | /** | ||
| 332 | * Get the parser's current cancellation flag pointer. | ||
| 333 | */ | ||
| 334 | const size_t *ts_parser_cancellation_flag(const TSParser *self); | ||
| 335 | |||
| 336 | /** | ||
| 337 | * Set the logger that a parser should use during parsing. | ||
| 338 | * | ||
| 339 | * The parser does not take ownership over the logger payload. If a logger was | ||
| 340 | * previously assigned, the caller is responsible for releasing any memory | ||
| 341 | * owned by the previous logger. | ||
| 342 | */ | ||
| 343 | void ts_parser_set_logger(TSParser *self, TSLogger logger); | ||
| 344 | |||
| 345 | /** | ||
| 346 | * Get the parser's current logger. | ||
| 347 | */ | ||
| 348 | TSLogger ts_parser_logger(const TSParser *self); | ||
| 349 | |||
| 350 | /** | ||
| 351 | * Set the file descriptor to which the parser should write debugging graphs | ||
| 352 | * during parsing. The graphs are formatted in the DOT language. You may want | ||
| 353 | * to pipe these graphs directly to a `dot(1)` process in order to generate | ||
| 354 | * SVG output. You can turn off this logging by passing a negative number. | ||
| 355 | */ | ||
| 356 | void ts_parser_print_dot_graphs(TSParser *self, int fd); | ||
| 357 | |||
| 358 | /******************/ | ||
| 359 | /* Section - Tree */ | ||
| 360 | /******************/ | ||
| 361 | |||
| 362 | /** | ||
| 363 | * Create a shallow copy of the syntax tree. This is very fast. | ||
| 364 | * | ||
| 365 | * You need to copy a syntax tree in order to use it on more than one thread at | ||
| 366 | * a time, as syntax trees are not thread safe. | ||
| 367 | */ | ||
| 368 | TSTree *ts_tree_copy(const TSTree *self); | ||
| 369 | |||
| 370 | /** | ||
| 371 | * Delete the syntax tree, freeing all of the memory that it used. | ||
| 372 | */ | ||
| 373 | void ts_tree_delete(TSTree *self); | ||
| 374 | |||
| 375 | /** | ||
| 376 | * Get the root node of the syntax tree. | ||
| 377 | */ | ||
| 378 | TSNode ts_tree_root_node(const TSTree *self); | ||
| 379 | |||
| 380 | /** | ||
| 381 | * Get the root node of the syntax tree, but with its position | ||
| 382 | * shifted forward by the given offset. | ||
| 383 | */ | ||
| 384 | TSNode ts_tree_root_node_with_offset( | ||
| 385 | const TSTree *self, | ||
| 386 | uint32_t offset_bytes, | ||
| 387 | TSPoint offset_extent | ||
| 388 | ); | ||
| 389 | |||
| 390 | /** | ||
| 391 | * Get the language that was used to parse the syntax tree. | ||
| 392 | */ | ||
| 393 | const TSLanguage *ts_tree_language(const TSTree *self); | ||
| 394 | |||
| 395 | /** | ||
| 396 | * Get the array of included ranges that was used to parse the syntax tree. | ||
| 397 | * | ||
| 398 | * The returned pointer must be freed by the caller. | ||
| 399 | */ | ||
| 400 | TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length); | ||
| 401 | |||
| 402 | /** | ||
| 403 | * Edit the syntax tree to keep it in sync with source code that has been | ||
| 404 | * edited. | ||
| 405 | * | ||
| 406 | * You must describe the edit both in terms of byte offsets and in terms of | ||
| 407 | * (row, column) coordinates. | ||
| 408 | */ | ||
| 409 | void ts_tree_edit(TSTree *self, const TSInputEdit *edit); | ||
| 410 | |||
| 411 | /** | ||
| 412 | * Compare an old edited syntax tree to a new syntax tree representing the same | ||
| 413 | * document, returning an array of ranges whose syntactic structure has changed. | ||
| 414 | * | ||
| 415 | * For this to work correctly, the old syntax tree must have been edited such | ||
| 416 | * that its ranges match up to the new tree. Generally, you'll want to call | ||
| 417 | * this function right after calling one of the [`ts_parser_parse`] functions. | ||
| 418 | * You need to pass the old tree that was passed to parse, as well as the new | ||
| 419 | * tree that was returned from that function. | ||
| 420 | * | ||
| 421 | * The returned array is allocated using `malloc` and the caller is responsible | ||
| 422 | * for freeing it using `free`. The length of the array will be written to the | ||
| 423 | * given `length` pointer. | ||
| 424 | */ | ||
| 425 | TSRange *ts_tree_get_changed_ranges( | ||
| 426 | const TSTree *old_tree, | ||
| 427 | const TSTree *new_tree, | ||
| 428 | uint32_t *length | ||
| 429 | ); | ||
| 430 | |||
| 431 | /** | ||
| 432 | * Write a DOT graph describing the syntax tree to the given file. | ||
| 433 | */ | ||
| 434 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor); | ||
| 435 | |||
| 436 | /******************/ | ||
| 437 | /* Section - Node */ | ||
| 438 | /******************/ | ||
| 439 | |||
| 440 | /** | ||
| 441 | * Get the node's type as a null-terminated string. | ||
| 442 | */ | ||
| 443 | const char *ts_node_type(TSNode self); | ||
| 444 | |||
| 445 | /** | ||
| 446 | * Get the node's type as a numerical id. | ||
| 447 | */ | ||
| 448 | TSSymbol ts_node_symbol(TSNode self); | ||
| 449 | |||
| 450 | /** | ||
| 451 | * Get the node's language. | ||
| 452 | */ | ||
| 453 | const TSLanguage *ts_node_language(TSNode self); | ||
| 454 | |||
| 455 | /** | ||
| 456 | * Get the node's type as it appears in the grammar ignoring aliases as a | ||
| 457 | * null-terminated string. | ||
| 458 | */ | ||
| 459 | const char *ts_node_grammar_type(TSNode self); | ||
| 460 | |||
| 461 | /** | ||
| 462 | * Get the node's type as a numerical id as it appears in the grammar ignoring | ||
| 463 | * aliases. This should be used in [`ts_language_next_state`] instead of | ||
| 464 | * [`ts_node_symbol`]. | ||
| 465 | */ | ||
| 466 | TSSymbol ts_node_grammar_symbol(TSNode self); | ||
| 467 | |||
| 468 | /** | ||
| 469 | * Get the node's start byte. | ||
| 470 | */ | ||
| 471 | uint32_t ts_node_start_byte(TSNode self); | ||
| 472 | |||
| 473 | /** | ||
| 474 | * Get the node's start position in terms of rows and columns. | ||
| 475 | */ | ||
| 476 | TSPoint ts_node_start_point(TSNode self); | ||
| 477 | |||
| 478 | /** | ||
| 479 | * Get the node's end byte. | ||
| 480 | */ | ||
| 481 | uint32_t ts_node_end_byte(TSNode self); | ||
| 482 | |||
| 483 | /** | ||
| 484 | * Get the node's end position in terms of rows and columns. | ||
| 485 | */ | ||
| 486 | TSPoint ts_node_end_point(TSNode self); | ||
| 487 | |||
| 488 | /** | ||
| 489 | * Get an S-expression representing the node as a string. | ||
| 490 | * | ||
| 491 | * This string is allocated with `malloc` and the caller is responsible for | ||
| 492 | * freeing it using `free`. | ||
| 493 | */ | ||
| 494 | char *ts_node_string(TSNode self); | ||
| 495 | |||
| 496 | /** | ||
| 497 | * Check if the node is null. Functions like [`ts_node_child`] and | ||
| 498 | * [`ts_node_next_sibling`] will return a null node to indicate that no such node | ||
| 499 | * was found. | ||
| 500 | */ | ||
| 501 | bool ts_node_is_null(TSNode self); | ||
| 502 | |||
| 503 | /** | ||
| 504 | * Check if the node is *named*. Named nodes correspond to named rules in the | ||
| 505 | * grammar, whereas *anonymous* nodes correspond to string literals in the | ||
| 506 | * grammar. | ||
| 507 | */ | ||
| 508 | bool ts_node_is_named(TSNode self); | ||
| 509 | |||
| 510 | /** | ||
| 511 | * Check if the node is *missing*. Missing nodes are inserted by the parser in | ||
| 512 | * order to recover from certain kinds of syntax errors. | ||
| 513 | */ | ||
| 514 | bool ts_node_is_missing(TSNode self); | ||
| 515 | |||
| 516 | /** | ||
| 517 | * Check if the node is *extra*. Extra nodes represent things like comments, | ||
| 518 | * which are not required by the grammar, but can appear anywhere. | ||
| 519 | */ | ||
| 520 | bool ts_node_is_extra(TSNode self); | ||
| 521 | |||
| 522 | /** | ||
| 523 | * Check if a syntax node has been edited. | ||
| 524 | */ | ||
| 525 | bool ts_node_has_changes(TSNode self); | ||
| 526 | |||
| 527 | /** | ||
| 528 | * Check if the node is a syntax error or contains any syntax errors. | ||
| 529 | */ | ||
| 530 | bool ts_node_has_error(TSNode self); | ||
| 531 | |||
| 532 | /** | ||
| 533 | * Check if the node is a syntax error. | ||
| 534 | */ | ||
| 535 | bool ts_node_is_error(TSNode self); | ||
| 536 | |||
| 537 | /** | ||
| 538 | * Get this node's parse state. | ||
| 539 | */ | ||
| 540 | TSStateId ts_node_parse_state(TSNode self); | ||
| 541 | |||
| 542 | /** | ||
| 543 | * Get the parse state after this node. | ||
| 544 | */ | ||
| 545 | TSStateId ts_node_next_parse_state(TSNode self); | ||
| 546 | |||
| 547 | /** | ||
| 548 | * Get the node's immediate parent. | ||
| 549 | */ | ||
| 550 | TSNode ts_node_parent(TSNode self); | ||
| 551 | |||
| 552 | /** | ||
| 553 | * Get the node's child at the given index, where zero represents the first | ||
| 554 | * child. | ||
| 555 | */ | ||
| 556 | TSNode ts_node_child(TSNode self, uint32_t child_index); | ||
| 557 | |||
| 558 | /** | ||
| 559 | * Get the field name for the node's child at the given index, where zero represents | ||
| 560 | * the first child. Returns NULL if no field is found. | ||
| 561 | */ | ||
| 562 | const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index); | ||
| 563 | |||
| 564 | /** | ||
| 565 | * Get the node's number of children. | ||
| 566 | */ | ||
| 567 | uint32_t ts_node_child_count(TSNode self); | ||
| 568 | |||
| 569 | /** | ||
| 570 | * Get the node's *named* child at the given index. | ||
| 571 | * | ||
| 572 | * See also [`ts_node_is_named`]. | ||
| 573 | */ | ||
| 574 | TSNode ts_node_named_child(TSNode self, uint32_t child_index); | ||
| 575 | |||
| 576 | /** | ||
| 577 | * Get the node's number of *named* children. | ||
| 578 | * | ||
| 579 | * See also [`ts_node_is_named`]. | ||
| 580 | */ | ||
| 581 | uint32_t ts_node_named_child_count(TSNode self); | ||
| 582 | |||
| 583 | /** | ||
| 584 | * Get the node's child with the given field name. | ||
| 585 | */ | ||
| 586 | TSNode ts_node_child_by_field_name( | ||
| 587 | TSNode self, | ||
| 588 | const char *name, | ||
| 589 | uint32_t name_length | ||
| 590 | ); | ||
| 591 | |||
| 592 | /** | ||
| 593 | * Get the node's child with the given numerical field id. | ||
| 594 | * | ||
| 595 | * You can convert a field name to an id using the | ||
| 596 | * [`ts_language_field_id_for_name`] function. | ||
| 597 | */ | ||
| 598 | TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id); | ||
| 599 | |||
| 600 | /** | ||
| 601 | * Get the node's next / previous sibling. | ||
| 602 | */ | ||
| 603 | TSNode ts_node_next_sibling(TSNode self); | ||
| 604 | TSNode ts_node_prev_sibling(TSNode self); | ||
| 605 | |||
| 606 | /** | ||
| 607 | * Get the node's next / previous *named* sibling. | ||
| 608 | */ | ||
| 609 | TSNode ts_node_next_named_sibling(TSNode self); | ||
| 610 | TSNode ts_node_prev_named_sibling(TSNode self); | ||
| 611 | |||
| 612 | /** | ||
| 613 | * Get the node's first child that extends beyond the given byte offset. | ||
| 614 | */ | ||
| 615 | TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte); | ||
| 616 | |||
| 617 | /** | ||
| 618 | * Get the node's first named child that extends beyond the given byte offset. | ||
| 619 | */ | ||
| 620 | TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte); | ||
| 621 | |||
| 622 | /** | ||
| 623 | * Get the node's number of descendants, including one for the node itself. | ||
| 624 | */ | ||
| 625 | uint32_t ts_node_descendant_count(TSNode self); | ||
| 626 | |||
| 627 | /** | ||
| 628 | * Get the smallest node within this node that spans the given range of bytes | ||
| 629 | * or (row, column) positions. | ||
| 630 | */ | ||
| 631 | TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); | ||
| 632 | TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); | ||
| 633 | |||
| 634 | /** | ||
| 635 | * Get the smallest named node within this node that spans the given range of | ||
| 636 | * bytes or (row, column) positions. | ||
| 637 | */ | ||
| 638 | TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); | ||
| 639 | TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); | ||
| 640 | |||
| 641 | /** | ||
| 642 | * Edit the node to keep it in-sync with source code that has been edited. | ||
| 643 | * | ||
| 644 | * This function is only rarely needed. When you edit a syntax tree with the | ||
| 645 | * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree | ||
| 646 | * afterward will already reflect the edit. You only need to use [`ts_node_edit`] | ||
| 647 | * when you have a [`TSNode`] instance that you want to keep and continue to use | ||
| 648 | * after an edit. | ||
| 649 | */ | ||
| 650 | void ts_node_edit(TSNode *self, const TSInputEdit *edit); | ||
| 651 | |||
| 652 | /** | ||
| 653 | * Check if two nodes are identical. | ||
| 654 | */ | ||
| 655 | bool ts_node_eq(TSNode self, TSNode other); | ||
| 656 | |||
| 657 | /************************/ | ||
| 658 | /* Section - TreeCursor */ | ||
| 659 | /************************/ | ||
| 660 | |||
| 661 | /** | ||
| 662 | * Create a new tree cursor starting from the given node. | ||
| 663 | * | ||
| 664 | * A tree cursor allows you to walk a syntax tree more efficiently than is | ||
| 665 | * possible using the [`TSNode`] functions. It is a mutable object that is always | ||
| 666 | * on a certain syntax node, and can be moved imperatively to different nodes. | ||
| 667 | */ | ||
| 668 | TSTreeCursor ts_tree_cursor_new(TSNode node); | ||
| 669 | |||
| 670 | /** | ||
| 671 | * Delete a tree cursor, freeing all of the memory that it used. | ||
| 672 | */ | ||
| 673 | void ts_tree_cursor_delete(TSTreeCursor *self); | ||
| 674 | |||
| 675 | /** | ||
| 676 | * Re-initialize a tree cursor to start at a different node. | ||
| 677 | */ | ||
| 678 | void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node); | ||
| 679 | |||
| 680 | /** | ||
| 681 | * Re-initialize a tree cursor to the same position as another cursor. | ||
| 682 | * | ||
| 683 | * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and | ||
| 684 | * allows reusing already created cursors. | ||
| 685 | */ | ||
| 686 | void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src); | ||
| 687 | |||
| 688 | /** | ||
| 689 | * Get the tree cursor's current node. | ||
| 690 | */ | ||
| 691 | TSNode ts_tree_cursor_current_node(const TSTreeCursor *self); | ||
| 692 | |||
| 693 | /** | ||
| 694 | * Get the field name of the tree cursor's current node. | ||
| 695 | * | ||
| 696 | * This returns `NULL` if the current node doesn't have a field. | ||
| 697 | * See also [`ts_node_child_by_field_name`]. | ||
| 698 | */ | ||
| 699 | const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); | ||
| 700 | |||
| 701 | /** | ||
| 702 | * Get the field id of the tree cursor's current node. | ||
| 703 | * | ||
| 704 | * This returns zero if the current node doesn't have a field. | ||
| 705 | * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. | ||
| 706 | */ | ||
| 707 | TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self); | ||
| 708 | |||
| 709 | /** | ||
| 710 | * Move the cursor to the parent of its current node. | ||
| 711 | * | ||
| 712 | * This returns `true` if the cursor successfully moved, and returns `false` | ||
| 713 | * if there was no parent node (the cursor was already on the root node). | ||
| 714 | */ | ||
| 715 | bool ts_tree_cursor_goto_parent(TSTreeCursor *self); | ||
| 716 | |||
| 717 | /** | ||
| 718 | * Move the cursor to the next sibling of its current node. | ||
| 719 | * | ||
| 720 | * This returns `true` if the cursor successfully moved, and returns `false` | ||
| 721 | * if there was no next sibling node. | ||
| 722 | */ | ||
| 723 | bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self); | ||
| 724 | |||
| 725 | /** | ||
| 726 | * Move the cursor to the previous sibling of its current node. | ||
| 727 | * | ||
| 728 | * This returns `true` if the cursor successfully moved, and returns `false` if | ||
| 729 | * there was no previous sibling node. | ||
| 730 | * | ||
| 731 | * Note that this function may be slower than | ||
| 732 | * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In | ||
| 733 | * the worst case, this will need to iterate through all the children up to the | ||
| 734 | * previous sibling node to recalculate its position. | ||
| 735 | */ | ||
| 736 | bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self); | ||
| 737 | |||
| 738 | /** | ||
| 739 | * Move the cursor to the first child of its current node. | ||
| 740 | * | ||
| 741 | * This returns `true` if the cursor successfully moved, and returns `false` | ||
| 742 | * if there were no children. | ||
| 743 | */ | ||
| 744 | bool ts_tree_cursor_goto_first_child(TSTreeCursor *self); | ||
| 745 | |||
| 746 | /** | ||
| 747 | * Move the cursor to the last child of its current node. | ||
| 748 | * | ||
| 749 | * This returns `true` if the cursor successfully moved, and returns `false` if | ||
| 750 | * there were no children. | ||
| 751 | * | ||
| 752 | * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`] | ||
| 753 | * because it needs to iterate through all the children to compute the child's | ||
| 754 | * position. | ||
| 755 | */ | ||
| 756 | bool ts_tree_cursor_goto_last_child(TSTreeCursor *self); | ||
| 757 | |||
| 758 | /** | ||
| 759 | * Move the cursor to the node that is the nth descendant of | ||
| 760 | * the original node that the cursor was constructed with, where | ||
| 761 | * zero represents the original node itself. | ||
| 762 | */ | ||
| 763 | void ts_tree_cursor_goto_descendant(TSTreeCursor *self, uint32_t goal_descendant_index); | ||
| 764 | |||
| 765 | /** | ||
| 766 | * Get the index of the cursor's current node out of all of the | ||
| 767 | * descendants of the original node that the cursor was constructed with. | ||
| 768 | */ | ||
| 769 | uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *self); | ||
| 770 | |||
| 771 | /** | ||
| 772 | * Get the depth of the cursor's current node relative to the original | ||
| 773 | * node that the cursor was constructed with. | ||
| 774 | */ | ||
| 775 | uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self); | ||
| 776 | |||
| 777 | /** | ||
| 778 | * Move the cursor to the first child of its current node that extends beyond | ||
| 779 | * the given byte offset or point. | ||
| 780 | * | ||
| 781 | * This returns the index of the child node if one was found, and returns -1 | ||
| 782 | * if no such child was found. | ||
| 783 | */ | ||
| 784 | int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte); | ||
| 785 | int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point); | ||
| 786 | |||
| 787 | TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor); | ||
| 788 | |||
| 789 | /*******************/ | ||
| 790 | /* Section - Query */ | ||
| 791 | /*******************/ | ||
| 792 | |||
| 793 | /** | ||
| 794 | * Create a new query from a string containing one or more S-expression | ||
| 795 | * patterns. The query is associated with a particular language, and can | ||
| 796 | * only be run on syntax nodes parsed with that language. | ||
| 797 | * | ||
| 798 | * If all of the given patterns are valid, this returns a [`TSQuery`]. | ||
| 799 | * If a pattern is invalid, this returns `NULL`, and provides two pieces | ||
| 800 | * of information about the problem: | ||
| 801 | * 1. The byte offset of the error is written to the `error_offset` parameter. | ||
| 802 | * 2. The type of error is written to the `error_type` parameter. | ||
| 803 | */ | ||
| 804 | TSQuery *ts_query_new( | ||
| 805 | const TSLanguage *language, | ||
| 806 | const char *source, | ||
| 807 | uint32_t source_len, | ||
| 808 | uint32_t *error_offset, | ||
| 809 | TSQueryError *error_type | ||
| 810 | ); | ||
| 811 | |||
| 812 | /** | ||
| 813 | * Delete a query, freeing all of the memory that it used. | ||
| 814 | */ | ||
| 815 | void ts_query_delete(TSQuery *self); | ||
| 816 | |||
| 817 | /** | ||
| 818 | * Get the number of patterns, captures, or string literals in the query. | ||
| 819 | */ | ||
| 820 | uint32_t ts_query_pattern_count(const TSQuery *self); | ||
| 821 | uint32_t ts_query_capture_count(const TSQuery *self); | ||
| 822 | uint32_t ts_query_string_count(const TSQuery *self); | ||
| 823 | |||
| 824 | /** | ||
| 825 | * Get the byte offset where the given pattern starts in the query's source. | ||
| 826 | * | ||
| 827 | * This can be useful when combining queries by concatenating their source | ||
| 828 | * code strings. | ||
| 829 | */ | ||
| 830 | uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); | ||
| 831 | |||
| 832 | /** | ||
| 833 | * Get all of the predicates for the given pattern in the query. | ||
| 834 | * | ||
| 835 | * The predicates are represented as a single array of steps. There are three | ||
| 836 | * types of steps in this array, which correspond to the three legal values for | ||
| 837 | * the `type` field: | ||
| 838 | * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names | ||
| 839 | * of captures. Their `value_id` can be used with the | ||
| 840 | * [`ts_query_capture_name_for_id`] function to obtain the name of the capture. | ||
| 841 | * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal | ||
| 842 | * strings. Their `value_id` can be used with the | ||
| 843 | * [`ts_query_string_value_for_id`] function to obtain their string value. | ||
| 844 | * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* | ||
| 845 | * that represent the end of an individual predicate. If a pattern has two | ||
| 846 | * predicates, then there will be two steps with this `type` in the array. | ||
| 847 | */ | ||
| 848 | const TSQueryPredicateStep *ts_query_predicates_for_pattern( | ||
| 849 | const TSQuery *self, | ||
| 850 | uint32_t pattern_index, | ||
| 851 | uint32_t *step_count | ||
| 852 | ); | ||
| 853 | |||
| 854 | /** | ||
| 855 | * Check if the given pattern in the query has a single root node. | ||
| 856 | */ | ||
| 857 | bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index); | ||
| 858 | |||
| 859 | /** | ||
| 860 | * Check if the given pattern in the query is 'non local'. | ||
| 861 | * | ||
| 862 | * A non-local pattern has multiple root nodes and can match within a | ||
| 863 | * repeating sequence of nodes, as specified by the grammar. Non-local | ||
| 864 | * patterns disable certain optimizations that would otherwise be possible | ||
| 865 | * when executing a query on a specific range of a syntax tree. | ||
| 866 | */ | ||
| 867 | bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index); | ||
| 868 | |||
| 869 | /** | ||
| 870 | * Check if a given pattern is guaranteed to match once a given step is reached. | ||
| 871 | * The step is specified by its byte offset in the query's source code. | ||
| 872 | */ | ||
| 873 | bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset); | ||
| 874 | |||
| 875 | /** | ||
| 876 | * Get the name and length of one of the query's captures, or one of the | ||
| 877 | * query's string literals. Each capture and string is associated with a | ||
| 878 | * numeric id based on the order that it appeared in the query's source. | ||
| 879 | */ | ||
| 880 | const char *ts_query_capture_name_for_id( | ||
| 881 | const TSQuery *self, | ||
| 882 | uint32_t index, | ||
| 883 | uint32_t *length | ||
| 884 | ); | ||
| 885 | |||
| 886 | /** | ||
| 887 | * Get the quantifier of the query's captures. Each capture is associated | ||
| 888 | * with a numeric id based on the order that it appeared in the query's source. | ||
| 889 | */ | ||
| 890 | TSQuantifier ts_query_capture_quantifier_for_id( | ||
| 891 | const TSQuery *self, | ||
| 892 | uint32_t pattern_index, | ||
| 893 | uint32_t capture_index | ||
| 894 | ); | ||
| 895 | |||
| 896 | const char *ts_query_string_value_for_id( | ||
| 897 | const TSQuery *self, | ||
| 898 | uint32_t index, | ||
| 899 | uint32_t *length | ||
| 900 | ); | ||
| 901 | |||
| 902 | /** | ||
| 903 | * Disable a certain capture within a query. | ||
| 904 | * | ||
| 905 | * This prevents the capture from being returned in matches, and also avoids | ||
| 906 | * any resource usage associated with recording the capture. Currently, there | ||
| 907 | * is no way to undo this. | ||
| 908 | */ | ||
| 909 | void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length); | ||
| 910 | |||
| 911 | /** | ||
| 912 | * Disable a certain pattern within a query. | ||
| 913 | * | ||
| 914 | * This prevents the pattern from matching and removes most of the overhead | ||
| 915 | * associated with the pattern. Currently, there is no way to undo this. | ||
| 916 | */ | ||
| 917 | void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index); | ||
| 918 | |||
| 919 | /** | ||
| 920 | * Create a new cursor for executing a given query. | ||
| 921 | * | ||
| 922 | * The cursor stores the state that is needed to iteratively search | ||
| 923 | * for matches. To use the query cursor, first call [`ts_query_cursor_exec`] | ||
| 924 | * to start running a given query on a given syntax node. Then, there are | ||
| 925 | * two options for consuming the results of the query: | ||
| 926 | * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the | ||
| 927 | * *matches* in the order that they were found. Each match contains the | ||
| 928 | * index of the pattern that matched, and an array of captures. Because | ||
| 929 | * multiple patterns can match the same set of nodes, one match may contain | ||
| 930 | * captures that appear *before* some of the captures from a previous match. | ||
| 931 | * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the | ||
| 932 | * individual *captures* in the order that they appear. This is useful if | ||
| 933 | * you don't care about which pattern matched, and just want a single ordered | ||
| 934 | * sequence of captures. | ||
| 935 | * | ||
| 936 | * If you don't care about consuming all of the results, you can stop calling | ||
| 937 | * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point. | ||
| 938 | * You can then start executing another query on another node by calling | ||
| 939 | * [`ts_query_cursor_exec`] again. | ||
| 940 | */ | ||
| 941 | TSQueryCursor *ts_query_cursor_new(void); | ||
| 942 | |||
| 943 | /** | ||
| 944 | * Delete a query cursor, freeing all of the memory that it used. | ||
| 945 | */ | ||
| 946 | void ts_query_cursor_delete(TSQueryCursor *self); | ||
| 947 | |||
| 948 | /** | ||
| 949 | * Start running a given query on a given node. | ||
| 950 | */ | ||
| 951 | void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node); | ||
| 952 | |||
| 953 | /** | ||
| 954 | * Manage the maximum number of in-progress matches allowed by this query | ||
| 955 | * cursor. | ||
| 956 | * | ||
| 957 | * Query cursors have an optional maximum capacity for storing lists of | ||
| 958 | * in-progress captures. If this capacity is exceeded, then the | ||
| 959 | * earliest-starting match will silently be dropped to make room for further | ||
| 960 | * matches. This maximum capacity is optional — by default, query cursors allow | ||
| 961 | * any number of pending matches, dynamically allocating new space for them as | ||
| 962 | * needed as the query is executed. | ||
| 963 | */ | ||
| 964 | bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self); | ||
| 965 | uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self); | ||
| 966 | void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit); | ||
| 967 | |||
| 968 | /** | ||
| 969 | * Set the range of bytes or (row, column) positions in which the query | ||
| 970 | * will be executed. | ||
| 971 | */ | ||
| 972 | void ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte); | ||
| 973 | void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point); | ||
| 974 | |||
| 975 | /** | ||
| 976 | * Advance to the next match of the currently running query. | ||
| 977 | * | ||
| 978 | * If there is a match, write it to `*match` and return `true`. | ||
| 979 | * Otherwise, return `false`. | ||
| 980 | */ | ||
| 981 | bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match); | ||
| 982 | void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id); | ||
| 983 | |||
| 984 | /** | ||
| 985 | * Advance to the next capture of the currently running query. | ||
| 986 | * | ||
| 987 | * If there is a capture, write its match to `*match` and its index within | ||
| 988 | * the match's capture list to `*capture_index`. Otherwise, return `false`. | ||
| 989 | */ | ||
| 990 | bool ts_query_cursor_next_capture( | ||
| 991 | TSQueryCursor *self, | ||
| 992 | TSQueryMatch *match, | ||
| 993 | uint32_t *capture_index | ||
| 994 | ); | ||
| 995 | |||
| 996 | /** | ||
| 997 | * Set the maximum start depth for a query cursor. | ||
| 998 | * | ||
| 999 | * This prevents cursors from exploring children nodes at a certain depth. | ||
| 1000 | * Note if a pattern includes many children, then they will still be checked. | ||
| 1001 | * | ||
| 1002 | * A max start depth of zero is a special case: it helps to destructure a | ||
| 1003 | * subtree by staying on a node and using captures for the parts of interest. | ||
| 1004 | * Note that a max start depth of zero only limits the search depth for a | ||
| 1005 | * pattern's root node; other nodes that are part of the pattern may be | ||
| 1006 | * searched at any depth, as defined by the pattern structure. | ||
| 1007 | * | ||
| 1008 | * Set to `UINT32_MAX` to remove the maximum start depth. | ||
| 1009 | */ | ||
| 1010 | void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start_depth); | ||
| 1011 | |||
| 1012 | /**********************/ | ||
| 1013 | /* Section - Language */ | ||
| 1014 | /**********************/ | ||
| 1015 | |||
| 1016 | /** | ||
| 1017 | * Get the number of distinct node types in the language. | ||
| 1018 | */ | ||
| 1019 | uint32_t ts_language_symbol_count(const TSLanguage *self); | ||
| 1020 | |||
| 1021 | /** | ||
| 1022 | * Get the number of valid states in this language. | ||
| 1023 | */ | ||
| 1024 | uint32_t ts_language_state_count(const TSLanguage *self); | ||
| 1025 | |||
| 1026 | /** | ||
| 1027 | * Get a node type string for the given numerical id. | ||
| 1028 | */ | ||
| 1029 | const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol); | ||
| 1030 | |||
| 1031 | /** | ||
| 1032 | * Get the numerical id for the given node type string. | ||
| 1033 | */ | ||
| 1034 | TSSymbol ts_language_symbol_for_name( | ||
| 1035 | const TSLanguage *self, | ||
| 1036 | const char *string, | ||
| 1037 | uint32_t length, | ||
| 1038 | bool is_named | ||
| 1039 | ); | ||
| 1040 | |||
| 1041 | /** | ||
| 1042 | * Get the number of distinct field names in the language. | ||
| 1043 | */ | ||
| 1044 | uint32_t ts_language_field_count(const TSLanguage *self); | ||
| 1045 | |||
| 1046 | /** | ||
| 1047 | * Get the field name string for the given numerical id. | ||
| 1048 | */ | ||
| 1049 | const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id); | ||
| 1050 | |||
| 1051 | /** | ||
| 1052 | * Get the numerical id for the given field name string. | ||
| 1053 | */ | ||
| 1054 | TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name, uint32_t name_length); | ||
| 1055 | |||
| 1056 | /** | ||
| 1057 | * Check whether the given node type id belongs to named nodes, anonymous nodes, | ||
| 1058 | * or hidden nodes. | ||
| 1059 | * | ||
| 1060 | * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API. | ||
| 1061 | */ | ||
| 1062 | TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); | ||
| 1063 | |||
| 1064 | /** | ||
| 1065 | * Get the ABI version number for this language. This version number is used | ||
| 1066 | * to ensure that languages were generated by a compatible version of | ||
| 1067 | * Tree-sitter. | ||
| 1068 | * | ||
| 1069 | * See also [`ts_parser_set_language`]. | ||
| 1070 | */ | ||
| 1071 | uint32_t ts_language_version(const TSLanguage *self); | ||
| 1072 | |||
| 1073 | /** | ||
| 1074 | * Get the next parse state. Combine this with lookahead iterators to generate | ||
| 1075 | * completion suggestions or valid symbols in error nodes. Use | ||
| 1076 | * [`ts_node_grammar_symbol`] for valid symbols. | ||
| 1077 | */ | ||
| 1078 | TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); | ||
| 1079 | |||
| 1080 | /********************************/ | ||
| 1081 | /* Section - Lookahead Iterator */ | ||
| 1082 | /********************************/ | ||
| 1083 | |||
| 1084 | /** | ||
| 1085 | * Create a new lookahead iterator for the given language and parse state. | ||
| 1086 | * | ||
| 1087 | * This returns `NULL` if state is invalid for the language. | ||
| 1088 | * | ||
| 1089 | * Repeatedly using [`ts_lookahead_iterator_next`] and | ||
| 1090 | * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the | ||
| 1091 | * given parse state. Newly created lookahead iterators will contain the `ERROR` | ||
| 1092 | * symbol. | ||
| 1093 | * | ||
| 1094 | * Lookahead iterators can be useful to generate suggestions and improve syntax | ||
| 1095 | * error diagnostics. To get symbols valid in an ERROR node, use the lookahead | ||
| 1096 | * iterator on its first leaf node state. For `MISSING` nodes, a lookahead | ||
| 1097 | * iterator created on the previous non-extra leaf node may be appropriate. | ||
| 1098 | */ | ||
| 1099 | TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state); | ||
| 1100 | |||
| 1101 | /** | ||
| 1102 | * Delete a lookahead iterator, freeing all the memory used. | ||
| 1103 | */ | ||
| 1104 | void ts_lookahead_iterator_delete(TSLookaheadIterator *self); | ||
| 1105 | |||
| 1106 | /** | ||
| 1107 | * Reset the lookahead iterator to another state. | ||
| 1108 | * | ||
| 1109 | * This returns `true` if the iterator was reset to the given state and `false` | ||
| 1110 | * otherwise. | ||
| 1111 | */ | ||
| 1112 | bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId state); | ||
| 1113 | |||
| 1114 | /** | ||
| 1115 | * Reset the lookahead iterator. | ||
| 1116 | * | ||
| 1117 | * This returns `true` if the language was set successfully and `false` | ||
| 1118 | * otherwise. | ||
| 1119 | */ | ||
| 1120 | bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state); | ||
| 1121 | |||
| 1122 | /** | ||
| 1123 | * Get the current language of the lookahead iterator. | ||
| 1124 | */ | ||
| 1125 | const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self); | ||
| 1126 | |||
| 1127 | /** | ||
| 1128 | * Advance the lookahead iterator to the next symbol. | ||
| 1129 | * | ||
| 1130 | * This returns `true` if there is a new symbol and `false` otherwise. | ||
| 1131 | */ | ||
| 1132 | bool ts_lookahead_iterator_next(TSLookaheadIterator *self); | ||
| 1133 | |||
| 1134 | /** | ||
| 1135 | * Get the current symbol of the lookahead iterator. | ||
| 1136 | */ | ||
| 1137 | TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self); | ||
| 1138 | |||
| 1139 | /** | ||
| 1140 | * Get the current symbol type of the lookahead iterator as a null-terminated | ||
| 1141 | * string. | ||
| 1142 | */ | ||
| 1143 | const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self); | ||
| 1144 | |||
| 1145 | /**********************************/ | ||
| 1146 | /* Section - Global Configuration */ | ||
| 1147 | /**********************************/ | ||
| 1148 | |||
| 1149 | /** | ||
| 1150 | * Set the allocation functions used by the library. | ||
| 1151 | * | ||
| 1152 | * By default, Tree-sitter uses the standard libc allocation functions, | ||
| 1153 | * but aborts the process when an allocation fails. This function lets | ||
| 1154 | * you supply alternative allocation functions at runtime. | ||
| 1155 | * | ||
| 1156 | * If you pass `NULL` for any parameter, Tree-sitter will switch back to | ||
| 1157 | * its default implementation of that function. | ||
| 1158 | * | ||
| 1159 | * If you call this function after the library has already been used, then | ||
| 1160 | * you must ensure that either: | ||
| 1161 | * 1. All the existing objects have been freed. | ||
| 1162 | * 2. The new allocator shares its state with the old one, so it is capable | ||
| 1163 | * of freeing memory that was allocated by the old allocator. | ||
| 1164 | */ | ||
| 1165 | void ts_set_allocator( | ||
| 1166 | void *(*new_malloc)(size_t), | ||
| 1167 | void *(*new_calloc)(size_t, size_t), | ||
| 1168 | void *(*new_realloc)(void *, size_t), | ||
| 1169 | void (*new_free)(void *) | ||
| 1170 | ); | ||
| 1171 | |||
| 1172 | #ifdef __cplusplus | ||
| 1173 | } | ||
| 1174 | #endif | ||
| 1175 | |||
| 1176 | #if defined(__GNUC__) || defined(__clang__) | ||
| 1177 | #pragma GCC visibility pop | ||
| 1178 | #endif | ||
| 1179 | |||
| 1180 | #endif // TREE_SITTER_API_H_ | ||
diff --git a/vendor/tree-sitter/lib/include/tree_sitter/parser.h b/vendor/tree-sitter/lib/include/tree_sitter/parser.h new file mode 100644 index 0000000..d210325 --- /dev/null +++ b/vendor/tree-sitter/lib/include/tree_sitter/parser.h | |||
| @@ -0,0 +1,224 @@ | |||
| 1 | #ifndef TREE_SITTER_PARSER_H_ | ||
| 2 | #define TREE_SITTER_PARSER_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include <stdbool.h> | ||
| 9 | #include <stdint.h> | ||
| 10 | #include <stdlib.h> | ||
| 11 | |||
/* Reserved symbol ids: the error symbol is `(TSSymbol)-1` (the largest value of
 * the unsigned 16-bit TSSymbol typedef'd below) and the end symbol is 0. */
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
/* NOTE(review): presumably the size in bytes of the buffer handed to the
 * external scanner's serialize/deserialize hooks — confirm against the runtime. */
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

/* Fallback declarations, compiled only when api.h (which defines
 * TREE_SITTER_API_H_) has not already been included. */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
| 22 | |||
/* One element of the array referenced by `TSLanguage.field_map_entries`:
 * associates a field id with a child index. */
typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  /* NOTE(review): presumably marks a field inherited from a hidden child node — confirm. */
  bool inherited;
} TSFieldMapEntry;
| 28 | |||
/* One element of the array referenced by `TSLanguage.field_map_slices`.
 * NOTE(review): presumably an (offset, count) window into
 * `TSLanguage.field_map_entries` — confirm. */
typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;
| 33 | |||
/* Per-symbol boolean flags; `TSLanguage.symbol_metadata` points at an array of
 * these. */
typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;
| 39 | |||
typedef struct TSLexer TSLexer;

/* Lexer interface passed to `TSLanguage.lex_fn`, `TSLanguage.keyword_lex_fn`
 * and the external scanner's `scan` hook. The lexer macros in this header
 * drive it through these members. */
struct TSLexer {
  /* Current lookahead value; START_LEXER copies it into a local after every advance. */
  int32_t lookahead;
  /* Written by ACCEPT_TOKEN with the recognized symbol. */
  TSSymbol result_symbol;
  /* Called by START_LEXER's `next_state` label; the bool argument is the current `skip` flag. */
  void (*advance)(TSLexer *, bool);
  /* Called by ACCEPT_TOKEN right after the result symbol is recorded. */
  void (*mark_end)(TSLexer *);
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  /* Queried by START_LEXER each time the lookahead is reloaded. */
  bool (*eof)(const TSLexer *);
};
| 51 | |||
/* Tag values stored in the `type` member of TSParseAction by the SHIFT*,
 * REDUCE, ACCEPT_INPUT and RECOVER macros. Enumerators take the default
 * values 0..3 in declaration order. */
typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;
| 58 | |||
/* A single parse action. Every alternative of the union begins with a
 * `uint8_t type` member, which the parse-table macros fill with a
 * TSParseActionType value. */
typedef union {
  /* Payload used by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA. */
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  /* Payload used by REDUCE. */
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  /* Bare tag, the only member set by RECOVER and ACCEPT_INPUT. */
  uint8_t type;
} TSParseAction;
| 75 | |||
/* Pair of lex-state ids; `TSLanguage.lex_modes` points at an array of these.
 * NOTE(review): presumably one entry per parse state — confirm. */
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;
| 80 | |||
/* Element type of `TSLanguage.parse_actions`: either a parse action or an
 * `entry` header carrying a count and a reusable flag.
 * NOTE(review): presumably a header is followed by `count` action elements —
 * confirm against the runtime's table lookup. */
typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;
| 88 | |||
/* Description of a grammar: a version number, element counts, pointers to
 * the parse/action/name/field/alias tables, the lexing callbacks, and the
 * external scanner hooks. */
struct TSLanguage {
  uint32_t version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexMode *lex_modes;
  /* Lex callbacks: take the lexer and a state id, return a bool. */
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  /* External scanner hooks. NOTE(review): presumably the `void *` returned by
   * `create` is the payload passed as first argument to the other hooks —
   * confirm. */
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
};
| 127 | |||
| 128 | /* | ||
| 129 | * Lexer Macros | ||
| 130 | */ | ||
| 131 | |||
/*
 * Prologue of a lex function. Requires a `lexer` pointer in scope.
 * Declares the locals `result`, `skip`, `eof` and `lookahead`, then jumps to
 * the `start` label. The `next_state` label (targeted by ADVANCE and SKIP)
 * calls `lexer->advance(lexer, skip)` and falls through into `start`, which
 * clears `skip` and reloads `lookahead` and `eof` from the lexer.
 */
#define START_LEXER() \
  bool result = false; \
  bool skip = false; \
  bool eof = false; \
  int32_t lookahead; \
  goto start; \
  next_state: \
  lexer->advance(lexer, skip); \
  start: \
  skip = false; \
  lookahead = lexer->lookahead; \
  eof = lexer->eof(lexer);
| 144 | |||
/*
 * Set `state` to `state_value` and jump to START_LEXER's `next_state` label,
 * which advances the lexer. `state` is not declared by START_LEXER and must
 * already be in scope at the expansion site.
 */
#define ADVANCE(state_value) \
  { \
    state = state_value; \
    goto next_state; \
  }
| 150 | |||
/*
 * Same as ADVANCE, but sets `skip = true` first, so the `lexer->advance` call
 * at `next_state` receives `true` as its bool argument. START_LEXER's `start`
 * label resets `skip` to false afterwards.
 */
#define SKIP(state_value) \
  { \
    skip = true; \
    state = state_value; \
    goto next_state; \
  }
| 157 | |||
/*
 * Record a recognized token: set `result` to true, store `symbol_value` in
 * `lexer->result_symbol`, and call `lexer->mark_end`.
 * Expands to three separate statements with no enclosing braces, so it must
 * not be used as the sole body of an unbraced `if`/loop.
 */
#define ACCEPT_TOKEN(symbol_value) \
  result = true; \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);
| 162 | |||
/* Return the `result` flag declared by START_LEXER (true once ACCEPT_TOKEN has
 * run). The expansion already ends in a semicolon. */
#define END_STATE() return result;
| 164 | |||
| 165 | /* | ||
| 166 | * Parse Table Macros | ||
| 167 | */ | ||
| 168 | |||
/* Offset a state id by LARGE_STATE_COUNT, which is not defined in this header
 * and must be provided by the including file. */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

/* Identity macro: expands to its argument unchanged (not parenthesized). */
#define STATE(id) id

/* Identity macro: expands to its argument unchanged (not parenthesized). */
#define ACTIONS(id) id
| 174 | |||
/*
 * Brace initializer for a shift action targeting `state_value`. Members not
 * named here (`extra`, `repetition`) are left to default initialization.
 * NOTE(review): the doubled braces presumably initialize a TSParseActionEntry
 * (outer) and its `action` member (inner) — confirm at the use sites.
 */
#define SHIFT(state_value) \
  {{ \
    .shift = { \
      .type = TSParseActionTypeShift, \
      .state = (state_value) \
    } \
  }}
| 182 | |||
/*
 * Like SHIFT, but additionally sets the `repetition` flag of the shift
 * payload to true.
 */
#define SHIFT_REPEAT(state_value) \
  {{ \
    .shift = { \
      .type = TSParseActionTypeShift, \
      .state = (state_value), \
      .repetition = true \
    } \
  }}
| 191 | |||
/*
 * Brace initializer for a shift action with the `extra` flag set to true; no
 * target state is named, so `state` is left to default initialization.
 */
#define SHIFT_EXTRA() \
  {{ \
    .shift = { \
      .type = TSParseActionTypeShift, \
      .extra = true \
    } \
  }}
| 199 | |||
/*
 * Brace initializer for a reduce action producing `symbol_val` from
 * `child_count_val` children. Any extra arguments are pasted verbatim after
 * `.child_count`, so callers can append further designated initializers for
 * the `reduce` payload (e.g. `.dynamic_precedence` or `.production_id`).
 */
#define REDUCE(symbol_val, child_count_val, ...) \
  {{ \
    .reduce = { \
      .type = TSParseActionTypeReduce, \
      .symbol = symbol_val, \
      .child_count = child_count_val, \
      __VA_ARGS__ \
    }, \
  }}
| 209 | |||
/* Brace initializer for an action whose bare `type` tag is
 * TSParseActionTypeRecover; no payload members are set. */
#define RECOVER() \
  {{ \
    .type = TSParseActionTypeRecover \
  }}
| 214 | |||
/* Brace initializer for an action whose bare `type` tag is
 * TSParseActionTypeAccept; no payload members are set. */
#define ACCEPT_INPUT() \
  {{ \
    .type = TSParseActionTypeAccept \
  }}
| 219 | |||
| 220 | #ifdef __cplusplus | ||
| 221 | } | ||
| 222 | #endif | ||
| 223 | |||
| 224 | #endif // TREE_SITTER_PARSER_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/alloc.c b/vendor/tree-sitter/lib/src/alloc.c new file mode 100644 index 0000000..78b8057 --- /dev/null +++ b/vendor/tree-sitter/lib/src/alloc.c | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | #include "alloc.h" | ||
| 2 | #include <stdlib.h> | ||
| 3 | |||
// Default `malloc` wrapper. A failed non-empty request is reported on stderr
// and terminates the process via abort(); a NULL result for a zero-byte
// request is passed through to the caller.
static void *ts_malloc_default(size_t size) {
  void *memory = malloc(size);
  if (memory == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
    abort();
  }
  return memory;
}
| 12 | |||
// Default `calloc` wrapper.
//
// Returns zero-initialized storage for `count` elements of `size` bytes.
// If a non-empty request fails, the failure is reported on stderr and the
// process is terminated via abort().
//
// A request is only treated as a failure when BOTH `count` and `size` are
// non-zero: calloc is allowed to return NULL for a zero-byte request, and
// that must not abort the process.
static void *ts_calloc_default(size_t count, size_t size) {
  void *result = calloc(count, size);
  if (count > 0 && size > 0 && !result) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    abort();
  }
  return result;
}
| 21 | |||
// Default `realloc` wrapper. When resizing to a non-zero size fails, the
// failure is reported on stderr and the process is terminated via abort().
static void *ts_realloc_default(void *buffer, size_t size) {
  void *resized = realloc(buffer, size);
  if (resized == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
    abort();
  }
  return resized;
}
| 30 | |||
// Allow clients to override allocation functions dynamically.
// Each pointer starts out referring to the default implementation defined in
// this file (or to the C library's `free`), and is reassigned by
// ts_set_allocator().
void *(*ts_current_malloc)(size_t) = ts_malloc_default;
void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
void (*ts_current_free)(void *) = free;
| 36 | |||
| 37 | void ts_set_allocator( | ||
| 38 | void *(*new_malloc)(size_t size), | ||
| 39 | void *(*new_calloc)(size_t count, size_t size), | ||
| 40 | void *(*new_realloc)(void *ptr, size_t size), | ||
| 41 | void (*new_free)(void *ptr) | ||
| 42 | ) { | ||
| 43 | ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; | ||
| 44 | ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; | ||
| 45 | ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; | ||
| 46 | ts_current_free = new_free ? new_free : free; | ||
| 47 | } | ||
| 48 | |||
diff --git a/vendor/tree-sitter/lib/src/alloc.h b/vendor/tree-sitter/lib/src/alloc.h new file mode 100644 index 0000000..c51f84a --- /dev/null +++ b/vendor/tree-sitter/lib/src/alloc.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | #ifndef TREE_SITTER_ALLOC_H_ | ||
| 2 | #define TREE_SITTER_ALLOC_H_ | ||
| 3 | |||
| 4 | #include "tree_sitter/api.h" | ||
| 5 | |||
| 6 | #ifdef __cplusplus | ||
| 7 | extern "C" { | ||
| 8 | #endif | ||
| 9 | |||
| 10 | #include <stdlib.h> | ||
| 11 | #include <stdbool.h> | ||
| 12 | #include <stdio.h> | ||
| 13 | |||
// Function pointers through which allocations are routed; declared here and
// defined in another translation unit.
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);

// Allow clients to override allocation functions.
// Each `ts_*` name defaults to the matching `ts_current_*` pointer unless
// the macro was already defined before this header was included.
#ifndef ts_malloc
#define ts_malloc  ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc  ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free    ts_current_free
#endif
| 32 | |||
| 33 | #ifdef __cplusplus | ||
| 34 | } | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #endif // TREE_SITTER_ALLOC_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/array.h b/vendor/tree-sitter/lib/src/array.h new file mode 100644 index 0000000..e026f6b --- /dev/null +++ b/vendor/tree-sitter/lib/src/array.h | |||
| @@ -0,0 +1,249 @@ | |||
| 1 | #ifndef TREE_SITTER_ARRAY_H_ | ||
| 2 | #define TREE_SITTER_ARRAY_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include <string.h> | ||
| 9 | #include <stdlib.h> | ||
| 10 | #include <stdint.h> | ||
| 11 | #include <assert.h> | ||
| 12 | #include <stdbool.h> | ||
| 13 | #include "./alloc.h" | ||
| 14 | |||
// Declares an anonymous struct type for a growable buffer of `T`: a
// `contents` pointer plus 32-bit `size` and `capacity` counters.
#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

// Reset all three fields to zero/NULL. Does not free `contents`.
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

// Brace-initializer for an empty array (NULL contents, zero size/capacity).
#define array_new() \
  { NULL, 0, 0 }

// Yield a pointer to the element at `_index`, asserting that the index is
// smaller than the array's size.
#define array_get(self, _index) \
  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])

// Pointer to the first element (asserts the array is non-empty).
#define array_front(self) array_get(self, 0)

// Pointer to the last element. On an empty array `size - 1` wraps around,
// so the bounds assertion in array_get fails.
#define array_back(self) array_get(self, (self)->size - 1)

// Set the size to zero; capacity and allocation are left untouched.
#define array_clear(self) ((self)->size = 0)

// Grow the allocation so that it can hold `new_capacity` elements; does
// nothing if the capacity is already at least that large.
#define array_reserve(self, new_capacity) \
  array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity)

// Free any memory allocated for this array.
#define array_delete(self) array__delete((VoidArray *)(self))
| 42 | |||
// Append `element` to the end of the array, growing the allocation first
// if necessary.
#define array_push(self, element)                            \
  (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

// Increase the array's size by a given number of elements, reallocating
// if necessary. New elements are zero-initialized.
#define array_grow_by(self, count) \
  (array__grow((VoidArray *)(self), count, array__elem_size(self)), \
   memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \
   (self)->size += (count))

// Append every element of the array `other` to the end of `self`.
#define array_push_all(self, other)                                       \
  array_extend((self), (other)->size, (other)->contents)

// Append `count` elements to the end of the array, reading their values from the
// `contents` pointer.
#define array_extend(self, count, contents)                    \
  array__splice(                                               \
    (VoidArray *)(self), array__elem_size(self), (self)->size, \
    0, count,  contents                                        \
  )

// Remove `old_count` elements from the array starting at the given `index`. At
// the same index, insert `new_count` new elements, reading their values from the
// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents)  \
  array__splice(                                                       \
    (VoidArray *)(self), array__elem_size(self), _index,                \
    old_count, new_count, new_contents                                 \
  )

// Insert one `element` into the array at the given `index`.
// `element` must be an lvalue, because its address is taken.
#define array_insert(self, _index, element) \
  array__splice((VoidArray *)(self), array__elem_size(self), _index, 0, 1, &(element))

// Remove one `element` from the array at the given `index`.
#define array_erase(self, _index) \
  array__erase((VoidArray *)(self), array__elem_size(self), _index)

// Remove the last element and yield its value. No emptiness check is made.
#define array_pop(self) ((self)->contents[--(self)->size])

// Overwrite `self` with a copy of the elements of `other`.
#define array_assign(self, other) \
  array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))

// Exchange the contents pointer, size and capacity of the two arrays.
#define array_swap(self, other) \
  array__swap((VoidArray *)(self), (VoidArray *)(other))
| 89 | |||
// Search a sorted array for a given `needle` value, using the given `compare`
// callback to determine the order.
//
// If an existing element is found to be equal to `needle`, then the `index`
// out-parameter is set to the existing value's index, and the `exists`
// out-parameter is set to true. Otherwise, `index` is set to an index where
// `needle` should be inserted in order to preserve the sorting, and `exists`
// is set to false.
//
// `compare` is invoked as `compare(&element, needle)`.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  array__search_sorted(self, 0, compare, , needle, _index, _exists)

// Search a sorted array for a given `needle` value, using integer comparisons
// of a given struct field (specified with a leading dot) to determine the order.
//
// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  array__search_sorted(self, 0, compare_int, field, needle, _index, _exists)

// Insert a given `value` into a sorted array, using the given `compare`
// callback to determine the order.
//
// Nothing is inserted if an equal element already exists. `value` must be an
// lvalue, because its address is taken.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

// Insert a given `value` into a sorted array, using integer comparisons of
// a given struct field (specified with a leading dot) to determine the order.
//
// Nothing is inserted if an element with an equal field already exists.
//
// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)
| 127 | |||
| 128 | // Private | ||
| 129 | |||
// Type-erased view of an array, used by the helper functions below; the
// public macros cast any `Array(T) *` to `VoidArray *`.
typedef Array(void) VoidArray;

// Size in bytes of one element of the array pointed to by `self`.
#define array__elem_size(self) sizeof(*(self)->contents)
| 133 | |||
| 134 | static inline void array__delete(VoidArray *self) { | ||
| 135 | if (self->contents) { | ||
| 136 | ts_free(self->contents); | ||
| 137 | self->contents = NULL; | ||
| 138 | self->size = 0; | ||
| 139 | self->capacity = 0; | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | static inline void array__erase(VoidArray *self, size_t element_size, | ||
| 144 | uint32_t index) { | ||
| 145 | assert(index < self->size); | ||
| 146 | char *contents = (char *)self->contents; | ||
| 147 | memmove(contents + index * element_size, contents + (index + 1) * element_size, | ||
| 148 | (self->size - index - 1) * element_size); | ||
| 149 | self->size--; | ||
| 150 | } | ||
| 151 | |||
| 152 | static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) { | ||
| 153 | if (new_capacity > self->capacity) { | ||
| 154 | if (self->contents) { | ||
| 155 | self->contents = ts_realloc(self->contents, new_capacity * element_size); | ||
| 156 | } else { | ||
| 157 | self->contents = ts_malloc(new_capacity * element_size); | ||
| 158 | } | ||
| 159 | self->capacity = new_capacity; | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) { | ||
| 164 | array__reserve(self, element_size, other->size); | ||
| 165 | self->size = other->size; | ||
| 166 | memcpy(self->contents, other->contents, self->size * element_size); | ||
| 167 | } | ||
| 168 | |||
| 169 | static inline void array__swap(VoidArray *self, VoidArray *other) { | ||
| 170 | VoidArray swap = *other; | ||
| 171 | *other = *self; | ||
| 172 | *self = swap; | ||
| 173 | } | ||
| 174 | |||
| 175 | static inline void array__grow(VoidArray *self, uint32_t count, size_t element_size) { | ||
| 176 | uint32_t new_size = self->size + count; | ||
| 177 | if (new_size > self->capacity) { | ||
| 178 | uint32_t new_capacity = self->capacity * 2; | ||
| 179 | if (new_capacity < 8) new_capacity = 8; | ||
| 180 | if (new_capacity < new_size) new_capacity = new_size; | ||
| 181 | array__reserve(self, element_size, new_capacity); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | static inline void array__splice(VoidArray *self, size_t element_size, | ||
| 186 | uint32_t index, uint32_t old_count, | ||
| 187 | uint32_t new_count, const void *elements) { | ||
| 188 | uint32_t new_size = self->size + new_count - old_count; | ||
| 189 | uint32_t old_end = index + old_count; | ||
| 190 | uint32_t new_end = index + new_count; | ||
| 191 | assert(old_end <= self->size); | ||
| 192 | |||
| 193 | array__reserve(self, element_size, new_size); | ||
| 194 | |||
| 195 | char *contents = (char *)self->contents; | ||
| 196 | if (self->size > old_end) { | ||
| 197 | memmove( | ||
| 198 | contents + new_end * element_size, | ||
| 199 | contents + old_end * element_size, | ||
| 200 | (self->size - old_end) * element_size | ||
| 201 | ); | ||
| 202 | } | ||
| 203 | if (new_count > 0) { | ||
| 204 | if (elements) { | ||
| 205 | memcpy( | ||
| 206 | (contents + index * element_size), | ||
| 207 | elements, | ||
| 208 | new_count * element_size | ||
| 209 | ); | ||
| 210 | } else { | ||
| 211 | memset( | ||
| 212 | (contents + index * element_size), | ||
| 213 | 0, | ||
| 214 | new_count * element_size | ||
| 215 | ); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | self->size += new_count - old_count; | ||
| 219 | } | ||
| 220 | |||
// A binary search routine, based on Rust's `std::slice::binary_search_by`.
//
// Searches the elements from index `start` to the end of the array.
// `compare` is invoked as `compare(&element suffix, needle)`, where `suffix`
// is either empty or a field accessor such as `.field`. On exit `*_index`
// is the match position (with `*_exists` true) or the insertion position
// (with `*_exists` false).
//
// NOTE: the expansion declares locals named `size`, `comparison`,
// `half_size` and `mid_index`; arguments should not use those names.
#define array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
// parameter by reference in order to work with the generic sorting function above.
// NOTE(review): both operands are converted to `int` before subtracting, so
// values that do not fit in `int` (or whose difference overflows) would
// compare incorrectly; callers presumably only use small keys — confirm.
#define compare_int(a, b) ((int)*(a) - (int)(b))
| 244 | |||
| 245 | #ifdef __cplusplus | ||
| 246 | } | ||
| 247 | #endif | ||
| 248 | |||
| 249 | #endif // TREE_SITTER_ARRAY_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/atomic.h b/vendor/tree-sitter/lib/src/atomic.h new file mode 100644 index 0000000..9e9269c --- /dev/null +++ b/vendor/tree-sitter/lib/src/atomic.h | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | #ifndef TREE_SITTER_ATOMIC_H_ | ||
| 2 | #define TREE_SITTER_ATOMIC_H_ | ||
| 3 | |||
| 4 | #include <stddef.h> | ||
| 5 | #include <stdint.h> | ||
| 6 | |||
| 7 | #ifdef __TINYC__ | ||
| 8 | |||
// TinyCC fallback: plain volatile accesses. No atomic instructions are used
// in this branch.

// Read the value stored at `p`.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t current = *p;
  return current;
}

// Add one to `*p` and return the stored value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  *p = *p + 1;
  return *p;
}

// Subtract one from `*p` and return the stored value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  *p = *p - 1;
  return *p;
}
| 22 | |||
| 23 | #elif defined(_WIN32) | ||
| 24 | |||
| 25 | #include <windows.h> | ||
| 26 | |||
// Windows: the load is a plain volatile read; increments and decrements go
// through the Interlocked* API, which returns the resulting value.

// Read the value stored at `p`.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t current = *p;
  return current;
}

// Increment `*p` and return the new value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  long volatile *target = (long volatile *)p;
  return InterlockedIncrement(target);
}

// Decrement `*p` and return the new value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  long volatile *target = (long volatile *)p;
  return InterlockedDecrement(target);
}
| 38 | |||
| 39 | #else | ||
| 40 | |||
// GCC/Clang: use the `__atomic_*` builtins when the compiler advertises them
// (detected through the __ATOMIC_RELAXED macro), otherwise fall back to the
// older `__sync_*` builtins.

// Load the value stored at `p`.
static inline size_t atomic_load(const volatile size_t *p) {
#ifndef __ATOMIC_RELAXED
  // Adding zero yields the current value without changing it.
  return __sync_fetch_and_add((volatile size_t *)p, 0);
#else
  return __atomic_load_n(p, __ATOMIC_RELAXED);
#endif
}

// Increment `*p` and return the new value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_add_and_fetch(p, 1U);
#else
  return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);
#endif
}

// Decrement `*p` and return the new value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_sub_and_fetch(p, 1U);
#else
  return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);
#endif
}
| 64 | |||
| 65 | #endif | ||
| 66 | |||
| 67 | #endif // TREE_SITTER_ATOMIC_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/clock.h b/vendor/tree-sitter/lib/src/clock.h new file mode 100644 index 0000000..6e75729 --- /dev/null +++ b/vendor/tree-sitter/lib/src/clock.h | |||
| @@ -0,0 +1,146 @@ | |||
| 1 | #ifndef TREE_SITTER_CLOCK_H_ | ||
| 2 | #define TREE_SITTER_CLOCK_H_ | ||
| 3 | |||
| 4 | #include <stdbool.h> | ||
| 5 | #include <stdint.h> | ||
| 6 | |||
| 7 | typedef uint64_t TSDuration; | ||
| 8 | |||
| 9 | #ifdef _WIN32 | ||
| 10 | |||
| 11 | // Windows: | ||
| 12 | // * Represent a time as a performance counter value. | ||
| 13 | // * Represent a duration as a number of performance counter ticks. | ||
| 14 | |||
| 15 | #include <windows.h> | ||
| 16 | typedef uint64_t TSClock; | ||
| 17 | |||
| 18 | static inline TSDuration duration_from_micros(uint64_t micros) { | ||
| 19 | LARGE_INTEGER frequency; | ||
| 20 | QueryPerformanceFrequency(&frequency); | ||
| 21 | return micros * (uint64_t)frequency.QuadPart / 1000000; | ||
| 22 | } | ||
| 23 | |||
| 24 | static inline uint64_t duration_to_micros(TSDuration self) { | ||
| 25 | LARGE_INTEGER frequency; | ||
| 26 | QueryPerformanceFrequency(&frequency); | ||
| 27 | return self * 1000000 / (uint64_t)frequency.QuadPart; | ||
| 28 | } | ||
| 29 | |||
| 30 | static inline TSClock clock_null(void) { | ||
| 31 | return 0; | ||
| 32 | } | ||
| 33 | |||
| 34 | static inline TSClock clock_now(void) { | ||
| 35 | LARGE_INTEGER result; | ||
| 36 | QueryPerformanceCounter(&result); | ||
| 37 | return (uint64_t)result.QuadPart; | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline TSClock clock_after(TSClock base, TSDuration duration) { | ||
| 41 | return base + duration; | ||
| 42 | } | ||
| 43 | |||
| 44 | static inline bool clock_is_null(TSClock self) { | ||
| 45 | return !self; | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline bool clock_is_gt(TSClock self, TSClock other) { | ||
| 49 | return self > other; | ||
| 50 | } | ||
| 51 | |||
| 52 | #elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__) | ||
| 53 | |||
| 54 | // POSIX with monotonic clock support (Linux) | ||
| 55 | // * Represent a time as a monotonic (seconds, nanoseconds) pair. | ||
| 56 | // * Represent a duration as a number of microseconds. | ||
| 57 | // | ||
| 58 | // On these platforms, parse timeouts will correspond accurately to | ||
| 59 | // real time, regardless of what other processes are running. | ||
| 60 | |||
| 61 | #include <time.h> | ||
| 62 | typedef struct timespec TSClock; | ||
| 63 | |||
| 64 | static inline TSDuration duration_from_micros(uint64_t micros) { | ||
| 65 | return micros; | ||
| 66 | } | ||
| 67 | |||
| 68 | static inline uint64_t duration_to_micros(TSDuration self) { | ||
| 69 | return self; | ||
| 70 | } | ||
| 71 | |||
| 72 | static inline TSClock clock_now(void) { | ||
| 73 | TSClock result; | ||
| 74 | clock_gettime(CLOCK_MONOTONIC, &result); | ||
| 75 | return result; | ||
| 76 | } | ||
| 77 | |||
| 78 | static inline TSClock clock_null(void) { | ||
| 79 | return (TSClock) {0, 0}; | ||
| 80 | } | ||
| 81 | |||
| 82 | static inline TSClock clock_after(TSClock base, TSDuration duration) { | ||
| 83 | TSClock result = base; | ||
| 84 | result.tv_sec += duration / 1000000; | ||
| 85 | result.tv_nsec += (duration % 1000000) * 1000; | ||
| 86 | if (result.tv_nsec >= 1000000000) { | ||
| 87 | result.tv_nsec -= 1000000000; | ||
| 88 | ++(result.tv_sec); | ||
| 89 | } | ||
| 90 | return result; | ||
| 91 | } | ||
| 92 | |||
| 93 | static inline bool clock_is_null(TSClock self) { | ||
| 94 | return !self.tv_sec; | ||
| 95 | } | ||
| 96 | |||
| 97 | static inline bool clock_is_gt(TSClock self, TSClock other) { | ||
| 98 | if (self.tv_sec > other.tv_sec) return true; | ||
| 99 | if (self.tv_sec < other.tv_sec) return false; | ||
| 100 | return self.tv_nsec > other.tv_nsec; | ||
| 101 | } | ||
| 102 | |||
| 103 | #else | ||
| 104 | |||
| 105 | // macOS or POSIX without monotonic clock support | ||
| 106 | // * Represent a time as a process clock value. | ||
| 107 | // * Represent a duration as a number of process clock ticks. | ||
| 108 | // | ||
| 109 | // On these platforms, parse timeouts may be affected by other processes, | ||
| 110 | // which is not ideal, but is better than using a non-monotonic time API | ||
| 111 | // like `gettimeofday`. | ||
| 112 | |||
| 113 | #include <time.h> | ||
| 114 | typedef uint64_t TSClock; | ||
| 115 | |||
| 116 | static inline TSDuration duration_from_micros(uint64_t micros) { | ||
| 117 | return micros * (uint64_t)CLOCKS_PER_SEC / 1000000; | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline uint64_t duration_to_micros(TSDuration self) { | ||
| 121 | return self * 1000000 / (uint64_t)CLOCKS_PER_SEC; | ||
| 122 | } | ||
| 123 | |||
| 124 | static inline TSClock clock_null(void) { | ||
| 125 | return 0; | ||
| 126 | } | ||
| 127 | |||
| 128 | static inline TSClock clock_now(void) { | ||
| 129 | return (uint64_t)clock(); | ||
| 130 | } | ||
| 131 | |||
| 132 | static inline TSClock clock_after(TSClock base, TSDuration duration) { | ||
| 133 | return base + duration; | ||
| 134 | } | ||
| 135 | |||
| 136 | static inline bool clock_is_null(TSClock self) { | ||
| 137 | return !self; | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline bool clock_is_gt(TSClock self, TSClock other) { | ||
| 141 | return self > other; | ||
| 142 | } | ||
| 143 | |||
| 144 | #endif | ||
| 145 | |||
| 146 | #endif // TREE_SITTER_CLOCK_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/error_costs.h b/vendor/tree-sitter/lib/src/error_costs.h new file mode 100644 index 0000000..32d3666 --- /dev/null +++ b/vendor/tree-sitter/lib/src/error_costs.h | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | #ifndef TREE_SITTER_ERROR_COSTS_H_ | ||
| 2 | #define TREE_SITTER_ERROR_COSTS_H_ | ||
| 3 | |||
// Numeric constants for error handling. Their use sites are not part of
// this header; judging by the names, ERROR_STATE identifies the error parse
// state and the ERROR_COST_* values are per-event cost weights (recovery,
// missing tree, skipped tree/line/character) — confirm against the parser.
#define ERROR_STATE 0
#define ERROR_COST_PER_RECOVERY 500
#define ERROR_COST_PER_MISSING_TREE 110
#define ERROR_COST_PER_SKIPPED_TREE 100
#define ERROR_COST_PER_SKIPPED_LINE 30
#define ERROR_COST_PER_SKIPPED_CHAR 1
| 10 | |||
| 11 | #endif | ||
diff --git a/vendor/tree-sitter/lib/src/get_changed_ranges.c b/vendor/tree-sitter/lib/src/get_changed_ranges.c new file mode 100644 index 0000000..bcf8da9 --- /dev/null +++ b/vendor/tree-sitter/lib/src/get_changed_ranges.c | |||
| @@ -0,0 +1,501 @@ | |||
| 1 | #include "./get_changed_ranges.h" | ||
| 2 | #include "./subtree.h" | ||
| 3 | #include "./language.h" | ||
| 4 | #include "./error_costs.h" | ||
| 5 | #include "./tree_cursor.h" | ||
| 6 | #include <assert.h> | ||
| 7 | |||
| 8 | // #define DEBUG_GET_CHANGED_RANGES | ||
| 9 | |||
| 10 | static void ts_range_array_add( | ||
| 11 | TSRangeArray *self, | ||
| 12 | Length start, | ||
| 13 | Length end | ||
| 14 | ) { | ||
| 15 | if (self->size > 0) { | ||
| 16 | TSRange *last_range = array_back(self); | ||
| 17 | if (start.bytes <= last_range->end_byte) { | ||
| 18 | last_range->end_byte = end.bytes; | ||
| 19 | last_range->end_point = end.extent; | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | } | ||
| 23 | |||
| 24 | if (start.bytes < end.bytes) { | ||
| 25 | TSRange range = { start.extent, end.extent, start.bytes, end.bytes }; | ||
| 26 | array_push(self, range); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | bool ts_range_array_intersects( | ||
| 31 | const TSRangeArray *self, | ||
| 32 | unsigned start_index, | ||
| 33 | uint32_t start_byte, | ||
| 34 | uint32_t end_byte | ||
| 35 | ) { | ||
| 36 | for (unsigned i = start_index; i < self->size; i++) { | ||
| 37 | TSRange *range = &self->contents[i]; | ||
| 38 | if (range->end_byte > start_byte) { | ||
| 39 | if (range->start_byte >= end_byte) break; | ||
| 40 | return true; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | return false; | ||
| 44 | } | ||
| 45 | |||
// Compute the spans covered by exactly one of two range lists and append
// them to `differences`.
//
// Both lists are walked in parallel by byte position. At each step the
// nearest upcoming boundary (the start or end of a range in either list) is
// taken; if the span just passed was inside one list but not the other, it
// is added to `differences`.
//
// NOTE(review): the sweep presumably requires each list to be sorted and
// non-overlapping — confirm against callers.
void ts_range_array_get_changed_ranges(
  const TSRange *old_ranges, unsigned old_range_count,
  const TSRange *new_ranges, unsigned new_range_count,
  TSRangeArray *differences
) {
  unsigned new_index = 0;
  unsigned old_index = 0;
  Length current_position = length_zero();
  bool in_old_range = false;
  bool in_new_range = false;

  while (old_index < old_range_count || new_index < new_range_count) {
    // These may point one past the end of a list; they are only
    // dereferenced while the matching index is in bounds (see the branches
    // below).
    const TSRange *old_range = &old_ranges[old_index];
    const TSRange *new_range = &new_ranges[new_index];

    // Next boundary in the old list: the end of the current range, the
    // start of the next one, or LENGTH_MAX when the list is exhausted.
    Length next_old_position;
    if (in_old_range) {
      next_old_position = (Length) {old_range->end_byte, old_range->end_point};
    } else if (old_index < old_range_count) {
      next_old_position = (Length) {old_range->start_byte, old_range->start_point};
    } else {
      next_old_position = LENGTH_MAX;
    }

    // Next boundary in the new list, determined the same way.
    Length next_new_position;
    if (in_new_range) {
      next_new_position = (Length) {new_range->end_byte, new_range->end_point};
    } else if (new_index < new_range_count) {
      next_new_position = (Length) {new_range->start_byte, new_range->start_point};
    } else {
      next_new_position = LENGTH_MAX;
    }

    if (next_old_position.bytes < next_new_position.bytes) {
      // The old list's boundary comes first.
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_old_position);
      }
      // Leaving a range advances to the next one in that list.
      if (in_old_range) old_index++;
      current_position = next_old_position;
      in_old_range = !in_old_range;
    } else if (next_new_position.bytes < next_old_position.bytes) {
      // The new list's boundary comes first.
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_new_position);
      }
      if (in_new_range) new_index++;
      current_position = next_new_position;
      in_new_range = !in_new_range;
    } else {
      // Both lists have a boundary at the same byte: cross both at once.
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_new_position);
      }
      if (in_old_range) old_index++;
      if (in_new_range) new_index++;
      in_old_range = !in_old_range;
      in_new_range = !in_new_range;
      current_position = next_new_position;
    }
  }
}
| 105 | |||
// State for walking a syntax tree in this file.
typedef struct {
  TreeCursor cursor;           // its `stack` holds the path from the root to the current entry
  const TSLanguage *language;  // passed to ts_language_alias_at() for alias lookups
  unsigned visible_depth;      // incremented/decremented as visible nodes are entered/left
  bool in_padding;             // when set, the iterator's span is the current entry's padding rather than its content
} Iterator;
| 112 | |||
| 113 | static Iterator iterator_new( | ||
| 114 | TreeCursor *cursor, | ||
| 115 | const Subtree *tree, | ||
| 116 | const TSLanguage *language | ||
| 117 | ) { | ||
| 118 | array_clear(&cursor->stack); | ||
| 119 | array_push(&cursor->stack, ((TreeCursorEntry) { | ||
| 120 | .subtree = tree, | ||
| 121 | .position = length_zero(), | ||
| 122 | .child_index = 0, | ||
| 123 | .structural_child_index = 0, | ||
| 124 | })); | ||
| 125 | return (Iterator) { | ||
| 126 | .cursor = *cursor, | ||
| 127 | .language = language, | ||
| 128 | .visible_depth = 1, | ||
| 129 | .in_padding = false, | ||
| 130 | }; | ||
| 131 | } | ||
| 132 | |||
| 133 | static bool iterator_done(Iterator *self) { | ||
| 134 | return self->cursor.stack.size == 0; | ||
| 135 | } | ||
| 136 | |||
| 137 | static Length iterator_start_position(Iterator *self) { | ||
| 138 | TreeCursorEntry entry = *array_back(&self->cursor.stack); | ||
| 139 | if (self->in_padding) { | ||
| 140 | return entry.position; | ||
| 141 | } else { | ||
| 142 | return length_add(entry.position, ts_subtree_padding(*entry.subtree)); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | static Length iterator_end_position(Iterator *self) { | ||
| 147 | TreeCursorEntry entry = *array_back(&self->cursor.stack); | ||
| 148 | Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); | ||
| 149 | if (self->in_padding) { | ||
| 150 | return result; | ||
| 151 | } else { | ||
| 152 | return length_add(result, ts_subtree_size(*entry.subtree)); | ||
| 153 | } | ||
| 154 | } | ||
| 155 | |||
| 156 | static bool iterator_tree_is_visible(const Iterator *self) { | ||
| 157 | TreeCursorEntry entry = *array_back(&self->cursor.stack); | ||
| 158 | if (ts_subtree_visible(*entry.subtree)) return true; | ||
| 159 | if (self->cursor.stack.size > 1) { | ||
| 160 | Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; | ||
| 161 | return ts_language_alias_at( | ||
| 162 | self->language, | ||
| 163 | parent.ptr->production_id, | ||
| 164 | entry.structural_child_index | ||
| 165 | ) != 0; | ||
| 166 | } | ||
| 167 | return false; | ||
| 168 | } | ||
| 169 | |||
// Find the nearest visible entry, searching from the top of the cursor stack
// towards the root, and report it through the out-parameters.
//
// An entry qualifies when its subtree is visible or when `*alias_symbol` is
// non-zero. On a match, `*tree` receives the subtree and `*start_byte` the
// entry's position in bytes. If nothing matches, `*tree` and `*start_byte`
// are left untouched.
//
// NOTE(review): `*alias_symbol` is read for the root entry without being
// written by this function first, so callers presumably initialise all
// out-parameters before the call — confirm.
static void iterator_get_visible_state(
  const Iterator *self,
  Subtree *tree,
  TSSymbol *alias_symbol,
  uint32_t *start_byte
) {
  uint32_t i = self->cursor.stack.size - 1;

  // While in padding, the search starts from the parent of the top entry;
  // if the top entry is the root there is nothing to report.
  if (self->in_padding) {
    if (i == 0) return;
    i--;
  }

  // `i` is unsigned: the loop ends when `i--` wraps around after index 0,
  // which makes `i + 1` equal to zero.
  for (; i + 1 > 0; i--) {
    TreeCursorEntry entry = self->cursor.stack.contents[i];

    // Every entry except the root may be aliased by its parent's production.
    if (i > 0) {
      const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
      *alias_symbol = ts_language_alias_at(
        self->language,
        parent->ptr->production_id,
        entry.structural_child_index
      );
    }

    if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
      *tree = *entry.subtree;
      *start_byte = entry.position.bytes;
      break;
    }
  }
}
| 202 | |||
| 203 | static void iterator_ascend(Iterator *self) { | ||
| 204 | if (iterator_done(self)) return; | ||
| 205 | if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; | ||
| 206 | if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; | ||
| 207 | self->cursor.stack.size--; | ||
| 208 | } | ||
| 209 | |||
// Descend from the current entry towards byte offset `goal_position`.
//
// At each level, the first child whose end lies beyond `goal_position` is
// pushed onto the cursor stack. Descent stops with `true` at the first
// pushed child that is visible; invisible children are descended through.
// Returns `false` when the iterator is in padding, or when no visible node
// is found before the descent runs out of qualifying children.
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
  if (self->in_padding) return false;

  bool did_descend = false;
  do {
    did_descend = false;
    TreeCursorEntry entry = *array_back(&self->cursor.stack);
    Length position = entry.position;
    // Index among the parent's children that are not `extra`.
    uint32_t structural_child_index = 0;
    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
      const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
      // `child_left` is where the child's content begins (after its
      // padding); `child_right` is where the child ends.
      Length child_left = length_add(position, ts_subtree_padding(*child));
      Length child_right = length_add(child_left, ts_subtree_size(*child));

      if (child_right.bytes > goal_position) {
        array_push(&self->cursor.stack, ((TreeCursorEntry) {
          .subtree = child,
          .position = position,
          .child_index = i,
          .structural_child_index = structural_child_index,
        }));

        if (iterator_tree_is_visible(self)) {
          // The goal falls inside the child's padding when the child's
          // content starts after it.
          if (child_left.bytes > goal_position) {
            self->in_padding = true;
          } else {
            self->visible_depth++;
          }
          return true;
        }

        // Invisible child: continue descending from it in the next pass of
        // the outer loop.
        did_descend = true;
        break;
      }

      position = child_right;
      if (!ts_subtree_extra(*child)) structural_child_index++;
    }
  } while (did_descend);

  return false;
}
| 252 | |||
| 253 | static void iterator_advance(Iterator *self) { | ||
| 254 | if (self->in_padding) { | ||
| 255 | self->in_padding = false; | ||
| 256 | if (iterator_tree_is_visible(self)) { | ||
| 257 | self->visible_depth++; | ||
| 258 | } else { | ||
| 259 | iterator_descend(self, 0); | ||
| 260 | } | ||
| 261 | return; | ||
| 262 | } | ||
| 263 | |||
| 264 | for (;;) { | ||
| 265 | if (iterator_tree_is_visible(self)) self->visible_depth--; | ||
| 266 | TreeCursorEntry entry = array_pop(&self->cursor.stack); | ||
| 267 | if (iterator_done(self)) return; | ||
| 268 | |||
| 269 | const Subtree *parent = array_back(&self->cursor.stack)->subtree; | ||
| 270 | uint32_t child_index = entry.child_index + 1; | ||
| 271 | if (ts_subtree_child_count(*parent) > child_index) { | ||
| 272 | Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); | ||
| 273 | uint32_t structural_child_index = entry.structural_child_index; | ||
| 274 | if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; | ||
| 275 | const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; | ||
| 276 | |||
| 277 | array_push(&self->cursor.stack, ((TreeCursorEntry) { | ||
| 278 | .subtree = next_child, | ||
| 279 | .position = position, | ||
| 280 | .child_index = child_index, | ||
| 281 | .structural_child_index = structural_child_index, | ||
| 282 | })); | ||
| 283 | |||
| 284 | if (iterator_tree_is_visible(self)) { | ||
| 285 | if (ts_subtree_padding(*next_child).bytes > 0) { | ||
| 286 | self->in_padding = true; | ||
| 287 | } else { | ||
| 288 | self->visible_depth++; | ||
| 289 | } | ||
| 290 | } else { | ||
| 291 | iterator_descend(self, 0); | ||
| 292 | } | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | } | ||
| 296 | } | ||
| 297 | |||
// Result of comparing the current nodes of the old and new iterators.
typedef enum {
  // The nodes are different.
  IteratorDiffers,
  // Same symbol and alias, but the nodes may still differ internally.
  IteratorMayDiffer,
  // The nodes are treated as identical.
  IteratorMatches,
} IteratorComparison;
| 303 | |||
| 304 | static IteratorComparison iterator_compare( | ||
| 305 | const Iterator *old_iter, | ||
| 306 | const Iterator *new_iter | ||
| 307 | ) { | ||
| 308 | Subtree old_tree = NULL_SUBTREE; | ||
| 309 | Subtree new_tree = NULL_SUBTREE; | ||
| 310 | uint32_t old_start = 0; | ||
| 311 | uint32_t new_start = 0; | ||
| 312 | TSSymbol old_alias_symbol = 0; | ||
| 313 | TSSymbol new_alias_symbol = 0; | ||
| 314 | iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); | ||
| 315 | iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); | ||
| 316 | |||
| 317 | if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; | ||
| 318 | if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; | ||
| 319 | |||
| 320 | if ( | ||
| 321 | old_alias_symbol == new_alias_symbol && | ||
| 322 | ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree) | ||
| 323 | ) { | ||
| 324 | if (old_start == new_start && | ||
| 325 | !ts_subtree_has_changes(old_tree) && | ||
| 326 | ts_subtree_symbol(old_tree) != ts_builtin_sym_error && | ||
| 327 | ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && | ||
| 328 | ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && | ||
| 329 | ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && | ||
| 330 | (ts_subtree_parse_state(old_tree) == ERROR_STATE) == | ||
| 331 | (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { | ||
| 332 | return IteratorMatches; | ||
| 333 | } else { | ||
| 334 | return IteratorMayDiffer; | ||
| 335 | } | ||
| 336 | } | ||
| 337 | |||
| 338 | return IteratorDiffers; | ||
| 339 | } | ||
| 340 | |||
#ifdef DEBUG_GET_CHANGED_RANGES
// Debug helper: print the symbol name, padding flag, visible depth and
// start/end points of the iterator's current entry to stdout.
static inline void iterator_print_state(Iterator *self) {
  TreeCursorEntry top = *array_back(&self->cursor.stack);
  const char *symbol_name = ts_language_symbol_name(
    self->language,
    ts_subtree_symbol(*top.subtree)
  );
  TSPoint from = iterator_start_position(self).extent;
  TSPoint to = iterator_end_position(self).extent;
  const char *padding_marker = self->in_padding ? "(p)" : " ";
  printf(
    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
    symbol_name, padding_marker,
    self->visible_depth,
    from.row + 1, from.column,
    to.row + 1, to.column
  );
}
#endif
| 356 | |||
| 357 | unsigned ts_subtree_get_changed_ranges( | ||
| 358 | const Subtree *old_tree, const Subtree *new_tree, | ||
| 359 | TreeCursor *cursor1, TreeCursor *cursor2, | ||
| 360 | const TSLanguage *language, | ||
| 361 | const TSRangeArray *included_range_differences, | ||
| 362 | TSRange **ranges | ||
| 363 | ) { | ||
| 364 | TSRangeArray results = array_new(); | ||
| 365 | |||
| 366 | Iterator old_iter = iterator_new(cursor1, old_tree, language); | ||
| 367 | Iterator new_iter = iterator_new(cursor2, new_tree, language); | ||
| 368 | |||
| 369 | unsigned included_range_difference_index = 0; | ||
| 370 | |||
| 371 | Length position = iterator_start_position(&old_iter); | ||
| 372 | Length next_position = iterator_start_position(&new_iter); | ||
| 373 | if (position.bytes < next_position.bytes) { | ||
| 374 | ts_range_array_add(&results, position, next_position); | ||
| 375 | position = next_position; | ||
| 376 | } else if (position.bytes > next_position.bytes) { | ||
| 377 | ts_range_array_add(&results, next_position, position); | ||
| 378 | next_position = position; | ||
| 379 | } | ||
| 380 | |||
| 381 | do { | ||
| 382 | #ifdef DEBUG_GET_CHANGED_RANGES | ||
| 383 | printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); | ||
| 384 | iterator_print_state(&old_iter); | ||
| 385 | printf("\tvs\t"); | ||
| 386 | iterator_print_state(&new_iter); | ||
| 387 | puts(""); | ||
| 388 | #endif | ||
| 389 | |||
| 390 | // Compare the old and new subtrees. | ||
| 391 | IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); | ||
| 392 | |||
| 393 | // Even if the two subtrees appear to be identical, they could differ | ||
| 394 | // internally if they contain a range of text that was previously | ||
| 395 | // excluded from the parse, and is now included, or vice-versa. | ||
| 396 | if (comparison == IteratorMatches && ts_range_array_intersects( | ||
| 397 | included_range_differences, | ||
| 398 | included_range_difference_index, | ||
| 399 | position.bytes, | ||
| 400 | iterator_end_position(&old_iter).bytes | ||
| 401 | )) { | ||
| 402 | comparison = IteratorMayDiffer; | ||
| 403 | } | ||
| 404 | |||
| 405 | bool is_changed = false; | ||
| 406 | switch (comparison) { | ||
| 407 | // If the subtrees are definitely identical, move to the end | ||
| 408 | // of both subtrees. | ||
| 409 | case IteratorMatches: | ||
| 410 | next_position = iterator_end_position(&old_iter); | ||
| 411 | break; | ||
| 412 | |||
| 413 | // If the subtrees might differ internally, descend into both | ||
| 414 | // subtrees, finding the first child that spans the current position. | ||
| 415 | case IteratorMayDiffer: | ||
| 416 | if (iterator_descend(&old_iter, position.bytes)) { | ||
| 417 | if (!iterator_descend(&new_iter, position.bytes)) { | ||
| 418 | is_changed = true; | ||
| 419 | next_position = iterator_end_position(&old_iter); | ||
| 420 | } | ||
| 421 | } else if (iterator_descend(&new_iter, position.bytes)) { | ||
| 422 | is_changed = true; | ||
| 423 | next_position = iterator_end_position(&new_iter); | ||
| 424 | } else { | ||
| 425 | next_position = length_min( | ||
| 426 | iterator_end_position(&old_iter), | ||
| 427 | iterator_end_position(&new_iter) | ||
| 428 | ); | ||
| 429 | } | ||
| 430 | break; | ||
| 431 | |||
| 432 | // If the subtrees are different, record a change and then move | ||
| 433 | // to the end of both subtrees. | ||
| 434 | case IteratorDiffers: | ||
| 435 | is_changed = true; | ||
| 436 | next_position = length_min( | ||
| 437 | iterator_end_position(&old_iter), | ||
| 438 | iterator_end_position(&new_iter) | ||
| 439 | ); | ||
| 440 | break; | ||
| 441 | } | ||
| 442 | |||
| 443 | // Ensure that both iterators are caught up to the current position. | ||
| 444 | while ( | ||
| 445 | !iterator_done(&old_iter) && | ||
| 446 | iterator_end_position(&old_iter).bytes <= next_position.bytes | ||
| 447 | ) iterator_advance(&old_iter); | ||
| 448 | while ( | ||
| 449 | !iterator_done(&new_iter) && | ||
| 450 | iterator_end_position(&new_iter).bytes <= next_position.bytes | ||
| 451 | ) iterator_advance(&new_iter); | ||
| 452 | |||
| 453 | // Ensure that both iterators are at the same depth in the tree. | ||
| 454 | while (old_iter.visible_depth > new_iter.visible_depth) { | ||
| 455 | iterator_ascend(&old_iter); | ||
| 456 | } | ||
| 457 | while (new_iter.visible_depth > old_iter.visible_depth) { | ||
| 458 | iterator_ascend(&new_iter); | ||
| 459 | } | ||
| 460 | |||
| 461 | if (is_changed) { | ||
| 462 | #ifdef DEBUG_GET_CHANGED_RANGES | ||
| 463 | printf( | ||
| 464 | " change: [[%u, %u] - [%u, %u]]\n", | ||
| 465 | position.extent.row + 1, position.extent.column, | ||
| 466 | next_position.extent.row + 1, next_position.extent.column | ||
| 467 | ); | ||
| 468 | #endif | ||
| 469 | |||
| 470 | ts_range_array_add(&results, position, next_position); | ||
| 471 | } | ||
| 472 | |||
| 473 | position = next_position; | ||
| 474 | |||
| 475 | // Keep track of the current position in the included range differences | ||
| 476 | // array in order to avoid scanning the entire array on each iteration. | ||
| 477 | while (included_range_difference_index < included_range_differences->size) { | ||
| 478 | const TSRange *range = &included_range_differences->contents[ | ||
| 479 | included_range_difference_index | ||
| 480 | ]; | ||
| 481 | if (range->end_byte <= position.bytes) { | ||
| 482 | included_range_difference_index++; | ||
| 483 | } else { | ||
| 484 | break; | ||
| 485 | } | ||
| 486 | } | ||
| 487 | } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); | ||
| 488 | |||
| 489 | Length old_size = ts_subtree_total_size(*old_tree); | ||
| 490 | Length new_size = ts_subtree_total_size(*new_tree); | ||
| 491 | if (old_size.bytes < new_size.bytes) { | ||
| 492 | ts_range_array_add(&results, old_size, new_size); | ||
| 493 | } else if (new_size.bytes < old_size.bytes) { | ||
| 494 | ts_range_array_add(&results, new_size, old_size); | ||
| 495 | } | ||
| 496 | |||
| 497 | *cursor1 = old_iter.cursor; | ||
| 498 | *cursor2 = new_iter.cursor; | ||
| 499 | *ranges = results.contents; | ||
| 500 | return results.size; | ||
| 501 | } | ||
diff --git a/vendor/tree-sitter/lib/src/get_changed_ranges.h b/vendor/tree-sitter/lib/src/get_changed_ranges.h new file mode 100644 index 0000000..a1f1dbb --- /dev/null +++ b/vendor/tree-sitter/lib/src/get_changed_ranges.h | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | #ifndef TREE_SITTER_GET_CHANGED_RANGES_H_ | ||
| 2 | #define TREE_SITTER_GET_CHANGED_RANGES_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include "./tree_cursor.h" | ||
| 9 | #include "./subtree.h" | ||
| 10 | |||
| 11 | typedef Array(TSRange) TSRangeArray; | ||
| 12 | |||
| 13 | void ts_range_array_get_changed_ranges( | ||
| 14 | const TSRange *old_ranges, unsigned old_range_count, | ||
| 15 | const TSRange *new_ranges, unsigned new_range_count, | ||
| 16 | TSRangeArray *differences | ||
| 17 | ); | ||
| 18 | |||
| 19 | bool ts_range_array_intersects( | ||
| 20 | const TSRangeArray *self, unsigned start_index, | ||
| 21 | uint32_t start_byte, uint32_t end_byte | ||
| 22 | ); | ||
| 23 | |||
| 24 | unsigned ts_subtree_get_changed_ranges( | ||
| 25 | const Subtree *old_tree, const Subtree *new_tree, | ||
| 26 | TreeCursor *cursor1, TreeCursor *cursor2, | ||
| 27 | const TSLanguage *language, | ||
| 28 | const TSRangeArray *included_range_differences, | ||
| 29 | TSRange **ranges | ||
| 30 | ); | ||
| 31 | |||
| 32 | #ifdef __cplusplus | ||
| 33 | } | ||
| 34 | #endif | ||
| 35 | |||
| 36 | #endif // TREE_SITTER_GET_CHANGED_RANGES_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/host.h b/vendor/tree-sitter/lib/src/host.h new file mode 100644 index 0000000..a07e9f8 --- /dev/null +++ b/vendor/tree-sitter/lib/src/host.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | |||
| 2 | // Determine endian and pointer size based on known defines. | ||
| 3 | // TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments | ||
| 4 | // to override this. | ||
| 5 | |||
| 6 | #if !defined(TS_BIG_ENDIAN) | ||
| 7 | #if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ | ||
| 8 | || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))) | ||
| 9 | #define TS_BIG_ENDIAN 1 | ||
| 10 | #else | ||
| 11 | #define TS_BIG_ENDIAN 0 | ||
| 12 | #endif | ||
| 13 | #endif | ||
| 14 | |||
| 15 | #if !defined(TS_PTR_SIZE) | ||
| 16 | #if UINTPTR_MAX == 0xFFFFFFFF | ||
| 17 | #define TS_PTR_SIZE 32 | ||
| 18 | #else | ||
| 19 | #define TS_PTR_SIZE 64 | ||
| 20 | #endif | ||
| 21 | #endif | ||
diff --git a/vendor/tree-sitter/lib/src/language.c b/vendor/tree-sitter/lib/src/language.c new file mode 100644 index 0000000..f30329d --- /dev/null +++ b/vendor/tree-sitter/lib/src/language.c | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | #include "./language.h" | ||
| 2 | #include "./subtree.h" | ||
| 3 | #include "./error_costs.h" | ||
| 4 | #include <string.h> | ||
| 5 | |||
| 6 | uint32_t ts_language_symbol_count(const TSLanguage *self) { | ||
| 7 | return self->symbol_count + self->alias_count; | ||
| 8 | } | ||
| 9 | |||
| 10 | uint32_t ts_language_state_count(const TSLanguage *self) { | ||
| 11 | return self->state_count; | ||
| 12 | } | ||
| 13 | |||
| 14 | uint32_t ts_language_version(const TSLanguage *self) { | ||
| 15 | return self->version; | ||
| 16 | } | ||
| 17 | |||
| 18 | uint32_t ts_language_field_count(const TSLanguage *self) { | ||
| 19 | return self->field_count; | ||
| 20 | } | ||
| 21 | |||
| 22 | void ts_language_table_entry( | ||
| 23 | const TSLanguage *self, | ||
| 24 | TSStateId state, | ||
| 25 | TSSymbol symbol, | ||
| 26 | TableEntry *result | ||
| 27 | ) { | ||
| 28 | if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { | ||
| 29 | result->action_count = 0; | ||
| 30 | result->is_reusable = false; | ||
| 31 | result->actions = NULL; | ||
| 32 | } else { | ||
| 33 | assert(symbol < self->token_count); | ||
| 34 | uint32_t action_index = ts_language_lookup(self, state, symbol); | ||
| 35 | const TSParseActionEntry *entry = &self->parse_actions[action_index]; | ||
| 36 | result->action_count = entry->entry.count; | ||
| 37 | result->is_reusable = entry->entry.reusable; | ||
| 38 | result->actions = (const TSParseAction *)(entry + 1); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | TSSymbolMetadata ts_language_symbol_metadata( | ||
| 43 | const TSLanguage *self, | ||
| 44 | TSSymbol symbol | ||
| 45 | ) { | ||
| 46 | if (symbol == ts_builtin_sym_error) { | ||
| 47 | return (TSSymbolMetadata) {.visible = true, .named = true}; | ||
| 48 | } else if (symbol == ts_builtin_sym_error_repeat) { | ||
| 49 | return (TSSymbolMetadata) {.visible = false, .named = false}; | ||
| 50 | } else { | ||
| 51 | return self->symbol_metadata[symbol]; | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | TSSymbol ts_language_public_symbol( | ||
| 56 | const TSLanguage *self, | ||
| 57 | TSSymbol symbol | ||
| 58 | ) { | ||
| 59 | if (symbol == ts_builtin_sym_error) return symbol; | ||
| 60 | return self->public_symbol_map[symbol]; | ||
| 61 | } | ||
| 62 | |||
| 63 | TSStateId ts_language_next_state( | ||
| 64 | const TSLanguage *self, | ||
| 65 | TSStateId state, | ||
| 66 | TSSymbol symbol | ||
| 67 | ) { | ||
| 68 | if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { | ||
| 69 | return 0; | ||
| 70 | } else if (symbol < self->token_count) { | ||
| 71 | uint32_t count; | ||
| 72 | const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); | ||
| 73 | if (count > 0) { | ||
| 74 | TSParseAction action = actions[count - 1]; | ||
| 75 | if (action.type == TSParseActionTypeShift) { | ||
| 76 | return action.shift.extra ? state : action.shift.state; | ||
| 77 | } | ||
| 78 | } | ||
| 79 | return 0; | ||
| 80 | } else { | ||
| 81 | return ts_language_lookup(self, state, symbol); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | const char *ts_language_symbol_name( | ||
| 86 | const TSLanguage *self, | ||
| 87 | TSSymbol symbol | ||
| 88 | ) { | ||
| 89 | if (symbol == ts_builtin_sym_error) { | ||
| 90 | return "ERROR"; | ||
| 91 | } else if (symbol == ts_builtin_sym_error_repeat) { | ||
| 92 | return "_ERROR"; | ||
| 93 | } else if (symbol < ts_language_symbol_count(self)) { | ||
| 94 | return self->symbol_names[symbol]; | ||
| 95 | } else { | ||
| 96 | return NULL; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | TSSymbol ts_language_symbol_for_name( | ||
| 101 | const TSLanguage *self, | ||
| 102 | const char *string, | ||
| 103 | uint32_t length, | ||
| 104 | bool is_named | ||
| 105 | ) { | ||
| 106 | if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; | ||
| 107 | uint16_t count = (uint16_t)ts_language_symbol_count(self); | ||
| 108 | for (TSSymbol i = 0; i < count; i++) { | ||
| 109 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); | ||
| 110 | if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; | ||
| 111 | const char *symbol_name = self->symbol_names[i]; | ||
| 112 | if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { | ||
| 113 | return self->public_symbol_map[i]; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | return 0; | ||
| 117 | } | ||
| 118 | |||
| 119 | TSSymbolType ts_language_symbol_type( | ||
| 120 | const TSLanguage *self, | ||
| 121 | TSSymbol symbol | ||
| 122 | ) { | ||
| 123 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); | ||
| 124 | if (metadata.named && metadata.visible) { | ||
| 125 | return TSSymbolTypeRegular; | ||
| 126 | } else if (metadata.visible) { | ||
| 127 | return TSSymbolTypeAnonymous; | ||
| 128 | } else { | ||
| 129 | return TSSymbolTypeAuxiliary; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | const char *ts_language_field_name_for_id( | ||
| 134 | const TSLanguage *self, | ||
| 135 | TSFieldId id | ||
| 136 | ) { | ||
| 137 | uint32_t count = ts_language_field_count(self); | ||
| 138 | if (count && id <= count) { | ||
| 139 | return self->field_names[id]; | ||
| 140 | } else { | ||
| 141 | return NULL; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | TSFieldId ts_language_field_id_for_name( | ||
| 146 | const TSLanguage *self, | ||
| 147 | const char *name, | ||
| 148 | uint32_t name_length | ||
| 149 | ) { | ||
| 150 | uint16_t count = (uint16_t)ts_language_field_count(self); | ||
| 151 | for (TSSymbol i = 1; i < count + 1; i++) { | ||
| 152 | switch (strncmp(name, self->field_names[i], name_length)) { | ||
| 153 | case 0: | ||
| 154 | if (self->field_names[i][name_length] == 0) return i; | ||
| 155 | break; | ||
| 156 | case -1: | ||
| 157 | return 0; | ||
| 158 | default: | ||
| 159 | break; | ||
| 160 | } | ||
| 161 | } | ||
| 162 | return 0; | ||
| 163 | } | ||
| 164 | |||
| 165 | TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) { | ||
| 166 | if (state >= self->state_count) return NULL; | ||
| 167 | LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); | ||
| 168 | *iterator = ts_language_lookaheads(self, state); | ||
| 169 | return (TSLookaheadIterator *)iterator; | ||
| 170 | } | ||
| 171 | |||
| 172 | void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { | ||
| 173 | ts_free(self); | ||
| 174 | } | ||
| 175 | |||
| 176 | bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) { | ||
| 177 | LookaheadIterator *iterator = (LookaheadIterator *)self; | ||
| 178 | if (state >= iterator->language->state_count) return false; | ||
| 179 | *iterator = ts_language_lookaheads(iterator->language, state); | ||
| 180 | return true; | ||
| 181 | } | ||
| 182 | |||
| 183 | const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) { | ||
| 184 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; | ||
| 185 | return iterator->language; | ||
| 186 | } | ||
| 187 | |||
| 188 | bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) { | ||
| 189 | if (state >= language->state_count) return false; | ||
| 190 | LookaheadIterator *iterator = (LookaheadIterator *)self; | ||
| 191 | *iterator = ts_language_lookaheads(language, state); | ||
| 192 | return true; | ||
| 193 | } | ||
| 194 | |||
| 195 | bool ts_lookahead_iterator_next(TSLookaheadIterator *self) { | ||
| 196 | LookaheadIterator *iterator = (LookaheadIterator *)self; | ||
| 197 | return ts_lookahead_iterator__next(iterator); | ||
| 198 | } | ||
| 199 | |||
| 200 | TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { | ||
| 201 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; | ||
| 202 | return iterator->symbol; | ||
| 203 | } | ||
| 204 | |||
| 205 | const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { | ||
| 206 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; | ||
| 207 | return ts_language_symbol_name(iterator->language, iterator->symbol); | ||
| 208 | } | ||
diff --git a/vendor/tree-sitter/lib/src/language.h b/vendor/tree-sitter/lib/src/language.h new file mode 100644 index 0000000..55b5d89 --- /dev/null +++ b/vendor/tree-sitter/lib/src/language.h | |||
| @@ -0,0 +1,296 @@ | |||
| 1 | #ifndef TREE_SITTER_LANGUAGE_H_ | ||
| 2 | #define TREE_SITTER_LANGUAGE_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include "./subtree.h" | ||
| 9 | #include "tree_sitter/parser.h" | ||
| 10 | |||
| 11 | #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) | ||
| 12 | |||
| 13 | typedef struct { | ||
| 14 | const TSParseAction *actions; | ||
| 15 | uint32_t action_count; | ||
| 16 | bool is_reusable; | ||
| 17 | } TableEntry; | ||
| 18 | |||
| 19 | typedef struct { | ||
| 20 | const TSLanguage *language; | ||
| 21 | const uint16_t *data; | ||
| 22 | const uint16_t *group_end; | ||
| 23 | TSStateId state; | ||
| 24 | uint16_t table_value; | ||
| 25 | uint16_t section_index; | ||
| 26 | uint16_t group_count; | ||
| 27 | bool is_small_state; | ||
| 28 | |||
| 29 | const TSParseAction *actions; | ||
| 30 | TSSymbol symbol; | ||
| 31 | TSStateId next_state; | ||
| 32 | uint16_t action_count; | ||
| 33 | } LookaheadIterator; | ||
| 34 | |||
| 35 | void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *); | ||
| 36 | |||
| 37 | TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); | ||
| 38 | |||
| 39 | TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol); | ||
| 40 | |||
| 41 | TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); | ||
| 42 | |||
| 43 | static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) { | ||
| 44 | return 0 < symbol && symbol < self->external_token_count + 1; | ||
| 45 | } | ||
| 46 | |||
| 47 | static inline const TSParseAction *ts_language_actions( | ||
| 48 | const TSLanguage *self, | ||
| 49 | TSStateId state, | ||
| 50 | TSSymbol symbol, | ||
| 51 | uint32_t *count | ||
| 52 | ) { | ||
| 53 | TableEntry entry; | ||
| 54 | ts_language_table_entry(self, state, symbol, &entry); | ||
| 55 | *count = entry.action_count; | ||
| 56 | return entry.actions; | ||
| 57 | } | ||
| 58 | |||
| 59 | static inline bool ts_language_has_reduce_action( | ||
| 60 | const TSLanguage *self, | ||
| 61 | TSStateId state, | ||
| 62 | TSSymbol symbol | ||
| 63 | ) { | ||
| 64 | TableEntry entry; | ||
| 65 | ts_language_table_entry(self, state, symbol, &entry); | ||
| 66 | return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; | ||
| 67 | } | ||
| 68 | |||
| 69 | // Lookup the table value for a given symbol and state. | ||
| 70 | // | ||
| 71 | // For non-terminal symbols, the table value represents a successor state. | ||
| 72 | // For terminal symbols, it represents an index in the actions table. | ||
| 73 | // For 'large' parse states, this is a direct lookup. For 'small' parse | ||
| 74 | // states, this requires searching through the symbol groups to find | ||
| 75 | // the given symbol. | ||
| 76 | static inline uint16_t ts_language_lookup( | ||
| 77 | const TSLanguage *self, | ||
| 78 | TSStateId state, | ||
| 79 | TSSymbol symbol | ||
| 80 | ) { | ||
| 81 | if (state >= self->large_state_count) { | ||
| 82 | uint32_t index = self->small_parse_table_map[state - self->large_state_count]; | ||
| 83 | const uint16_t *data = &self->small_parse_table[index]; | ||
| 84 | uint16_t group_count = *(data++); | ||
| 85 | for (unsigned i = 0; i < group_count; i++) { | ||
| 86 | uint16_t section_value = *(data++); | ||
| 87 | uint16_t symbol_count = *(data++); | ||
| 88 | for (unsigned j = 0; j < symbol_count; j++) { | ||
| 89 | if (*(data++) == symbol) return section_value; | ||
| 90 | } | ||
| 91 | } | ||
| 92 | return 0; | ||
| 93 | } else { | ||
| 94 | return self->parse_table[state * self->symbol_count + symbol]; | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 | static inline bool ts_language_has_actions( | ||
| 99 | const TSLanguage *self, | ||
| 100 | TSStateId state, | ||
| 101 | TSSymbol symbol | ||
| 102 | ) { | ||
| 103 | return ts_language_lookup(self, state, symbol) != 0; | ||
| 104 | } | ||
| 105 | |||
| 106 | // Iterate over all of the symbols that are valid in the given state. | ||
| 107 | // | ||
| 108 | // For 'large' parse states, this just requires iterating through | ||
| 109 | // all possible symbols and checking the parse table for each one. | ||
| 110 | // For 'small' parse states, this exploits the structure of the | ||
| 111 | // table to only visit the valid symbols. | ||
| 112 | static inline LookaheadIterator ts_language_lookaheads( | ||
| 113 | const TSLanguage *self, | ||
| 114 | TSStateId state | ||
| 115 | ) { | ||
| 116 | bool is_small_state = state >= self->large_state_count; | ||
| 117 | const uint16_t *data; | ||
| 118 | const uint16_t *group_end = NULL; | ||
| 119 | uint16_t group_count = 0; | ||
| 120 | if (is_small_state) { | ||
| 121 | uint32_t index = self->small_parse_table_map[state - self->large_state_count]; | ||
| 122 | data = &self->small_parse_table[index]; | ||
| 123 | group_end = data + 1; | ||
| 124 | group_count = *data; | ||
| 125 | } else { | ||
| 126 | data = &self->parse_table[state * self->symbol_count] - 1; | ||
| 127 | } | ||
| 128 | return (LookaheadIterator) { | ||
| 129 | .language = self, | ||
| 130 | .data = data, | ||
| 131 | .group_end = group_end, | ||
| 132 | .group_count = group_count, | ||
| 133 | .is_small_state = is_small_state, | ||
| 134 | .symbol = UINT16_MAX, | ||
| 135 | .next_state = 0, | ||
| 136 | }; | ||
| 137 | } | ||
| 138 | |||
| 139 | static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) { | ||
| 140 | // For small parse states, valid symbols are listed explicitly, | ||
| 141 | // grouped by their value. There's no need to look up the actions | ||
| 142 | // again until moving to the next group. | ||
| 143 | if (self->is_small_state) { | ||
| 144 | self->data++; | ||
| 145 | if (self->data == self->group_end) { | ||
| 146 | if (self->group_count == 0) return false; | ||
| 147 | self->group_count--; | ||
| 148 | self->table_value = *(self->data++); | ||
| 149 | unsigned symbol_count = *(self->data++); | ||
| 150 | self->group_end = self->data + symbol_count; | ||
| 151 | self->symbol = *self->data; | ||
| 152 | } else { | ||
| 153 | self->symbol = *self->data; | ||
| 154 | return true; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | // For large parse states, iterate through every symbol until one | ||
| 159 | // is found that has valid actions. | ||
| 160 | else { | ||
| 161 | do { | ||
| 162 | self->data++; | ||
| 163 | self->symbol++; | ||
| 164 | if (self->symbol >= self->language->symbol_count) return false; | ||
| 165 | self->table_value = *self->data; | ||
| 166 | } while (!self->table_value); | ||
| 167 | } | ||
| 168 | |||
| 169 | // Depending on if the symbols is terminal or non-terminal, the table value either | ||
| 170 | // represents a list of actions or a successor state. | ||
| 171 | if (self->symbol < self->language->token_count) { | ||
| 172 | const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value]; | ||
| 173 | self->action_count = entry->entry.count; | ||
| 174 | self->actions = (const TSParseAction *)(entry + 1); | ||
| 175 | self->next_state = 0; | ||
| 176 | } else { | ||
| 177 | self->action_count = 0; | ||
| 178 | self->next_state = self->table_value; | ||
| 179 | } | ||
| 180 | return true; | ||
| 181 | } | ||
| 182 | |||
| 183 | // Whether the state is a "primary state". If this returns false, it indicates that there exists | ||
| 184 | // another state that behaves identically to this one with respect to query analysis. | ||
| 185 | static inline bool ts_language_state_is_primary( | ||
| 186 | const TSLanguage *self, | ||
| 187 | TSStateId state | ||
| 188 | ) { | ||
| 189 | if (self->version >= 14) { | ||
| 190 | return state == self->primary_state_ids[state]; | ||
| 191 | } else { | ||
| 192 | return true; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | static inline const bool *ts_language_enabled_external_tokens( | ||
| 197 | const TSLanguage *self, | ||
| 198 | unsigned external_scanner_state | ||
| 199 | ) { | ||
| 200 | if (external_scanner_state == 0) { | ||
| 201 | return NULL; | ||
| 202 | } else { | ||
| 203 | return self->external_scanner.states + self->external_token_count * external_scanner_state; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | static inline const TSSymbol *ts_language_alias_sequence( | ||
| 208 | const TSLanguage *self, | ||
| 209 | uint32_t production_id | ||
| 210 | ) { | ||
| 211 | return production_id ? | ||
| 212 | &self->alias_sequences[production_id * self->max_alias_sequence_length] : | ||
| 213 | NULL; | ||
| 214 | } | ||
| 215 | |||
| 216 | static inline TSSymbol ts_language_alias_at( | ||
| 217 | const TSLanguage *self, | ||
| 218 | uint32_t production_id, | ||
| 219 | uint32_t child_index | ||
| 220 | ) { | ||
| 221 | return production_id ? | ||
| 222 | self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] : | ||
| 223 | 0; | ||
| 224 | } | ||
| 225 | |||
| 226 | static inline void ts_language_field_map( | ||
| 227 | const TSLanguage *self, | ||
| 228 | uint32_t production_id, | ||
| 229 | const TSFieldMapEntry **start, | ||
| 230 | const TSFieldMapEntry **end | ||
| 231 | ) { | ||
| 232 | if (self->field_count == 0) { | ||
| 233 | *start = NULL; | ||
| 234 | *end = NULL; | ||
| 235 | return; | ||
| 236 | } | ||
| 237 | |||
| 238 | TSFieldMapSlice slice = self->field_map_slices[production_id]; | ||
| 239 | *start = &self->field_map_entries[slice.index]; | ||
| 240 | *end = &self->field_map_entries[slice.index] + slice.length; | ||
| 241 | } | ||
| 242 | |||
| 243 | static inline void ts_language_aliases_for_symbol( | ||
| 244 | const TSLanguage *self, | ||
| 245 | TSSymbol original_symbol, | ||
| 246 | const TSSymbol **start, | ||
| 247 | const TSSymbol **end | ||
| 248 | ) { | ||
| 249 | *start = &self->public_symbol_map[original_symbol]; | ||
| 250 | *end = *start + 1; | ||
| 251 | |||
| 252 | unsigned idx = 0; | ||
| 253 | for (;;) { | ||
| 254 | TSSymbol symbol = self->alias_map[idx++]; | ||
| 255 | if (symbol == 0 || symbol > original_symbol) break; | ||
| 256 | uint16_t count = self->alias_map[idx++]; | ||
| 257 | if (symbol == original_symbol) { | ||
| 258 | *start = &self->alias_map[idx]; | ||
| 259 | *end = &self->alias_map[idx + count]; | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | idx += count; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | static inline void ts_language_write_symbol_as_dot_string( | ||
| 267 | const TSLanguage *self, | ||
| 268 | FILE *f, | ||
| 269 | TSSymbol symbol | ||
| 270 | ) { | ||
| 271 | const char *name = ts_language_symbol_name(self, symbol); | ||
| 272 | for (const char *chr = name; *chr; chr++) { | ||
| 273 | switch (*chr) { | ||
| 274 | case '"': | ||
| 275 | case '\\': | ||
| 276 | fputc('\\', f); | ||
| 277 | fputc(*chr, f); | ||
| 278 | break; | ||
| 279 | case '\n': | ||
| 280 | fputs("\\n", f); | ||
| 281 | break; | ||
| 282 | case '\t': | ||
| 283 | fputs("\\t", f); | ||
| 284 | break; | ||
| 285 | default: | ||
| 286 | fputc(*chr, f); | ||
| 287 | break; | ||
| 288 | } | ||
| 289 | } | ||
| 290 | } | ||
| 291 | |||
| 292 | #ifdef __cplusplus | ||
| 293 | } | ||
| 294 | #endif | ||
| 295 | |||
| 296 | #endif // TREE_SITTER_LANGUAGE_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/length.h b/vendor/tree-sitter/lib/src/length.h new file mode 100644 index 0000000..42d61ef --- /dev/null +++ b/vendor/tree-sitter/lib/src/length.h | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | #ifndef TREE_SITTER_LENGTH_H_ | ||
| 2 | #define TREE_SITTER_LENGTH_H_ | ||
| 3 | |||
| 4 | #include <stdlib.h> | ||
| 5 | #include <stdbool.h> | ||
| 6 | #include "./point.h" | ||
| 7 | #include "tree_sitter/api.h" | ||
| 8 | |||
// A distance through source text, tracked two ways at once: as a byte count
// and as a row/column extent.
typedef struct {
  uint32_t bytes;
  TSPoint extent;
} Length;
| 13 | |||
// Sentinel meaning "no length recorded": zero bytes paired with a non-zero
// column, which is exactly the combination length_is_undefined() tests for.
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
// Upper bound: every field set to UINT32_MAX.
static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
| 16 | |||
| 17 | static inline bool length_is_undefined(Length length) { | ||
| 18 | return length.bytes == 0 && length.extent.column != 0; | ||
| 19 | } | ||
| 20 | |||
| 21 | static inline Length length_min(Length len1, Length len2) { | ||
| 22 | return (len1.bytes < len2.bytes) ? len1 : len2; | ||
| 23 | } | ||
| 24 | |||
| 25 | static inline Length length_add(Length len1, Length len2) { | ||
| 26 | Length result; | ||
| 27 | result.bytes = len1.bytes + len2.bytes; | ||
| 28 | result.extent = point_add(len1.extent, len2.extent); | ||
| 29 | return result; | ||
| 30 | } | ||
| 31 | |||
| 32 | static inline Length length_sub(Length len1, Length len2) { | ||
| 33 | Length result; | ||
| 34 | result.bytes = len1.bytes - len2.bytes; | ||
| 35 | result.extent = point_sub(len1.extent, len2.extent); | ||
| 36 | return result; | ||
| 37 | } | ||
| 38 | |||
| 39 | static inline Length length_zero(void) { | ||
| 40 | Length result = {0, {0, 0}}; | ||
| 41 | return result; | ||
| 42 | } | ||
| 43 | |||
| 44 | static inline Length length_saturating_sub(Length len1, Length len2) { | ||
| 45 | if (len1.bytes > len2.bytes) { | ||
| 46 | return length_sub(len1, len2); | ||
| 47 | } else { | ||
| 48 | return length_zero(); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | #endif | ||
diff --git a/vendor/tree-sitter/lib/src/lexer.c b/vendor/tree-sitter/lib/src/lexer.c new file mode 100644 index 0000000..d108c04 --- /dev/null +++ b/vendor/tree-sitter/lib/src/lexer.c | |||
| @@ -0,0 +1,419 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include "./lexer.h" | ||
| 3 | #include "./subtree.h" | ||
| 4 | #include "./length.h" | ||
| 5 | #include "./unicode.h" | ||
| 6 | |||
// Format a lexing log line that includes `character` and pass it to the
// lexer's logger, if one is installed. Printable ASCII (32..126) is shown as
// a quoted character, anything else as a decimal number.
//
// Requirements on the call site:
//  - a `Lexer *self` must be in scope;
//  - `message` must be a string literal (it is pasted onto the format text).
//
// The expansion is intentionally a bare `if` block: the call sites in this
// file invoke LOG(...) without a trailing semicolon. The argument is copied
// into a local once, so it is evaluated a single time and may be any
// expression without precedence surprises.
#define LOG(message, character)                                   \
  if (self->logger.log) {                                         \
    const int32_t ts_lexer_log_character_ = (character);          \
    snprintf(                                                     \
      self->debug_buffer,                                         \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE,                      \
      32 <= ts_lexer_log_character_ &&                            \
      ts_lexer_log_character_ < 127 ?                             \
        message " character:'%c'" :                               \
        message " character:%d",                                  \
      ts_lexer_log_character_                                     \
    );                                                            \
    self->logger.log(                                             \
      self->logger.payload,                                       \
      TSLogTypeLex,                                               \
      self->debug_buffer                                          \
    );                                                            \
  }
| 23 | |||
| 24 | static const int32_t BYTE_ORDER_MARK = 0xFEFF; | ||
| 25 | |||
| 26 | static const TSRange DEFAULT_RANGE = { | ||
| 27 | .start_point = { | ||
| 28 | .row = 0, | ||
| 29 | .column = 0, | ||
| 30 | }, | ||
| 31 | .end_point = { | ||
| 32 | .row = UINT32_MAX, | ||
| 33 | .column = UINT32_MAX, | ||
| 34 | }, | ||
| 35 | .start_byte = 0, | ||
| 36 | .end_byte = UINT32_MAX | ||
| 37 | }; | ||
| 38 | |||
| 39 | // Check if the lexer has reached EOF. This state is stored | ||
| 40 | // by setting the lexer's `current_included_range_index` such that | ||
| 41 | // it has consumed all of its available ranges. | ||
| 42 | static bool ts_lexer__eof(const TSLexer *_self) { | ||
| 43 | Lexer *self = (Lexer *)_self; | ||
| 44 | return self->current_included_range_index == self->included_range_count; | ||
| 45 | } | ||
| 46 | |||
| 47 | // Clear the currently stored chunk of source code, because the lexer's | ||
| 48 | // position has changed. | ||
| 49 | static void ts_lexer__clear_chunk(Lexer *self) { | ||
| 50 | self->chunk = NULL; | ||
| 51 | self->chunk_size = 0; | ||
| 52 | self->chunk_start = 0; | ||
| 53 | } | ||
| 54 | |||
| 55 | // Call the lexer's input callback to obtain a new chunk of source code | ||
| 56 | // for the current position. | ||
| 57 | static void ts_lexer__get_chunk(Lexer *self) { | ||
| 58 | self->chunk_start = self->current_position.bytes; | ||
| 59 | self->chunk = self->input.read( | ||
| 60 | self->input.payload, | ||
| 61 | self->current_position.bytes, | ||
| 62 | self->current_position.extent, | ||
| 63 | &self->chunk_size | ||
| 64 | ); | ||
| 65 | if (!self->chunk_size) { | ||
| 66 | self->current_included_range_index = self->included_range_count; | ||
| 67 | self->chunk = NULL; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | // Decode the next unicode character in the current chunk of source code. | ||
| 72 | // This assumes that the lexer has already retrieved a chunk of source | ||
| 73 | // code that spans the current position. | ||
| 74 | static void ts_lexer__get_lookahead(Lexer *self) { | ||
| 75 | uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; | ||
| 76 | uint32_t size = self->chunk_size - position_in_chunk; | ||
| 77 | |||
| 78 | if (size == 0) { | ||
| 79 | self->lookahead_size = 1; | ||
| 80 | self->data.lookahead = '\0'; | ||
| 81 | return; | ||
| 82 | } | ||
| 83 | |||
| 84 | const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; | ||
| 85 | UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8 | ||
| 86 | ? ts_decode_utf8 | ||
| 87 | : ts_decode_utf16; | ||
| 88 | |||
| 89 | self->lookahead_size = decode(chunk, size, &self->data.lookahead); | ||
| 90 | |||
| 91 | // If this chunk ended in the middle of a multi-byte character, | ||
| 92 | // try again with a fresh chunk. | ||
| 93 | if (self->data.lookahead == TS_DECODE_ERROR && size < 4) { | ||
| 94 | ts_lexer__get_chunk(self); | ||
| 95 | chunk = (const uint8_t *)self->chunk; | ||
| 96 | size = self->chunk_size; | ||
| 97 | self->lookahead_size = decode(chunk, size, &self->data.lookahead); | ||
| 98 | } | ||
| 99 | |||
| 100 | if (self->data.lookahead == TS_DECODE_ERROR) { | ||
| 101 | self->lookahead_size = 1; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | static void ts_lexer_goto(Lexer *self, Length position) { | ||
| 106 | self->current_position = position; | ||
| 107 | |||
| 108 | // Move to the first valid position at or after the given position. | ||
| 109 | bool found_included_range = false; | ||
| 110 | for (unsigned i = 0; i < self->included_range_count; i++) { | ||
| 111 | TSRange *included_range = &self->included_ranges[i]; | ||
| 112 | if ( | ||
| 113 | included_range->end_byte > self->current_position.bytes && | ||
| 114 | included_range->end_byte > included_range->start_byte | ||
| 115 | ) { | ||
| 116 | if (included_range->start_byte >= self->current_position.bytes) { | ||
| 117 | self->current_position = (Length) { | ||
| 118 | .bytes = included_range->start_byte, | ||
| 119 | .extent = included_range->start_point, | ||
| 120 | }; | ||
| 121 | } | ||
| 122 | |||
| 123 | self->current_included_range_index = i; | ||
| 124 | found_included_range = true; | ||
| 125 | break; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | if (found_included_range) { | ||
| 130 | // If the current position is outside of the current chunk of text, | ||
| 131 | // then clear out the current chunk of text. | ||
| 132 | if (self->chunk && ( | ||
| 133 | self->current_position.bytes < self->chunk_start || | ||
| 134 | self->current_position.bytes >= self->chunk_start + self->chunk_size | ||
| 135 | )) { | ||
| 136 | ts_lexer__clear_chunk(self); | ||
| 137 | } | ||
| 138 | |||
| 139 | self->lookahead_size = 0; | ||
| 140 | self->data.lookahead = '\0'; | ||
| 141 | } | ||
| 142 | |||
| 143 | // If the given position is beyond any of included ranges, move to the EOF | ||
| 144 | // state - past the end of the included ranges. | ||
| 145 | else { | ||
| 146 | self->current_included_range_index = self->included_range_count; | ||
| 147 | TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; | ||
| 148 | self->current_position = (Length) { | ||
| 149 | .bytes = last_included_range->end_byte, | ||
| 150 | .extent = last_included_range->end_point, | ||
| 151 | }; | ||
| 152 | ts_lexer__clear_chunk(self); | ||
| 153 | self->lookahead_size = 1; | ||
| 154 | self->data.lookahead = '\0'; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | // Intended to be called only from functions that control logging. | ||
| 159 | static void ts_lexer__do_advance(Lexer *self, bool skip) { | ||
| 160 | if (self->lookahead_size) { | ||
| 161 | self->current_position.bytes += self->lookahead_size; | ||
| 162 | if (self->data.lookahead == '\n') { | ||
| 163 | self->current_position.extent.row++; | ||
| 164 | self->current_position.extent.column = 0; | ||
| 165 | } else { | ||
| 166 | self->current_position.extent.column += self->lookahead_size; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | const TSRange *current_range = &self->included_ranges[self->current_included_range_index]; | ||
| 171 | while ( | ||
| 172 | self->current_position.bytes >= current_range->end_byte || | ||
| 173 | current_range->end_byte == current_range->start_byte | ||
| 174 | ) { | ||
| 175 | if (self->current_included_range_index < self->included_range_count) { | ||
| 176 | self->current_included_range_index++; | ||
| 177 | } | ||
| 178 | if (self->current_included_range_index < self->included_range_count) { | ||
| 179 | current_range++; | ||
| 180 | self->current_position = (Length) { | ||
| 181 | current_range->start_byte, | ||
| 182 | current_range->start_point, | ||
| 183 | }; | ||
| 184 | } else { | ||
| 185 | current_range = NULL; | ||
| 186 | break; | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | if (skip) self->token_start_position = self->current_position; | ||
| 191 | |||
| 192 | if (current_range) { | ||
| 193 | if ( | ||
| 194 | self->current_position.bytes < self->chunk_start || | ||
| 195 | self->current_position.bytes >= self->chunk_start + self->chunk_size | ||
| 196 | ) { | ||
| 197 | ts_lexer__get_chunk(self); | ||
| 198 | } | ||
| 199 | ts_lexer__get_lookahead(self); | ||
| 200 | } else { | ||
| 201 | ts_lexer__clear_chunk(self); | ||
| 202 | self->data.lookahead = '\0'; | ||
| 203 | self->lookahead_size = 1; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | // Advance to the next character in the source code, retrieving a new | ||
| 208 | // chunk of source code if needed. | ||
| 209 | static void ts_lexer__advance(TSLexer *_self, bool skip) { | ||
| 210 | Lexer *self = (Lexer *)_self; | ||
| 211 | if (!self->chunk) return; | ||
| 212 | |||
| 213 | if (skip) { | ||
| 214 | LOG("skip", self->data.lookahead) | ||
| 215 | } else { | ||
| 216 | LOG("consume", self->data.lookahead) | ||
| 217 | } | ||
| 218 | |||
| 219 | ts_lexer__do_advance(self, skip); | ||
| 220 | } | ||
| 221 | |||
| 222 | // Mark that a token match has completed. This can be called multiple | ||
| 223 | // times if a longer match is found later. | ||
| 224 | static void ts_lexer__mark_end(TSLexer *_self) { | ||
| 225 | Lexer *self = (Lexer *)_self; | ||
| 226 | if (!ts_lexer__eof(&self->data)) { | ||
| 227 | // If the lexer is right at the beginning of included range, | ||
| 228 | // then the token should be considered to end at the *end* of the | ||
| 229 | // previous included range, rather than here. | ||
| 230 | TSRange *current_included_range = &self->included_ranges[ | ||
| 231 | self->current_included_range_index | ||
| 232 | ]; | ||
| 233 | if ( | ||
| 234 | self->current_included_range_index > 0 && | ||
| 235 | self->current_position.bytes == current_included_range->start_byte | ||
| 236 | ) { | ||
| 237 | TSRange *previous_included_range = current_included_range - 1; | ||
| 238 | self->token_end_position = (Length) { | ||
| 239 | previous_included_range->end_byte, | ||
| 240 | previous_included_range->end_point, | ||
| 241 | }; | ||
| 242 | return; | ||
| 243 | } | ||
| 244 | } | ||
| 245 | self->token_end_position = self->current_position; | ||
| 246 | } | ||
| 247 | |||
| 248 | static uint32_t ts_lexer__get_column(TSLexer *_self) { | ||
| 249 | Lexer *self = (Lexer *)_self; | ||
| 250 | |||
| 251 | uint32_t goal_byte = self->current_position.bytes; | ||
| 252 | |||
| 253 | self->did_get_column = true; | ||
| 254 | self->current_position.bytes -= self->current_position.extent.column; | ||
| 255 | self->current_position.extent.column = 0; | ||
| 256 | |||
| 257 | if (self->current_position.bytes < self->chunk_start) { | ||
| 258 | ts_lexer__get_chunk(self); | ||
| 259 | } | ||
| 260 | |||
| 261 | uint32_t result = 0; | ||
| 262 | if (!ts_lexer__eof(_self)) { | ||
| 263 | ts_lexer__get_lookahead(self); | ||
| 264 | while (self->current_position.bytes < goal_byte && self->chunk) { | ||
| 265 | result++; | ||
| 266 | ts_lexer__do_advance(self, false); | ||
| 267 | if (ts_lexer__eof(_self)) break; | ||
| 268 | } | ||
| 269 | } | ||
| 270 | |||
| 271 | return result; | ||
| 272 | } | ||
| 273 | |||
| 274 | // Is the lexer at a boundary between two disjoint included ranges of | ||
| 275 | // source code? This is exposed as an API because some languages' external | ||
| 276 | // scanners need to perform custom actions at these boundaries. | ||
| 277 | static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { | ||
| 278 | const Lexer *self = (const Lexer *)_self; | ||
| 279 | if (self->current_included_range_index < self->included_range_count) { | ||
| 280 | TSRange *current_range = &self->included_ranges[self->current_included_range_index]; | ||
| 281 | return self->current_position.bytes == current_range->start_byte; | ||
| 282 | } else { | ||
| 283 | return false; | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | void ts_lexer_init(Lexer *self) { | ||
| 288 | *self = (Lexer) { | ||
| 289 | .data = { | ||
| 290 | // The lexer's methods are stored as struct fields so that generated | ||
| 291 | // parsers can call them without needing to be linked against this | ||
| 292 | // library. | ||
| 293 | .advance = ts_lexer__advance, | ||
| 294 | .mark_end = ts_lexer__mark_end, | ||
| 295 | .get_column = ts_lexer__get_column, | ||
| 296 | .is_at_included_range_start = ts_lexer__is_at_included_range_start, | ||
| 297 | .eof = ts_lexer__eof, | ||
| 298 | .lookahead = 0, | ||
| 299 | .result_symbol = 0, | ||
| 300 | }, | ||
| 301 | .chunk = NULL, | ||
| 302 | .chunk_size = 0, | ||
| 303 | .chunk_start = 0, | ||
| 304 | .current_position = {0, {0, 0}}, | ||
| 305 | .logger = { | ||
| 306 | .payload = NULL, | ||
| 307 | .log = NULL | ||
| 308 | }, | ||
| 309 | .included_ranges = NULL, | ||
| 310 | .included_range_count = 0, | ||
| 311 | .current_included_range_index = 0, | ||
| 312 | }; | ||
| 313 | ts_lexer_set_included_ranges(self, NULL, 0); | ||
| 314 | } | ||
| 315 | |||
// Release the lexer's included-ranges array. The Lexer struct itself is not
// freed here.
void ts_lexer_delete(Lexer *self) {
  ts_free(self->included_ranges);
}
| 319 | |||
| 320 | void ts_lexer_set_input(Lexer *self, TSInput input) { | ||
| 321 | self->input = input; | ||
| 322 | ts_lexer__clear_chunk(self); | ||
| 323 | ts_lexer_goto(self, self->current_position); | ||
| 324 | } | ||
| 325 | |||
| 326 | // Move the lexer to the given position. This doesn't do any work | ||
| 327 | // if the parser is already at the given position. | ||
| 328 | void ts_lexer_reset(Lexer *self, Length position) { | ||
| 329 | if (position.bytes != self->current_position.bytes) { | ||
| 330 | ts_lexer_goto(self, position); | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 334 | void ts_lexer_start(Lexer *self) { | ||
| 335 | self->token_start_position = self->current_position; | ||
| 336 | self->token_end_position = LENGTH_UNDEFINED; | ||
| 337 | self->data.result_symbol = 0; | ||
| 338 | self->did_get_column = false; | ||
| 339 | if (!ts_lexer__eof(&self->data)) { | ||
| 340 | if (!self->chunk_size) ts_lexer__get_chunk(self); | ||
| 341 | if (!self->lookahead_size) ts_lexer__get_lookahead(self); | ||
| 342 | if ( | ||
| 343 | self->current_position.bytes == 0 && | ||
| 344 | self->data.lookahead == BYTE_ORDER_MARK | ||
| 345 | ) ts_lexer__advance(&self->data, true); | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { | ||
| 350 | if (length_is_undefined(self->token_end_position)) { | ||
| 351 | ts_lexer__mark_end(&self->data); | ||
| 352 | } | ||
| 353 | |||
| 354 | // If the token ended at an included range boundary, then its end position | ||
| 355 | // will have been reset to the end of the preceding range. Reset the start | ||
| 356 | // position to match. | ||
| 357 | if (self->token_end_position.bytes < self->token_start_position.bytes) { | ||
| 358 | self->token_start_position = self->token_end_position; | ||
| 359 | } | ||
| 360 | |||
| 361 | uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; | ||
| 362 | |||
| 363 | // In order to determine that a byte sequence is invalid UTF8 or UTF16, | ||
| 364 | // the character decoding algorithm may have looked at the following byte. | ||
| 365 | // Therefore, the next byte *after* the current (invalid) character | ||
| 366 | // affects the interpretation of the current character. | ||
| 367 | if (self->data.lookahead == TS_DECODE_ERROR) { | ||
| 368 | current_lookahead_end_byte++; | ||
| 369 | } | ||
| 370 | |||
| 371 | if (current_lookahead_end_byte > *lookahead_end_byte) { | ||
| 372 | *lookahead_end_byte = current_lookahead_end_byte; | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
// Consume characters until the lexer no longer holds a chunk, which is the
// state ts_lexer__do_advance leaves behind once the included ranges run out.
void ts_lexer_advance_to_end(Lexer *self) {
  while (self->chunk) {
    ts_lexer__advance(&self->data, false);
  }
}
| 381 | |||
// Public entry point for marking the token end; forwards to the
// TSLexer-level callback ts_lexer__mark_end.
void ts_lexer_mark_end(Lexer *self) {
  ts_lexer__mark_end(&self->data);
}
| 385 | |||
| 386 | bool ts_lexer_set_included_ranges( | ||
| 387 | Lexer *self, | ||
| 388 | const TSRange *ranges, | ||
| 389 | uint32_t count | ||
| 390 | ) { | ||
| 391 | if (count == 0 || !ranges) { | ||
| 392 | ranges = &DEFAULT_RANGE; | ||
| 393 | count = 1; | ||
| 394 | } else { | ||
| 395 | uint32_t previous_byte = 0; | ||
| 396 | for (unsigned i = 0; i < count; i++) { | ||
| 397 | const TSRange *range = &ranges[i]; | ||
| 398 | if ( | ||
| 399 | range->start_byte < previous_byte || | ||
| 400 | range->end_byte < range->start_byte | ||
| 401 | ) return false; | ||
| 402 | previous_byte = range->end_byte; | ||
| 403 | } | ||
| 404 | } | ||
| 405 | |||
| 406 | size_t size = count * sizeof(TSRange); | ||
| 407 | self->included_ranges = ts_realloc(self->included_ranges, size); | ||
| 408 | memcpy(self->included_ranges, ranges, size); | ||
| 409 | self->included_range_count = count; | ||
| 410 | ts_lexer_goto(self, self->current_position); | ||
| 411 | return true; | ||
| 412 | } | ||
| 413 | |||
// Return the lexer's own included-ranges array (not a copy) and store the
// number of entries in `*count`.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
  *count = self->included_range_count;
  return self->included_ranges;
}
| 418 | |||
| 419 | #undef LOG | ||
diff --git a/vendor/tree-sitter/lib/src/lexer.h b/vendor/tree-sitter/lib/src/lexer.h new file mode 100644 index 0000000..c1a5bfd --- /dev/null +++ b/vendor/tree-sitter/lib/src/lexer.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | #ifndef TREE_SITTER_LEXER_H_ | ||
| 2 | #define TREE_SITTER_LEXER_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include "./length.h" | ||
| 9 | #include "./subtree.h" | ||
| 10 | #include "tree_sitter/api.h" | ||
| 11 | #include "tree_sitter/parser.h" | ||
| 12 | |||
// The library-side lexer state. `data` comes first, and lexer.c casts a
// `TSLexer *` back to a `Lexer *` in its callbacks.
typedef struct {
  TSLexer data;                 // lookahead, result symbol and callback pointers
  Length current_position;      // where the lexer currently is
  Length token_start_position;  // start of the token being lexed
  Length token_end_position;    // end of the token; LENGTH_UNDEFINED until marked

  TSRange *included_ranges;     // heap array, (re)allocated by ts_lexer_set_included_ranges
  const char *chunk;            // text most recently returned by input.read, or NULL
  TSInput input;                // source of text chunks
  TSLogger logger;              // optional log sink; logger.log may be NULL

  uint32_t included_range_count;
  uint32_t current_included_range_index;  // equals included_range_count at EOF
  uint32_t chunk_start;         // byte offset at which `chunk` begins
  uint32_t chunk_size;          // length of `chunk` in bytes
  uint32_t lookahead_size;      // bytes occupied by the lookahead; 0 = not decoded yet
  bool did_get_column;          // set by the get_column callback, cleared by ts_lexer_start

  char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];  // scratch space for log messages
} Lexer;
| 33 | |||
// Lifecycle.
void ts_lexer_init(Lexer *);
void ts_lexer_delete(Lexer *);
// Input and positioning.
void ts_lexer_set_input(Lexer *, TSInput);
void ts_lexer_reset(Lexer *, Length);
// Per-token protocol: start, (advance / mark end), finish. The uint32_t
// passed to ts_lexer_finish is only ever raised, never lowered.
void ts_lexer_start(Lexer *);
void ts_lexer_finish(Lexer *, uint32_t *);
void ts_lexer_advance_to_end(Lexer *);
void ts_lexer_mark_end(Lexer *);
// Included ranges. The setter returns false, changing nothing, when the
// ranges are out of order, overlapping, or end before they start.
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
| 44 | |||
| 45 | #ifdef __cplusplus | ||
| 46 | } | ||
| 47 | #endif | ||
| 48 | |||
| 49 | #endif // TREE_SITTER_LEXER_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/lib.c b/vendor/tree-sitter/lib/src/lib.c new file mode 100644 index 0000000..5aab20d --- /dev/null +++ b/vendor/tree-sitter/lib/src/lib.c | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // The Tree-sitter library can be built by compiling this one source file. | ||
| 2 | // | ||
| 3 | // The following directories must be added to the include path: | ||
| 4 | // - include | ||
| 5 | |||
| 6 | #define _POSIX_C_SOURCE 200112L | ||
| 7 | |||
| 8 | #include "./alloc.c" | ||
| 9 | #include "./get_changed_ranges.c" | ||
| 10 | #include "./language.c" | ||
| 11 | #include "./lexer.c" | ||
| 12 | #include "./node.c" | ||
| 13 | #include "./parser.c" | ||
| 14 | #include "./query.c" | ||
| 15 | #include "./stack.c" | ||
| 16 | #include "./subtree.c" | ||
| 17 | #include "./tree_cursor.c" | ||
| 18 | #include "./tree.c" | ||
diff --git a/vendor/tree-sitter/lib/src/node.c b/vendor/tree-sitter/lib/src/node.c new file mode 100644 index 0000000..546b909 --- /dev/null +++ b/vendor/tree-sitter/lib/src/node.c | |||
| @@ -0,0 +1,767 @@ | |||
| 1 | #include <stdbool.h> | ||
| 2 | #include "./subtree.h" | ||
| 3 | #include "./tree.h" | ||
| 4 | #include "./language.h" | ||
| 5 | |||
// Cursor over the direct children of one subtree, tracking the position of
// each child as it goes.
typedef struct {
  Subtree parent;                   // subtree being iterated; NULL_SUBTREE when it has no children
  const TSTree *tree;               // tree that produced nodes are attached to
  Length position;                  // running position; after a step, the end of the child just returned
  uint32_t child_index;             // index of the next child to return
  uint32_t structural_child_index;  // number of non-extra children returned so far
  const TSSymbol *alias_sequence;   // aliases for the parent's production, or NULL
} NodeChildIterator;
| 14 | |||
| 15 | // TSNode - constructors | ||
| 16 | |||
| 17 | TSNode ts_node_new( | ||
| 18 | const TSTree *tree, | ||
| 19 | const Subtree *subtree, | ||
| 20 | Length position, | ||
| 21 | TSSymbol alias | ||
| 22 | ) { | ||
| 23 | return (TSNode) { | ||
| 24 | {position.bytes, position.extent.row, position.extent.column, alias}, | ||
| 25 | subtree, | ||
| 26 | tree, | ||
| 27 | }; | ||
| 28 | } | ||
| 29 | |||
// The null node: no tree, no subtree, zero position, no alias.
static inline TSNode ts_node__null(void) {
  return ts_node_new(NULL, NULL, length_zero(), 0);
}
| 33 | |||
| 34 | // TSNode - accessors | ||
| 35 | |||
// Start byte of the node, kept in context slot 0 (see ts_node_new).
uint32_t ts_node_start_byte(TSNode self) {
  return self.context[0];
}
| 39 | |||
| 40 | TSPoint ts_node_start_point(TSNode self) { | ||
| 41 | return (TSPoint) {self.context[1], self.context[2]}; | ||
| 42 | } | ||
| 43 | |||
// Alias symbol of the node, kept in context slot 3; 0 means no alias.
static inline uint32_t ts_node__alias(const TSNode *self) {
  return self->context[3];
}
| 47 | |||
// The subtree behind the node: `id` is read as a pointer to a Subtree and
// dereferenced, so this must not be called on the null node.
static inline Subtree ts_node__subtree(TSNode self) {
  return *(const Subtree *)self.id;
}
| 51 | |||
| 52 | // NodeChildIterator | ||
| 53 | |||
| 54 | static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { | ||
| 55 | Subtree subtree = ts_node__subtree(*node); | ||
| 56 | if (ts_subtree_child_count(subtree) == 0) { | ||
| 57 | return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; | ||
| 58 | } | ||
| 59 | const TSSymbol *alias_sequence = ts_language_alias_sequence( | ||
| 60 | node->tree->language, | ||
| 61 | subtree.ptr->production_id | ||
| 62 | ); | ||
| 63 | return (NodeChildIterator) { | ||
| 64 | .tree = node->tree, | ||
| 65 | .parent = subtree, | ||
| 66 | .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, | ||
| 67 | .child_index = 0, | ||
| 68 | .structural_child_index = 0, | ||
| 69 | .alias_sequence = alias_sequence, | ||
| 70 | }; | ||
| 71 | } | ||
| 72 | |||
// True once every child has been returned. Dereferences `parent.ptr`, so the
// caller must rule out a null parent first (ts_node_child_iterator_next does).
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
  return self->child_index == self->parent.ptr->child_count;
}
| 76 | |||
| 77 | static inline bool ts_node_child_iterator_next( | ||
| 78 | NodeChildIterator *self, | ||
| 79 | TSNode *result | ||
| 80 | ) { | ||
| 81 | if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; | ||
| 82 | const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; | ||
| 83 | TSSymbol alias_symbol = 0; | ||
| 84 | if (!ts_subtree_extra(*child)) { | ||
| 85 | if (self->alias_sequence) { | ||
| 86 | alias_symbol = self->alias_sequence[self->structural_child_index]; | ||
| 87 | } | ||
| 88 | self->structural_child_index++; | ||
| 89 | } | ||
| 90 | if (self->child_index > 0) { | ||
| 91 | self->position = length_add(self->position, ts_subtree_padding(*child)); | ||
| 92 | } | ||
| 93 | *result = ts_node_new( | ||
| 94 | self->tree, | ||
| 95 | child, | ||
| 96 | self->position, | ||
| 97 | alias_symbol | ||
| 98 | ); | ||
| 99 | self->position = length_add(self->position, ts_subtree_size(*child)); | ||
| 100 | self->child_index++; | ||
| 101 | return true; | ||
| 102 | } | ||
| 103 | |||
| 104 | // TSNode - private | ||
| 105 | |||
| 106 | static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { | ||
| 107 | Subtree tree = ts_node__subtree(self); | ||
| 108 | if (include_anonymous) { | ||
| 109 | return ts_subtree_visible(tree) || ts_node__alias(&self); | ||
| 110 | } else { | ||
| 111 | TSSymbol alias = ts_node__alias(&self); | ||
| 112 | if (alias) { | ||
| 113 | return ts_language_symbol_metadata(self.tree->language, alias).named; | ||
| 114 | } else { | ||
| 115 | return ts_subtree_visible(tree) && ts_subtree_named(tree); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline uint32_t ts_node__relevant_child_count( | ||
| 121 | TSNode self, | ||
| 122 | bool include_anonymous | ||
| 123 | ) { | ||
| 124 | Subtree tree = ts_node__subtree(self); | ||
| 125 | if (ts_subtree_child_count(tree) > 0) { | ||
| 126 | if (include_anonymous) { | ||
| 127 | return tree.ptr->visible_child_count; | ||
| 128 | } else { | ||
| 129 | return tree.ptr->named_child_count; | ||
| 130 | } | ||
| 131 | } else { | ||
| 132 | return 0; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | static inline TSNode ts_node__child( | ||
| 137 | TSNode self, | ||
| 138 | uint32_t child_index, | ||
| 139 | bool include_anonymous | ||
| 140 | ) { | ||
| 141 | TSNode result = self; | ||
| 142 | bool did_descend = true; | ||
| 143 | |||
| 144 | while (did_descend) { | ||
| 145 | did_descend = false; | ||
| 146 | |||
| 147 | TSNode child; | ||
| 148 | uint32_t index = 0; | ||
| 149 | NodeChildIterator iterator = ts_node_iterate_children(&result); | ||
| 150 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 151 | if (ts_node__is_relevant(child, include_anonymous)) { | ||
| 152 | if (index == child_index) { | ||
| 153 | return child; | ||
| 154 | } | ||
| 155 | index++; | ||
| 156 | } else { | ||
| 157 | uint32_t grandchild_index = child_index - index; | ||
| 158 | uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous); | ||
| 159 | if (grandchild_index < grandchild_count) { | ||
| 160 | did_descend = true; | ||
| 161 | result = child; | ||
| 162 | child_index = grandchild_index; | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | index += grandchild_count; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | return ts_node__null(); | ||
| 171 | } | ||
| 172 | |||
| 173 | static bool ts_subtree_has_trailing_empty_descendant( | ||
| 174 | Subtree self, | ||
| 175 | Subtree other | ||
| 176 | ) { | ||
| 177 | for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { | ||
| 178 | Subtree child = ts_subtree_children(self)[i]; | ||
| 179 | if (ts_subtree_total_bytes(child) > 0) break; | ||
| 180 | if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { | ||
| 181 | return true; | ||
| 182 | } | ||
| 183 | } | ||
| 184 | return false; | ||
| 185 | } | ||
| 186 | |||
| 187 | static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) { | ||
| 188 | Subtree self_subtree = ts_node__subtree(self); | ||
| 189 | bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; | ||
| 190 | uint32_t target_end_byte = ts_node_end_byte(self); | ||
| 191 | |||
| 192 | TSNode node = ts_node_parent(self); | ||
| 193 | TSNode earlier_node = ts_node__null(); | ||
| 194 | bool earlier_node_is_relevant = false; | ||
| 195 | |||
| 196 | while (!ts_node_is_null(node)) { | ||
| 197 | TSNode earlier_child = ts_node__null(); | ||
| 198 | bool earlier_child_is_relevant = false; | ||
| 199 | bool found_child_containing_target = false; | ||
| 200 | |||
| 201 | TSNode child; | ||
| 202 | NodeChildIterator iterator = ts_node_iterate_children(&node); | ||
| 203 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 204 | if (child.id == self.id) break; | ||
| 205 | if (iterator.position.bytes > target_end_byte) { | ||
| 206 | found_child_containing_target = true; | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | |||
| 210 | if (iterator.position.bytes == target_end_byte && | ||
| 211 | (!self_is_empty || | ||
| 212 | ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) { | ||
| 213 | found_child_containing_target = true; | ||
| 214 | break; | ||
| 215 | } | ||
| 216 | |||
| 217 | if (ts_node__is_relevant(child, include_anonymous)) { | ||
| 218 | earlier_child = child; | ||
| 219 | earlier_child_is_relevant = true; | ||
| 220 | } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { | ||
| 221 | earlier_child = child; | ||
| 222 | earlier_child_is_relevant = false; | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | if (found_child_containing_target) { | ||
| 227 | if (!ts_node_is_null(earlier_child)) { | ||
| 228 | earlier_node = earlier_child; | ||
| 229 | earlier_node_is_relevant = earlier_child_is_relevant; | ||
| 230 | } | ||
| 231 | node = child; | ||
| 232 | } else if (earlier_child_is_relevant) { | ||
| 233 | return earlier_child; | ||
| 234 | } else if (!ts_node_is_null(earlier_child)) { | ||
| 235 | node = earlier_child; | ||
| 236 | } else if (earlier_node_is_relevant) { | ||
| 237 | return earlier_node; | ||
| 238 | } else { | ||
| 239 | node = earlier_node; | ||
| 240 | earlier_node = ts_node__null(); | ||
| 241 | earlier_node_is_relevant = false; | ||
| 242 | } | ||
| 243 | } | ||
| 244 | |||
| 245 | return ts_node__null(); | ||
| 246 | } | ||
| 247 | |||
| 248 | static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) { | ||
| 249 | uint32_t target_end_byte = ts_node_end_byte(self); | ||
| 250 | |||
| 251 | TSNode node = ts_node_parent(self); | ||
| 252 | TSNode later_node = ts_node__null(); | ||
| 253 | bool later_node_is_relevant = false; | ||
| 254 | |||
| 255 | while (!ts_node_is_null(node)) { | ||
| 256 | TSNode later_child = ts_node__null(); | ||
| 257 | bool later_child_is_relevant = false; | ||
| 258 | TSNode child_containing_target = ts_node__null(); | ||
| 259 | |||
| 260 | TSNode child; | ||
| 261 | NodeChildIterator iterator = ts_node_iterate_children(&node); | ||
| 262 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 263 | if (iterator.position.bytes < target_end_byte) continue; | ||
| 264 | if (ts_node_start_byte(child) <= ts_node_start_byte(self)) { | ||
| 265 | if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) { | ||
| 266 | child_containing_target = child; | ||
| 267 | } | ||
| 268 | } else if (ts_node__is_relevant(child, include_anonymous)) { | ||
| 269 | later_child = child; | ||
| 270 | later_child_is_relevant = true; | ||
| 271 | break; | ||
| 272 | } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { | ||
| 273 | later_child = child; | ||
| 274 | later_child_is_relevant = false; | ||
| 275 | break; | ||
| 276 | } | ||
| 277 | } | ||
| 278 | |||
| 279 | if (!ts_node_is_null(child_containing_target)) { | ||
| 280 | if (!ts_node_is_null(later_child)) { | ||
| 281 | later_node = later_child; | ||
| 282 | later_node_is_relevant = later_child_is_relevant; | ||
| 283 | } | ||
| 284 | node = child_containing_target; | ||
| 285 | } else if (later_child_is_relevant) { | ||
| 286 | return later_child; | ||
| 287 | } else if (!ts_node_is_null(later_child)) { | ||
| 288 | node = later_child; | ||
| 289 | } else if (later_node_is_relevant) { | ||
| 290 | return later_node; | ||
| 291 | } else { | ||
| 292 | node = later_node; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | return ts_node__null(); | ||
| 297 | } | ||
| 298 | |||
| 299 | static inline TSNode ts_node__first_child_for_byte( | ||
| 300 | TSNode self, | ||
| 301 | uint32_t goal, | ||
| 302 | bool include_anonymous | ||
| 303 | ) { | ||
| 304 | TSNode node = self; | ||
| 305 | bool did_descend = true; | ||
| 306 | |||
| 307 | while (did_descend) { | ||
| 308 | did_descend = false; | ||
| 309 | |||
| 310 | TSNode child; | ||
| 311 | NodeChildIterator iterator = ts_node_iterate_children(&node); | ||
| 312 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 313 | if (ts_node_end_byte(child) > goal) { | ||
| 314 | if (ts_node__is_relevant(child, include_anonymous)) { | ||
| 315 | return child; | ||
| 316 | } else if (ts_node_child_count(child) > 0) { | ||
| 317 | did_descend = true; | ||
| 318 | node = child; | ||
| 319 | break; | ||
| 320 | } | ||
| 321 | } | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | return ts_node__null(); | ||
| 326 | } | ||
| 327 | |||
| 328 | static inline TSNode ts_node__descendant_for_byte_range( | ||
| 329 | TSNode self, | ||
| 330 | uint32_t range_start, | ||
| 331 | uint32_t range_end, | ||
| 332 | bool include_anonymous | ||
| 333 | ) { | ||
| 334 | TSNode node = self; | ||
| 335 | TSNode last_visible_node = self; | ||
| 336 | |||
| 337 | bool did_descend = true; | ||
| 338 | while (did_descend) { | ||
| 339 | did_descend = false; | ||
| 340 | |||
| 341 | TSNode child; | ||
| 342 | NodeChildIterator iterator = ts_node_iterate_children(&node); | ||
| 343 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 344 | uint32_t node_end = iterator.position.bytes; | ||
| 345 | |||
| 346 | // The end of this node must extend far enough forward to touch | ||
| 347 | // the end of the range and exceed the start of the range. | ||
| 348 | if (node_end < range_end) continue; | ||
| 349 | if (node_end <= range_start) continue; | ||
| 350 | |||
| 351 | // The start of this node must extend far enough backward to | ||
| 352 | // touch the start of the range. | ||
| 353 | if (range_start < ts_node_start_byte(child)) break; | ||
| 354 | |||
| 355 | node = child; | ||
| 356 | if (ts_node__is_relevant(node, include_anonymous)) { | ||
| 357 | last_visible_node = node; | ||
| 358 | } | ||
| 359 | did_descend = true; | ||
| 360 | break; | ||
| 361 | } | ||
| 362 | } | ||
| 363 | |||
| 364 | return last_visible_node; | ||
| 365 | } | ||
| 366 | |||
| 367 | static inline TSNode ts_node__descendant_for_point_range( | ||
| 368 | TSNode self, | ||
| 369 | TSPoint range_start, | ||
| 370 | TSPoint range_end, | ||
| 371 | bool include_anonymous | ||
| 372 | ) { | ||
| 373 | TSNode node = self; | ||
| 374 | TSNode last_visible_node = self; | ||
| 375 | |||
| 376 | bool did_descend = true; | ||
| 377 | while (did_descend) { | ||
| 378 | did_descend = false; | ||
| 379 | |||
| 380 | TSNode child; | ||
| 381 | NodeChildIterator iterator = ts_node_iterate_children(&node); | ||
| 382 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 383 | TSPoint node_end = iterator.position.extent; | ||
| 384 | |||
| 385 | // The end of this node must extend far enough forward to touch | ||
| 386 | // the end of the range and exceed the start of the range. | ||
| 387 | if (point_lt(node_end, range_end)) continue; | ||
| 388 | if (point_lte(node_end, range_start)) continue; | ||
| 389 | |||
| 390 | // The start of this node must extend far enough backward to | ||
| 391 | // touch the start of the range. | ||
| 392 | if (point_lt(range_start, ts_node_start_point(child))) break; | ||
| 393 | |||
| 394 | node = child; | ||
| 395 | if (ts_node__is_relevant(node, include_anonymous)) { | ||
| 396 | last_visible_node = node; | ||
| 397 | } | ||
| 398 | did_descend = true; | ||
| 399 | break; | ||
| 400 | } | ||
| 401 | } | ||
| 402 | |||
| 403 | return last_visible_node; | ||
| 404 | } | ||
| 405 | |||
| 406 | // TSNode - public | ||
| 407 | |||
| 408 | uint32_t ts_node_end_byte(TSNode self) { | ||
| 409 | return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes; | ||
| 410 | } | ||
| 411 | |||
| 412 | TSPoint ts_node_end_point(TSNode self) { | ||
| 413 | return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent); | ||
| 414 | } | ||
| 415 | |||
| 416 | TSSymbol ts_node_symbol(TSNode self) { | ||
| 417 | TSSymbol symbol = ts_node__alias(&self); | ||
| 418 | if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); | ||
| 419 | return ts_language_public_symbol(self.tree->language, symbol); | ||
| 420 | } | ||
| 421 | |||
| 422 | const char *ts_node_type(TSNode self) { | ||
| 423 | TSSymbol symbol = ts_node__alias(&self); | ||
| 424 | if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); | ||
| 425 | return ts_language_symbol_name(self.tree->language, symbol); | ||
| 426 | } | ||
| 427 | |||
| 428 | const TSLanguage *ts_node_language(TSNode self) { | ||
| 429 | return self.tree->language; | ||
| 430 | } | ||
| 431 | |||
| 432 | TSSymbol ts_node_grammar_symbol(TSNode self) { | ||
| 433 | return ts_subtree_symbol(ts_node__subtree(self)); | ||
| 434 | } | ||
| 435 | |||
| 436 | const char *ts_node_grammar_type(TSNode self) { | ||
| 437 | TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self)); | ||
| 438 | return ts_language_symbol_name(self.tree->language, symbol); | ||
| 439 | } | ||
| 440 | |||
| 441 | char *ts_node_string(TSNode self) { | ||
| 442 | return ts_subtree_string(ts_node__subtree(self), self.tree->language, false); | ||
| 443 | } | ||
| 444 | |||
| 445 | bool ts_node_eq(TSNode self, TSNode other) { | ||
| 446 | return self.tree == other.tree && self.id == other.id; | ||
| 447 | } | ||
| 448 | |||
| 449 | bool ts_node_is_null(TSNode self) { | ||
| 450 | return self.id == 0; | ||
| 451 | } | ||
| 452 | |||
| 453 | bool ts_node_is_extra(TSNode self) { | ||
| 454 | return ts_subtree_extra(ts_node__subtree(self)); | ||
| 455 | } | ||
| 456 | |||
| 457 | bool ts_node_is_named(TSNode self) { | ||
| 458 | TSSymbol alias = ts_node__alias(&self); | ||
| 459 | return alias | ||
| 460 | ? ts_language_symbol_metadata(self.tree->language, alias).named | ||
| 461 | : ts_subtree_named(ts_node__subtree(self)); | ||
| 462 | } | ||
| 463 | |||
| 464 | bool ts_node_is_missing(TSNode self) { | ||
| 465 | return ts_subtree_missing(ts_node__subtree(self)); | ||
| 466 | } | ||
| 467 | |||
| 468 | bool ts_node_has_changes(TSNode self) { | ||
| 469 | return ts_subtree_has_changes(ts_node__subtree(self)); | ||
| 470 | } | ||
| 471 | |||
| 472 | bool ts_node_has_error(TSNode self) { | ||
| 473 | return ts_subtree_error_cost(ts_node__subtree(self)) > 0; | ||
| 474 | } | ||
| 475 | |||
| 476 | bool ts_node_is_error(TSNode self) { | ||
| 477 | TSSymbol symbol = ts_node_symbol(self); | ||
| 478 | return symbol == ts_builtin_sym_error; | ||
| 479 | } | ||
| 480 | |||
| 481 | uint32_t ts_node_descendant_count(TSNode self) { | ||
| 482 | return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; | ||
| 483 | } | ||
| 484 | |||
| 485 | TSStateId ts_node_parse_state(TSNode self) { | ||
| 486 | return ts_subtree_parse_state(ts_node__subtree(self)); | ||
| 487 | } | ||
| 488 | |||
| 489 | TSStateId ts_node_next_parse_state(TSNode self) { | ||
| 490 | const TSLanguage *language = self.tree->language; | ||
| 491 | uint16_t state = ts_node_parse_state(self); | ||
| 492 | if (state == TS_TREE_STATE_NONE) { | ||
| 493 | return TS_TREE_STATE_NONE; | ||
| 494 | } | ||
| 495 | uint16_t symbol = ts_node_grammar_symbol(self); | ||
| 496 | return ts_language_next_state(language, state, symbol); | ||
| 497 | } | ||
| 498 | |||
| 499 | TSNode ts_node_parent(TSNode self) { | ||
| 500 | TSNode node = ts_tree_root_node(self.tree); | ||
| 501 | uint32_t end_byte = ts_node_end_byte(self); | ||
| 502 | if (node.id == self.id) return ts_node__null(); | ||
| 503 | |||
| 504 | TSNode last_visible_node = node; | ||
| 505 | bool did_descend = true; | ||
| 506 | while (did_descend) { | ||
| 507 | did_descend = false; | ||
| 508 | |||
| 509 | TSNode child; | ||
| 510 | NodeChildIterator iterator = ts_node_iterate_children(&node); | ||
| 511 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 512 | if ( | ||
| 513 | ts_node_start_byte(child) > ts_node_start_byte(self) || | ||
| 514 | child.id == self.id | ||
| 515 | ) break; | ||
| 516 | if (iterator.position.bytes >= end_byte) { | ||
| 517 | node = child; | ||
| 518 | if (ts_node__is_relevant(child, true)) { | ||
| 519 | last_visible_node = node; | ||
| 520 | } | ||
| 521 | did_descend = true; | ||
| 522 | break; | ||
| 523 | } | ||
| 524 | } | ||
| 525 | } | ||
| 526 | |||
| 527 | return last_visible_node; | ||
| 528 | } | ||
| 529 | |||
| 530 | TSNode ts_node_child(TSNode self, uint32_t child_index) { | ||
| 531 | return ts_node__child(self, child_index, true); | ||
| 532 | } | ||
| 533 | |||
| 534 | TSNode ts_node_named_child(TSNode self, uint32_t child_index) { | ||
| 535 | return ts_node__child(self, child_index, false); | ||
| 536 | } | ||
| 537 | |||
| 538 | TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { | ||
| 539 | recur: | ||
| 540 | if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); | ||
| 541 | |||
| 542 | const TSFieldMapEntry *field_map, *field_map_end; | ||
| 543 | ts_language_field_map( | ||
| 544 | self.tree->language, | ||
| 545 | ts_node__subtree(self).ptr->production_id, | ||
| 546 | &field_map, | ||
| 547 | &field_map_end | ||
| 548 | ); | ||
| 549 | if (field_map == field_map_end) return ts_node__null(); | ||
| 550 | |||
| 551 | // The field mappings are sorted by their field id. Scan all | ||
| 552 | // the mappings to find the ones for the given field id. | ||
| 553 | while (field_map->field_id < field_id) { | ||
| 554 | field_map++; | ||
| 555 | if (field_map == field_map_end) return ts_node__null(); | ||
| 556 | } | ||
| 557 | while (field_map_end[-1].field_id > field_id) { | ||
| 558 | field_map_end--; | ||
| 559 | if (field_map == field_map_end) return ts_node__null(); | ||
| 560 | } | ||
| 561 | |||
| 562 | TSNode child; | ||
| 563 | NodeChildIterator iterator = ts_node_iterate_children(&self); | ||
| 564 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 565 | if (!ts_subtree_extra(ts_node__subtree(child))) { | ||
| 566 | uint32_t index = iterator.structural_child_index - 1; | ||
| 567 | if (index < field_map->child_index) continue; | ||
| 568 | |||
| 569 | // Hidden nodes' fields are "inherited" by their visible parent. | ||
| 570 | if (field_map->inherited) { | ||
| 571 | |||
| 572 | // If this is the *last* possible child node for this field, | ||
| 573 | // then perform a tail call to avoid recursion. | ||
| 574 | if (field_map + 1 == field_map_end) { | ||
| 575 | self = child; | ||
| 576 | goto recur; | ||
| 577 | } | ||
| 578 | |||
| 579 | // Otherwise, descend into this child, but if it doesn't contain | ||
| 580 | // the field, continue searching subsequent children. | ||
| 581 | else { | ||
| 582 | TSNode result = ts_node_child_by_field_id(child, field_id); | ||
| 583 | if (result.id) return result; | ||
| 584 | field_map++; | ||
| 585 | if (field_map == field_map_end) return ts_node__null(); | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | else if (ts_node__is_relevant(child, true)) { | ||
| 590 | return child; | ||
| 591 | } | ||
| 592 | |||
| 593 | // If the field refers to a hidden node with visible children, | ||
| 594 | // return the first visible child. | ||
| 595 | else if (ts_node_child_count(child) > 0 ) { | ||
| 596 | return ts_node_child(child, 0); | ||
| 597 | } | ||
| 598 | |||
| 599 | // Otherwise, continue searching subsequent children. | ||
| 600 | else { | ||
| 601 | field_map++; | ||
| 602 | if (field_map == field_map_end) return ts_node__null(); | ||
| 603 | } | ||
| 604 | } | ||
| 605 | } | ||
| 606 | |||
| 607 | return ts_node__null(); | ||
| 608 | } | ||
| 609 | |||
| 610 | static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) { | ||
| 611 | const TSFieldMapEntry *field_map, *field_map_end; | ||
| 612 | ts_language_field_map( | ||
| 613 | self.tree->language, | ||
| 614 | ts_node__subtree(self).ptr->production_id, | ||
| 615 | &field_map, | ||
| 616 | &field_map_end | ||
| 617 | ); | ||
| 618 | for (; field_map != field_map_end; field_map++) { | ||
| 619 | if (!field_map->inherited && field_map->child_index == structural_child_index) { | ||
| 620 | return self.tree->language->field_names[field_map->field_id]; | ||
| 621 | } | ||
| 622 | } | ||
| 623 | return NULL; | ||
| 624 | } | ||
| 625 | |||
| 626 | const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { | ||
| 627 | TSNode result = self; | ||
| 628 | bool did_descend = true; | ||
| 629 | const char *inherited_field_name = NULL; | ||
| 630 | |||
| 631 | while (did_descend) { | ||
| 632 | did_descend = false; | ||
| 633 | |||
| 634 | TSNode child; | ||
| 635 | uint32_t index = 0; | ||
| 636 | NodeChildIterator iterator = ts_node_iterate_children(&result); | ||
| 637 | while (ts_node_child_iterator_next(&iterator, &child)) { | ||
| 638 | if (ts_node__is_relevant(child, true)) { | ||
| 639 | if (index == child_index) { | ||
| 640 | const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); | ||
| 641 | if (field_name) return field_name; | ||
| 642 | return inherited_field_name; | ||
| 643 | } | ||
| 644 | index++; | ||
| 645 | } else { | ||
| 646 | uint32_t grandchild_index = child_index - index; | ||
| 647 | uint32_t grandchild_count = ts_node__relevant_child_count(child, true); | ||
| 648 | if (grandchild_index < grandchild_count) { | ||
| 649 | const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); | ||
| 650 | if (field_name) inherited_field_name = field_name; | ||
| 651 | |||
| 652 | did_descend = true; | ||
| 653 | result = child; | ||
| 654 | child_index = grandchild_index; | ||
| 655 | break; | ||
| 656 | } | ||
| 657 | index += grandchild_count; | ||
| 658 | } | ||
| 659 | } | ||
| 660 | } | ||
| 661 | |||
| 662 | return NULL; | ||
| 663 | } | ||
| 664 | |||
| 665 | TSNode ts_node_child_by_field_name( | ||
| 666 | TSNode self, | ||
| 667 | const char *name, | ||
| 668 | uint32_t name_length | ||
| 669 | ) { | ||
| 670 | TSFieldId field_id = ts_language_field_id_for_name( | ||
| 671 | self.tree->language, | ||
| 672 | name, | ||
| 673 | name_length | ||
| 674 | ); | ||
| 675 | return ts_node_child_by_field_id(self, field_id); | ||
| 676 | } | ||
| 677 | |||
| 678 | uint32_t ts_node_child_count(TSNode self) { | ||
| 679 | Subtree tree = ts_node__subtree(self); | ||
| 680 | if (ts_subtree_child_count(tree) > 0) { | ||
| 681 | return tree.ptr->visible_child_count; | ||
| 682 | } else { | ||
| 683 | return 0; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | |||
| 687 | uint32_t ts_node_named_child_count(TSNode self) { | ||
| 688 | Subtree tree = ts_node__subtree(self); | ||
| 689 | if (ts_subtree_child_count(tree) > 0) { | ||
| 690 | return tree.ptr->named_child_count; | ||
| 691 | } else { | ||
| 692 | return 0; | ||
| 693 | } | ||
| 694 | } | ||
| 695 | |||
| 696 | TSNode ts_node_next_sibling(TSNode self) { | ||
| 697 | return ts_node__next_sibling(self, true); | ||
| 698 | } | ||
| 699 | |||
| 700 | TSNode ts_node_next_named_sibling(TSNode self) { | ||
| 701 | return ts_node__next_sibling(self, false); | ||
| 702 | } | ||
| 703 | |||
| 704 | TSNode ts_node_prev_sibling(TSNode self) { | ||
| 705 | return ts_node__prev_sibling(self, true); | ||
| 706 | } | ||
| 707 | |||
| 708 | TSNode ts_node_prev_named_sibling(TSNode self) { | ||
| 709 | return ts_node__prev_sibling(self, false); | ||
| 710 | } | ||
| 711 | |||
| 712 | TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) { | ||
| 713 | return ts_node__first_child_for_byte(self, byte, true); | ||
| 714 | } | ||
| 715 | |||
| 716 | TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) { | ||
| 717 | return ts_node__first_child_for_byte(self, byte, false); | ||
| 718 | } | ||
| 719 | |||
| 720 | TSNode ts_node_descendant_for_byte_range( | ||
| 721 | TSNode self, | ||
| 722 | uint32_t start, | ||
| 723 | uint32_t end | ||
| 724 | ) { | ||
| 725 | return ts_node__descendant_for_byte_range(self, start, end, true); | ||
| 726 | } | ||
| 727 | |||
| 728 | TSNode ts_node_named_descendant_for_byte_range( | ||
| 729 | TSNode self, | ||
| 730 | uint32_t start, | ||
| 731 | uint32_t end | ||
| 732 | ) { | ||
| 733 | return ts_node__descendant_for_byte_range(self, start, end, false); | ||
| 734 | } | ||
| 735 | |||
| 736 | TSNode ts_node_descendant_for_point_range( | ||
| 737 | TSNode self, | ||
| 738 | TSPoint start, | ||
| 739 | TSPoint end | ||
| 740 | ) { | ||
| 741 | return ts_node__descendant_for_point_range(self, start, end, true); | ||
| 742 | } | ||
| 743 | |||
| 744 | TSNode ts_node_named_descendant_for_point_range( | ||
| 745 | TSNode self, | ||
| 746 | TSPoint start, | ||
| 747 | TSPoint end | ||
| 748 | ) { | ||
| 749 | return ts_node__descendant_for_point_range(self, start, end, false); | ||
| 750 | } | ||
| 751 | |||
| 752 | void ts_node_edit(TSNode *self, const TSInputEdit *edit) { | ||
| 753 | uint32_t start_byte = ts_node_start_byte(*self); | ||
| 754 | TSPoint start_point = ts_node_start_point(*self); | ||
| 755 | |||
| 756 | if (start_byte >= edit->old_end_byte) { | ||
| 757 | start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); | ||
| 758 | start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point)); | ||
| 759 | } else if (start_byte > edit->start_byte) { | ||
| 760 | start_byte = edit->new_end_byte; | ||
| 761 | start_point = edit->new_end_point; | ||
| 762 | } | ||
| 763 | |||
| 764 | self->context[0] = start_byte; | ||
| 765 | self->context[1] = start_point.row; | ||
| 766 | self->context[2] = start_point.column; | ||
| 767 | } | ||
diff --git a/vendor/tree-sitter/lib/src/parser.c b/vendor/tree-sitter/lib/src/parser.c new file mode 100644 index 0000000..cc93162 --- /dev/null +++ b/vendor/tree-sitter/lib/src/parser.c | |||
| @@ -0,0 +1,2011 @@ | |||
| 1 | #include <time.h> | ||
| 2 | #include <assert.h> | ||
| 3 | #include <stdio.h> | ||
| 4 | #include <limits.h> | ||
| 5 | #include <stdbool.h> | ||
| 6 | #include "tree_sitter/api.h" | ||
| 7 | #include "./alloc.h" | ||
| 8 | #include "./array.h" | ||
| 9 | #include "./atomic.h" | ||
| 10 | #include "./clock.h" | ||
| 11 | #include "./error_costs.h" | ||
| 12 | #include "./get_changed_ranges.h" | ||
| 13 | #include "./language.h" | ||
| 14 | #include "./length.h" | ||
| 15 | #include "./lexer.h" | ||
| 16 | #include "./reduce_action.h" | ||
| 17 | #include "./reusable_node.h" | ||
| 18 | #include "./stack.h" | ||
| 19 | #include "./subtree.h" | ||
| 20 | #include "./tree.h" | ||
| 21 | |||
// Format a printf-style message into the lexer's debug buffer (truncated to
// TREE_SITTER_SERIALIZATION_BUFFER_SIZE) and emit it through ts_parser__log().
// Does nothing unless a logger callback or a dot-graph file is configured.
// Expects a `TSParser *self` in scope. The macro expands to a bare `if`
// statement, so take care when using it as the body of an unbraced if/else.
#define LOG(...)                                                                            \
  if (self->lexer.logger.log || self->dot_graph_file) {                                     \
    snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
    ts_parser__log(self);                                                                   \
  }
| 27 | |||
// Log a "lexed_lookahead" message containing the symbol name (with tab,
// newline, vertical tab, form feed, carriage return and backslash written as
// two-character escapes) and the token size. Does nothing unless a logger
// callback or a dot-graph file is configured. Expects a `TSParser *self` in
// scope.
//
// The copy loop stops while at least two bytes plus the terminator still fit
// in the debug buffer: one input character can expand to two output bytes, so
// checking only `off < SIZE` would allow a write past the end of the buffer
// and a negative length being handed to the final snprintf(). Over-long
// symbol names are therefore truncated.
#define LOG_LOOKAHEAD(symbol_name, size)                      \
  if (self->lexer.logger.log || self->dot_graph_file) {       \
    char *buf = self->lexer.debug_buffer;                     \
    const char *symbol = symbol_name;                         \
    int off = snprintf(                                       \
      buf,                                                    \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE,                  \
      "lexed_lookahead sym:"                                  \
    );                                                        \
    for (                                                     \
      int i = 0;                                              \
      symbol[i] != '\0'                                       \
      && off + 2 < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;     \
      i++                                                     \
    ) {                                                       \
      switch (symbol[i]) {                                    \
      case '\t': buf[off++] = '\\'; buf[off++] = 't'; break;  \
      case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break;  \
      case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break;  \
      case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break;  \
      case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break;  \
      case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \
      default:   buf[off++] = symbol[i]; break;               \
      }                                                       \
    }                                                         \
    snprintf(                                                 \
      buf + off,                                              \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off,            \
      ", size:%u",                                            \
      size                                                    \
    );                                                        \
    ts_parser__log(self);                                     \
  }
| 57 | |||
// When a dot-graph file is configured, write the parse stack to it via
// ts_stack_print_dot_graph(), followed by two newlines. Expects a
// `TSParser *self` in scope.
#define LOG_STACK()                                                              \
  if (self->dot_graph_file) {                                                    \
    ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
    fputs("\n\n", self->dot_graph_file);                                         \
  }
| 63 | |||
// When a dot-graph file is configured, write `tree` to it via
// ts_subtree_print_dot_graph(), followed by a newline. Expects a
// `TSParser *self` in scope.
#define LOG_TREE(tree)                                                      \
  if (self->dot_graph_file) {                                               \
    ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
    fputs("\n", self->dot_graph_file);                                      \
  }
| 69 | |||
// Name of `symbol` in the parser's current language. Expects a
// `TSParser *self` in scope.
#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)

// Name of the symbol of `tree` in the parser's current language.
#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
| 73 | |||
// Tuning constants for the parser.
// NOTE(review): most of these are used outside the visible part of this file;
// the remarks below are inferred from the names — confirm at the use sites.

// Presumably the limit on concurrently tracked stack versions.
static const unsigned MAX_VERSION_COUNT = 6;
// Presumably the extra versions tolerated above MAX_VERSION_COUNT.
static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
// Presumably the depth limit for stack summaries.
static const unsigned MAX_SUMMARY_DEPTH = 16;
// Threshold used by ts_parser__compare_versions(): a weighted cost gap above
// this value makes one version win outright instead of merely being preferred.
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
// Presumably the number of operations between timeout/cancellation checks.
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
| 79 | |||
// A cached token.
// NOTE(review): field meanings are inferred from their names — presumably the
// last lexed token, the external token that preceded it, and the byte offset
// at which it was lexed; confirm at the use sites.
typedef struct {
  Subtree token;
  Subtree last_external_token;
  uint32_t byte_index;
} TokenCache;
| 85 | |||
// Parser state. Only fields whose use is visible nearby are described; the
// rest are used elsewhere in this file.
struct TSParser {
  Lexer lexer;                      // owns the logger and the debug buffer used by LOG()
  Stack *stack;                     // parse stack holding the competing versions
  SubtreePool tree_pool;            // pool passed to ts_subtree_release()
  const TSLanguage *language;       // language currently being parsed
  ReduceActionSet reduce_actions;
  Subtree finished_tree;            // accepted tree, if any; see ts_parser__better_version_exists()
  SubtreeArray trailing_extras;
  SubtreeArray trailing_extras2;
  SubtreeArray scratch_trees;
  TokenCache token_cache;
  ReusableNode reusable_node;
  void *external_scanner_payload;   // passed to the language's external scanner callbacks
  FILE *dot_graph_file;             // when non-NULL, debug graphs and log labels are written here
  TSClock end_clock;
  TSDuration timeout_duration;
  unsigned accept_count;
  unsigned operation_count;
  const volatile size_t *cancellation_flag;
  Subtree old_tree;
  TSRangeArray included_range_differences;
  unsigned included_range_difference_index;
};
| 109 | |||
// Summary of a stack version used to rank versions against each other
// (built by ts_parser__version_status(), compared by
// ts_parser__compare_versions()).
typedef struct {
  unsigned cost;           // error cost of the version (plus a penalty when it is paused)
  unsigned node_count;     // from ts_stack_node_count_since_error()
  int dynamic_precedence;  // from ts_stack_dynamic_precedence()
  bool is_in_error;        // version is paused or in ERROR_STATE
} ErrorStatus;
| 116 | |||
// Result of comparing two versions ("left" is the first argument of
// ts_parser__compare_versions(), "right" the second). In
// ts_parser__better_version_exists(), "Take" is acted on unconditionally,
// while "Prefer" is acted on only when the two versions can merge.
typedef enum {
  ErrorComparisonTakeLeft,
  ErrorComparisonPreferLeft,
  ErrorComparisonNone,
  ErrorComparisonPreferRight,
  ErrorComparisonTakeRight,
} ErrorComparison;
| 124 | |||
// In-memory input read by ts_string_input_read(): a byte buffer and its length.
typedef struct {
  const char *string;
  uint32_t length;
} TSStringInput;
| 129 | |||
| 130 | // StringInput | ||
| 131 | |||
| 132 | static const char *ts_string_input_read( | ||
| 133 | void *_self, | ||
| 134 | uint32_t byte, | ||
| 135 | TSPoint point, | ||
| 136 | uint32_t *length | ||
| 137 | ) { | ||
| 138 | (void)point; | ||
| 139 | TSStringInput *self = (TSStringInput *)_self; | ||
| 140 | if (byte >= self->length) { | ||
| 141 | *length = 0; | ||
| 142 | return ""; | ||
| 143 | } else { | ||
| 144 | *length = self->length - byte; | ||
| 145 | return self->string + byte; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | // Parser - Private | ||
| 150 | |||
| 151 | static void ts_parser__log(TSParser *self) { | ||
| 152 | if (self->lexer.logger.log) { | ||
| 153 | self->lexer.logger.log( | ||
| 154 | self->lexer.logger.payload, | ||
| 155 | TSLogTypeParse, | ||
| 156 | self->lexer.debug_buffer | ||
| 157 | ); | ||
| 158 | } | ||
| 159 | |||
| 160 | if (self->dot_graph_file) { | ||
| 161 | fprintf(self->dot_graph_file, "graph {\nlabel=\""); | ||
| 162 | for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) { | ||
| 163 | if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file); | ||
| 164 | fputc(*chr, self->dot_graph_file); | ||
| 165 | } | ||
| 166 | fprintf(self->dot_graph_file, "\"\n}\n\n"); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | static bool ts_parser__breakdown_top_of_stack( | ||
| 171 | TSParser *self, | ||
| 172 | StackVersion version | ||
| 173 | ) { | ||
| 174 | bool did_break_down = false; | ||
| 175 | bool pending = false; | ||
| 176 | |||
| 177 | do { | ||
| 178 | StackSliceArray pop = ts_stack_pop_pending(self->stack, version); | ||
| 179 | if (!pop.size) break; | ||
| 180 | |||
| 181 | did_break_down = true; | ||
| 182 | pending = false; | ||
| 183 | for (uint32_t i = 0; i < pop.size; i++) { | ||
| 184 | StackSlice slice = pop.contents[i]; | ||
| 185 | TSStateId state = ts_stack_state(self->stack, slice.version); | ||
| 186 | Subtree parent = *array_front(&slice.subtrees); | ||
| 187 | |||
| 188 | for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { | ||
| 189 | Subtree child = ts_subtree_children(parent)[j]; | ||
| 190 | pending = ts_subtree_child_count(child) > 0; | ||
| 191 | |||
| 192 | if (ts_subtree_is_error(child)) { | ||
| 193 | state = ERROR_STATE; | ||
| 194 | } else if (!ts_subtree_extra(child)) { | ||
| 195 | state = ts_language_next_state(self->language, state, ts_subtree_symbol(child)); | ||
| 196 | } | ||
| 197 | |||
| 198 | ts_subtree_retain(child); | ||
| 199 | ts_stack_push(self->stack, slice.version, child, pending, state); | ||
| 200 | } | ||
| 201 | |||
| 202 | for (uint32_t j = 1; j < slice.subtrees.size; j++) { | ||
| 203 | Subtree tree = slice.subtrees.contents[j]; | ||
| 204 | ts_stack_push(self->stack, slice.version, tree, false, state); | ||
| 205 | } | ||
| 206 | |||
| 207 | ts_subtree_release(&self->tree_pool, parent); | ||
| 208 | array_delete(&slice.subtrees); | ||
| 209 | |||
| 210 | LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); | ||
| 211 | LOG_STACK(); | ||
| 212 | } | ||
| 213 | } while (pending); | ||
| 214 | |||
| 215 | return did_break_down; | ||
| 216 | } | ||
| 217 | |||
| 218 | static void ts_parser__breakdown_lookahead( | ||
| 219 | TSParser *self, | ||
| 220 | Subtree *lookahead, | ||
| 221 | TSStateId state, | ||
| 222 | ReusableNode *reusable_node | ||
| 223 | ) { | ||
| 224 | bool did_descend = false; | ||
| 225 | Subtree tree = reusable_node_tree(reusable_node); | ||
| 226 | while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) { | ||
| 227 | LOG("state_mismatch sym:%s", TREE_NAME(tree)); | ||
| 228 | reusable_node_descend(reusable_node); | ||
| 229 | tree = reusable_node_tree(reusable_node); | ||
| 230 | did_descend = true; | ||
| 231 | } | ||
| 232 | |||
| 233 | if (did_descend) { | ||
| 234 | ts_subtree_release(&self->tree_pool, *lookahead); | ||
| 235 | *lookahead = tree; | ||
| 236 | ts_subtree_retain(*lookahead); | ||
| 237 | } | ||
| 238 | } | ||
| 239 | |||
| 240 | static ErrorComparison ts_parser__compare_versions( | ||
| 241 | TSParser *self, | ||
| 242 | ErrorStatus a, | ||
| 243 | ErrorStatus b | ||
| 244 | ) { | ||
| 245 | (void)self; | ||
| 246 | if (!a.is_in_error && b.is_in_error) { | ||
| 247 | if (a.cost < b.cost) { | ||
| 248 | return ErrorComparisonTakeLeft; | ||
| 249 | } else { | ||
| 250 | return ErrorComparisonPreferLeft; | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | if (a.is_in_error && !b.is_in_error) { | ||
| 255 | if (b.cost < a.cost) { | ||
| 256 | return ErrorComparisonTakeRight; | ||
| 257 | } else { | ||
| 258 | return ErrorComparisonPreferRight; | ||
| 259 | } | ||
| 260 | } | ||
| 261 | |||
| 262 | if (a.cost < b.cost) { | ||
| 263 | if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { | ||
| 264 | return ErrorComparisonTakeLeft; | ||
| 265 | } else { | ||
| 266 | return ErrorComparisonPreferLeft; | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | if (b.cost < a.cost) { | ||
| 271 | if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { | ||
| 272 | return ErrorComparisonTakeRight; | ||
| 273 | } else { | ||
| 274 | return ErrorComparisonPreferRight; | ||
| 275 | } | ||
| 276 | } | ||
| 277 | |||
| 278 | if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft; | ||
| 279 | if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight; | ||
| 280 | return ErrorComparisonNone; | ||
| 281 | } | ||
| 282 | |||
| 283 | static ErrorStatus ts_parser__version_status( | ||
| 284 | TSParser *self, | ||
| 285 | StackVersion version | ||
| 286 | ) { | ||
| 287 | unsigned cost = ts_stack_error_cost(self->stack, version); | ||
| 288 | bool is_paused = ts_stack_is_paused(self->stack, version); | ||
| 289 | if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; | ||
| 290 | return (ErrorStatus) { | ||
| 291 | .cost = cost, | ||
| 292 | .node_count = ts_stack_node_count_since_error(self->stack, version), | ||
| 293 | .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), | ||
| 294 | .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE | ||
| 295 | }; | ||
| 296 | } | ||
| 297 | |||
| 298 | static bool ts_parser__better_version_exists( | ||
| 299 | TSParser *self, | ||
| 300 | StackVersion version, | ||
| 301 | bool is_in_error, | ||
| 302 | unsigned cost | ||
| 303 | ) { | ||
| 304 | if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) { | ||
| 305 | return true; | ||
| 306 | } | ||
| 307 | |||
| 308 | Length position = ts_stack_position(self->stack, version); | ||
| 309 | ErrorStatus status = { | ||
| 310 | .cost = cost, | ||
| 311 | .is_in_error = is_in_error, | ||
| 312 | .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), | ||
| 313 | .node_count = ts_stack_node_count_since_error(self->stack, version), | ||
| 314 | }; | ||
| 315 | |||
| 316 | for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { | ||
| 317 | if (i == version || | ||
| 318 | !ts_stack_is_active(self->stack, i) || | ||
| 319 | ts_stack_position(self->stack, i).bytes < position.bytes) continue; | ||
| 320 | ErrorStatus status_i = ts_parser__version_status(self, i); | ||
| 321 | switch (ts_parser__compare_versions(self, status, status_i)) { | ||
| 322 | case ErrorComparisonTakeRight: | ||
| 323 | return true; | ||
| 324 | case ErrorComparisonPreferRight: | ||
| 325 | if (ts_stack_can_merge(self->stack, i, version)) return true; | ||
| 326 | break; | ||
| 327 | default: | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | return false; | ||
| 333 | } | ||
| 334 | |||
| 335 | static void ts_parser__restore_external_scanner( | ||
| 336 | TSParser *self, | ||
| 337 | Subtree external_token | ||
| 338 | ) { | ||
| 339 | if (external_token.ptr) { | ||
| 340 | self->language->external_scanner.deserialize( | ||
| 341 | self->external_scanner_payload, | ||
| 342 | ts_external_scanner_state_data(&external_token.ptr->external_scanner_state), | ||
| 343 | external_token.ptr->external_scanner_state.length | ||
| 344 | ); | ||
| 345 | } else { | ||
| 346 | self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0); | ||
| 347 | } | ||
| 348 | } | ||
| 349 | |||
| 350 | static bool ts_parser__can_reuse_first_leaf( | ||
| 351 | TSParser *self, | ||
| 352 | TSStateId state, | ||
| 353 | Subtree tree, | ||
| 354 | TableEntry *table_entry | ||
| 355 | ) { | ||
| 356 | TSLexMode current_lex_mode = self->language->lex_modes[state]; | ||
| 357 | TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree); | ||
| 358 | TSStateId leaf_state = ts_subtree_leaf_parse_state(tree); | ||
| 359 | TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state]; | ||
| 360 | |||
| 361 | // At the end of a non-terminal extra node, the lexer normally returns | ||
| 362 | // NULL, which indicates that the parser should look for a reduce action | ||
| 363 | // at symbol `0`. Avoid reusing tokens in this situation to ensure that | ||
| 364 | // the same thing happens when incrementally reparsing. | ||
| 365 | if (current_lex_mode.lex_state == (uint16_t)(-1)) return false; | ||
| 366 | |||
| 367 | // If the token was created in a state with the same set of lookaheads, it is reusable. | ||
| 368 | if ( | ||
| 369 | table_entry->action_count > 0 && | ||
| 370 | memcmp(&leaf_lex_mode, ¤t_lex_mode, sizeof(TSLexMode)) == 0 && | ||
| 371 | ( | ||
| 372 | leaf_symbol != self->language->keyword_capture_token || | ||
| 373 | (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state) | ||
| 374 | ) | ||
| 375 | ) return true; | ||
| 376 | |||
| 377 | // Empty tokens are not reusable in states with different lookaheads. | ||
| 378 | if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false; | ||
| 379 | |||
| 380 | // If the current state allows external tokens or other tokens that conflict with this | ||
| 381 | // token, this token is not reusable. | ||
| 382 | return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; | ||
| 383 | } | ||
| 384 | |||
| 385 | static Subtree ts_parser__lex( | ||
| 386 | TSParser *self, | ||
| 387 | StackVersion version, | ||
| 388 | TSStateId parse_state | ||
| 389 | ) { | ||
| 390 | TSLexMode lex_mode = self->language->lex_modes[parse_state]; | ||
| 391 | if (lex_mode.lex_state == (uint16_t)-1) { | ||
| 392 | LOG("no_lookahead_after_non_terminal_extra"); | ||
| 393 | return NULL_SUBTREE; | ||
| 394 | } | ||
| 395 | |||
| 396 | const Length start_position = ts_stack_position(self->stack, version); | ||
| 397 | const Subtree external_token = ts_stack_last_external_token(self->stack, version); | ||
| 398 | const bool *valid_external_tokens = ts_language_enabled_external_tokens( | ||
| 399 | self->language, | ||
| 400 | lex_mode.external_lex_state | ||
| 401 | ); | ||
| 402 | |||
| 403 | bool found_external_token = false; | ||
| 404 | bool error_mode = parse_state == ERROR_STATE; | ||
| 405 | bool skipped_error = false; | ||
| 406 | bool called_get_column = false; | ||
| 407 | int32_t first_error_character = 0; | ||
| 408 | Length error_start_position = length_zero(); | ||
| 409 | Length error_end_position = length_zero(); | ||
| 410 | uint32_t lookahead_end_byte = 0; | ||
| 411 | uint32_t external_scanner_state_len = 0; | ||
| 412 | bool external_scanner_state_changed = false; | ||
| 413 | ts_lexer_reset(&self->lexer, start_position); | ||
| 414 | |||
| 415 | for (;;) { | ||
| 416 | Length current_position = self->lexer.current_position; | ||
| 417 | |||
| 418 | if (valid_external_tokens) { | ||
| 419 | LOG( | ||
| 420 | "lex_external state:%d, row:%u, column:%u", | ||
| 421 | lex_mode.external_lex_state, | ||
| 422 | current_position.extent.row, | ||
| 423 | current_position.extent.column | ||
| 424 | ); | ||
| 425 | ts_lexer_start(&self->lexer); | ||
| 426 | ts_parser__restore_external_scanner(self, external_token); | ||
| 427 | bool found_token = self->language->external_scanner.scan( | ||
| 428 | self->external_scanner_payload, | ||
| 429 | &self->lexer.data, | ||
| 430 | valid_external_tokens | ||
| 431 | ); | ||
| 432 | ts_lexer_finish(&self->lexer, &lookahead_end_byte); | ||
| 433 | |||
| 434 | if (found_token) { | ||
| 435 | external_scanner_state_len = self->language->external_scanner.serialize( | ||
| 436 | self->external_scanner_payload, | ||
| 437 | self->lexer.debug_buffer | ||
| 438 | ); | ||
| 439 | external_scanner_state_changed = !ts_external_scanner_state_eq( | ||
| 440 | ts_subtree_external_scanner_state(external_token), | ||
| 441 | self->lexer.debug_buffer, | ||
| 442 | external_scanner_state_len | ||
| 443 | ); | ||
| 444 | |||
| 445 | // When recovering from an error, ignore any zero-length external tokens | ||
| 446 | // unless they have changed the external scanner's state. This helps to | ||
| 447 | // avoid infinite loops which could otherwise occur, because the lexer is | ||
| 448 | // looking for any possible token, instead of looking for the specific set of | ||
| 449 | // tokens that are valid in some parse state. | ||
| 450 | // | ||
| 451 | // Note that it's possible that the token end position may be *before* the | ||
| 452 | // original position of the lexer because of the way that tokens are positioned | ||
| 453 | // at included range boundaries: when a token is terminated at the start of | ||
| 454 | // an included range, it is marked as ending at the *end* of the preceding | ||
| 455 | // included range. | ||
| 456 | if ( | ||
| 457 | self->lexer.token_end_position.bytes <= current_position.bytes && | ||
| 458 | (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) && | ||
| 459 | !external_scanner_state_changed | ||
| 460 | ) { | ||
| 461 | LOG( | ||
| 462 | "ignore_empty_external_token symbol:%s", | ||
| 463 | SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol]) | ||
| 464 | ) | ||
| 465 | found_token = false; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | |||
| 469 | if (found_token) { | ||
| 470 | found_external_token = true; | ||
| 471 | called_get_column = self->lexer.did_get_column; | ||
| 472 | break; | ||
| 473 | } | ||
| 474 | |||
| 475 | ts_lexer_reset(&self->lexer, current_position); | ||
| 476 | } | ||
| 477 | |||
| 478 | LOG( | ||
| 479 | "lex_internal state:%d, row:%u, column:%u", | ||
| 480 | lex_mode.lex_state, | ||
| 481 | current_position.extent.row, | ||
| 482 | current_position.extent.column | ||
| 483 | ); | ||
| 484 | ts_lexer_start(&self->lexer); | ||
| 485 | bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); | ||
| 486 | ts_lexer_finish(&self->lexer, &lookahead_end_byte); | ||
| 487 | if (found_token) break; | ||
| 488 | |||
| 489 | if (!error_mode) { | ||
| 490 | error_mode = true; | ||
| 491 | lex_mode = self->language->lex_modes[ERROR_STATE]; | ||
| 492 | valid_external_tokens = ts_language_enabled_external_tokens( | ||
| 493 | self->language, | ||
| 494 | lex_mode.external_lex_state | ||
| 495 | ); | ||
| 496 | ts_lexer_reset(&self->lexer, start_position); | ||
| 497 | continue; | ||
| 498 | } | ||
| 499 | |||
| 500 | if (!skipped_error) { | ||
| 501 | LOG("skip_unrecognized_character"); | ||
| 502 | skipped_error = true; | ||
| 503 | error_start_position = self->lexer.token_start_position; | ||
| 504 | error_end_position = self->lexer.token_start_position; | ||
| 505 | first_error_character = self->lexer.data.lookahead; | ||
| 506 | } | ||
| 507 | |||
| 508 | if (self->lexer.current_position.bytes == error_end_position.bytes) { | ||
| 509 | if (self->lexer.data.eof(&self->lexer.data)) { | ||
| 510 | self->lexer.data.result_symbol = ts_builtin_sym_error; | ||
| 511 | break; | ||
| 512 | } | ||
| 513 | self->lexer.data.advance(&self->lexer.data, false); | ||
| 514 | } | ||
| 515 | |||
| 516 | error_end_position = self->lexer.current_position; | ||
| 517 | } | ||
| 518 | |||
| 519 | Subtree result; | ||
| 520 | if (skipped_error) { | ||
| 521 | Length padding = length_sub(error_start_position, start_position); | ||
| 522 | Length size = length_sub(error_end_position, error_start_position); | ||
| 523 | uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes; | ||
| 524 | result = ts_subtree_new_error( | ||
| 525 | &self->tree_pool, | ||
| 526 | first_error_character, | ||
| 527 | padding, | ||
| 528 | size, | ||
| 529 | lookahead_bytes, | ||
| 530 | parse_state, | ||
| 531 | self->language | ||
| 532 | ); | ||
| 533 | } else { | ||
| 534 | bool is_keyword = false; | ||
| 535 | TSSymbol symbol = self->lexer.data.result_symbol; | ||
| 536 | Length padding = length_sub(self->lexer.token_start_position, start_position); | ||
| 537 | Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); | ||
| 538 | uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; | ||
| 539 | |||
| 540 | if (found_external_token) { | ||
| 541 | symbol = self->language->external_scanner.symbol_map[symbol]; | ||
| 542 | } else if (symbol == self->language->keyword_capture_token && symbol != 0) { | ||
| 543 | uint32_t end_byte = self->lexer.token_end_position.bytes; | ||
| 544 | ts_lexer_reset(&self->lexer, self->lexer.token_start_position); | ||
| 545 | ts_lexer_start(&self->lexer); | ||
| 546 | if ( | ||
| 547 | self->language->keyword_lex_fn(&self->lexer.data, 0) && | ||
| 548 | self->lexer.token_end_position.bytes == end_byte && | ||
| 549 | ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) | ||
| 550 | ) { | ||
| 551 | is_keyword = true; | ||
| 552 | symbol = self->lexer.data.result_symbol; | ||
| 553 | } | ||
| 554 | } | ||
| 555 | |||
| 556 | result = ts_subtree_new_leaf( | ||
| 557 | &self->tree_pool, | ||
| 558 | symbol, | ||
| 559 | padding, | ||
| 560 | size, | ||
| 561 | lookahead_bytes, | ||
| 562 | parse_state, | ||
| 563 | found_external_token, | ||
| 564 | called_get_column, | ||
| 565 | is_keyword, | ||
| 566 | self->language | ||
| 567 | ); | ||
| 568 | |||
| 569 | if (found_external_token) { | ||
| 570 | MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result); | ||
| 571 | ts_external_scanner_state_init( | ||
| 572 | &mut_result.ptr->external_scanner_state, | ||
| 573 | self->lexer.debug_buffer, | ||
| 574 | external_scanner_state_len | ||
| 575 | ); | ||
| 576 | mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed; | ||
| 577 | } | ||
| 578 | } | ||
| 579 | |||
| 580 | LOG_LOOKAHEAD( | ||
| 581 | SYM_NAME(ts_subtree_symbol(result)), | ||
| 582 | ts_subtree_total_size(result).bytes | ||
| 583 | ); | ||
| 584 | return result; | ||
| 585 | } | ||
| 586 | |||
| 587 | static Subtree ts_parser__get_cached_token( | ||
| 588 | TSParser *self, | ||
| 589 | TSStateId state, | ||
| 590 | size_t position, | ||
| 591 | Subtree last_external_token, | ||
| 592 | TableEntry *table_entry | ||
| 593 | ) { | ||
| 594 | TokenCache *cache = &self->token_cache; | ||
| 595 | if ( | ||
| 596 | cache->token.ptr && cache->byte_index == position && | ||
| 597 | ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token) | ||
| 598 | ) { | ||
| 599 | ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry); | ||
| 600 | if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) { | ||
| 601 | ts_subtree_retain(cache->token); | ||
| 602 | return cache->token; | ||
| 603 | } | ||
| 604 | } | ||
| 605 | return NULL_SUBTREE; | ||
| 606 | } | ||
| 607 | |||
| 608 | static void ts_parser__set_cached_token( | ||
| 609 | TSParser *self, | ||
| 610 | uint32_t byte_index, | ||
| 611 | Subtree last_external_token, | ||
| 612 | Subtree token | ||
| 613 | ) { | ||
| 614 | TokenCache *cache = &self->token_cache; | ||
| 615 | if (token.ptr) ts_subtree_retain(token); | ||
| 616 | if (last_external_token.ptr) ts_subtree_retain(last_external_token); | ||
| 617 | if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token); | ||
| 618 | if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token); | ||
| 619 | cache->token = token; | ||
| 620 | cache->byte_index = byte_index; | ||
| 621 | cache->last_external_token = last_external_token; | ||
| 622 | } | ||
| 623 | |||
| 624 | static bool ts_parser__has_included_range_difference( | ||
| 625 | const TSParser *self, | ||
| 626 | uint32_t start_position, | ||
| 627 | uint32_t end_position | ||
| 628 | ) { | ||
| 629 | return ts_range_array_intersects( | ||
| 630 | &self->included_range_differences, | ||
| 631 | self->included_range_difference_index, | ||
| 632 | start_position, | ||
| 633 | end_position | ||
| 634 | ); | ||
| 635 | } | ||
| 636 | |||
| 637 | static Subtree ts_parser__reuse_node( | ||
| 638 | TSParser *self, | ||
| 639 | StackVersion version, | ||
| 640 | TSStateId *state, | ||
| 641 | uint32_t position, | ||
| 642 | Subtree last_external_token, | ||
| 643 | TableEntry *table_entry | ||
| 644 | ) { | ||
| 645 | Subtree result; | ||
| 646 | while ((result = reusable_node_tree(&self->reusable_node)).ptr) { | ||
| 647 | uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node); | ||
| 648 | uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result); | ||
| 649 | |||
| 650 | // Do not reuse an EOF node if the included ranges array has changes | ||
| 651 | // later on in the file. | ||
| 652 | if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX; | ||
| 653 | |||
| 654 | if (byte_offset > position) { | ||
| 655 | LOG("before_reusable_node symbol:%s", TREE_NAME(result)); | ||
| 656 | break; | ||
| 657 | } | ||
| 658 | |||
| 659 | if (byte_offset < position) { | ||
| 660 | LOG("past_reusable_node symbol:%s", TREE_NAME(result)); | ||
| 661 | if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) { | ||
| 662 | reusable_node_advance(&self->reusable_node); | ||
| 663 | } | ||
| 664 | continue; | ||
| 665 | } | ||
| 666 | |||
| 667 | if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) { | ||
| 668 | LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result)); | ||
| 669 | reusable_node_advance(&self->reusable_node); | ||
| 670 | continue; | ||
| 671 | } | ||
| 672 | |||
| 673 | const char *reason = NULL; | ||
| 674 | if (ts_subtree_has_changes(result)) { | ||
| 675 | reason = "has_changes"; | ||
| 676 | } else if (ts_subtree_is_error(result)) { | ||
| 677 | reason = "is_error"; | ||
| 678 | } else if (ts_subtree_missing(result)) { | ||
| 679 | reason = "is_missing"; | ||
| 680 | } else if (ts_subtree_is_fragile(result)) { | ||
| 681 | reason = "is_fragile"; | ||
| 682 | } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) { | ||
| 683 | reason = "contains_different_included_range"; | ||
| 684 | } | ||
| 685 | |||
| 686 | if (reason) { | ||
| 687 | LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result)); | ||
| 688 | if (!reusable_node_descend(&self->reusable_node)) { | ||
| 689 | reusable_node_advance(&self->reusable_node); | ||
| 690 | ts_parser__breakdown_top_of_stack(self, version); | ||
| 691 | *state = ts_stack_state(self->stack, version); | ||
| 692 | } | ||
| 693 | continue; | ||
| 694 | } | ||
| 695 | |||
| 696 | TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result); | ||
| 697 | ts_language_table_entry(self->language, *state, leaf_symbol, table_entry); | ||
| 698 | if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { | ||
| 699 | LOG( | ||
| 700 | "cant_reuse_node symbol:%s, first_leaf_symbol:%s", | ||
| 701 | TREE_NAME(result), | ||
| 702 | SYM_NAME(leaf_symbol) | ||
| 703 | ); | ||
| 704 | reusable_node_advance_past_leaf(&self->reusable_node); | ||
| 705 | break; | ||
| 706 | } | ||
| 707 | |||
| 708 | LOG("reuse_node symbol:%s", TREE_NAME(result)); | ||
| 709 | ts_subtree_retain(result); | ||
| 710 | return result; | ||
| 711 | } | ||
| 712 | |||
| 713 | return NULL_SUBTREE; | ||
| 714 | } | ||
| 715 | |||
| 716 | // Determine if a given tree should be replaced by an alternative tree. | ||
| 717 | // | ||
| 718 | // The decision is based on the trees' error costs (if any), their dynamic precedence, | ||
| 719 | // and finally, as a default, by a recursive comparison of the trees' symbols. | ||
| 720 | static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { | ||
| 721 | if (!left.ptr) return true; | ||
| 722 | if (!right.ptr) return false; | ||
| 723 | |||
| 724 | if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) { | ||
| 725 | LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); | ||
| 726 | return true; | ||
| 727 | } | ||
| 728 | |||
| 729 | if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) { | ||
| 730 | LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); | ||
| 731 | return false; | ||
| 732 | } | ||
| 733 | |||
| 734 | if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) { | ||
| 735 | LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u", | ||
| 736 | TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left), | ||
| 737 | ts_subtree_dynamic_precedence(left)); | ||
| 738 | return true; | ||
| 739 | } | ||
| 740 | |||
| 741 | if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) { | ||
| 742 | LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u", | ||
| 743 | TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right), | ||
| 744 | ts_subtree_dynamic_precedence(right)); | ||
| 745 | return false; | ||
| 746 | } | ||
| 747 | |||
| 748 | if (ts_subtree_error_cost(left) > 0) return true; | ||
| 749 | |||
| 750 | int comparison = ts_subtree_compare(left, right); | ||
| 751 | switch (comparison) { | ||
| 752 | case -1: | ||
| 753 | LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); | ||
| 754 | return false; | ||
| 755 | break; | ||
| 756 | case 1: | ||
| 757 | LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); | ||
| 758 | return true; | ||
| 759 | default: | ||
| 760 | LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); | ||
| 761 | return false; | ||
| 762 | } | ||
| 763 | } | ||
| 764 | |||
| 765 | // Determine if a given tree's children should be replaced by an alternative | ||
| 766 | // array of children. | ||
| 767 | static bool ts_parser__select_children( | ||
| 768 | TSParser *self, | ||
| 769 | Subtree left, | ||
| 770 | const SubtreeArray *children | ||
| 771 | ) { | ||
| 772 | array_assign(&self->scratch_trees, children); | ||
| 773 | |||
| 774 | // Create a temporary subtree using the scratch trees array. This node does | ||
| 775 | // not perform any allocation except for possibly growing the array to make | ||
| 776 | // room for its own heap data. The scratch tree is never explicitly released, | ||
| 777 | // so the same 'scratch trees' array can be reused again later. | ||
| 778 | MutableSubtree scratch_tree = ts_subtree_new_node( | ||
| 779 | ts_subtree_symbol(left), | ||
| 780 | &self->scratch_trees, | ||
| 781 | 0, | ||
| 782 | self->language | ||
| 783 | ); | ||
| 784 | |||
| 785 | return ts_parser__select_tree( | ||
| 786 | self, | ||
| 787 | left, | ||
| 788 | ts_subtree_from_mut(scratch_tree) | ||
| 789 | ); | ||
| 790 | } | ||
| 791 | |||
| 792 | static void ts_parser__shift( | ||
| 793 | TSParser *self, | ||
| 794 | StackVersion version, | ||
| 795 | TSStateId state, | ||
| 796 | Subtree lookahead, | ||
| 797 | bool extra | ||
| 798 | ) { | ||
| 799 | bool is_leaf = ts_subtree_child_count(lookahead) == 0; | ||
| 800 | Subtree subtree_to_push = lookahead; | ||
| 801 | if (extra != ts_subtree_extra(lookahead) && is_leaf) { | ||
| 802 | MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); | ||
| 803 | ts_subtree_set_extra(&result, extra); | ||
| 804 | subtree_to_push = ts_subtree_from_mut(result); | ||
| 805 | } | ||
| 806 | |||
| 807 | ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); | ||
| 808 | if (ts_subtree_has_external_tokens(subtree_to_push)) { | ||
| 809 | ts_stack_set_last_external_token( | ||
| 810 | self->stack, version, ts_subtree_last_external_token(subtree_to_push) | ||
| 811 | ); | ||
| 812 | } | ||
| 813 | } | ||
| 814 | |||
| 815 | static StackVersion ts_parser__reduce( | ||
| 816 | TSParser *self, | ||
| 817 | StackVersion version, | ||
| 818 | TSSymbol symbol, | ||
| 819 | uint32_t count, | ||
| 820 | int dynamic_precedence, | ||
| 821 | uint16_t production_id, | ||
| 822 | bool is_fragile, | ||
| 823 | bool end_of_non_terminal_extra | ||
| 824 | ) { | ||
| 825 | uint32_t initial_version_count = ts_stack_version_count(self->stack); | ||
| 826 | |||
| 827 | // Pop the given number of nodes from the given version of the parse stack. | ||
| 828 | // If stack versions have previously merged, then there may be more than one | ||
| 829 | // path back through the stack. For each path, create a new parent node to | ||
| 830 | // contain the popped children, and push it onto the stack in place of the | ||
| 831 | // children. | ||
| 832 | StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); | ||
| 833 | uint32_t removed_version_count = 0; | ||
| 834 | for (uint32_t i = 0; i < pop.size; i++) { | ||
| 835 | StackSlice slice = pop.contents[i]; | ||
| 836 | StackVersion slice_version = slice.version - removed_version_count; | ||
| 837 | |||
| 838 | // This is where new versions are added to the parse stack. The versions | ||
| 839 | // will all be sorted and truncated at the end of the outer parsing loop. | ||
| 840 | // Allow the maximum version count to be temporarily exceeded, but only | ||
| 841 | // by a limited threshold. | ||
| 842 | if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { | ||
| 843 | ts_stack_remove_version(self->stack, slice_version); | ||
| 844 | ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); | ||
| 845 | removed_version_count++; | ||
| 846 | while (i + 1 < pop.size) { | ||
| 847 | StackSlice next_slice = pop.contents[i + 1]; | ||
| 848 | if (next_slice.version != slice.version) break; | ||
| 849 | ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); | ||
| 850 | i++; | ||
| 851 | } | ||
| 852 | continue; | ||
| 853 | } | ||
| 854 | |||
| 855 | // Extra tokens on top of the stack should not be included in this new parent | ||
| 856 | // node. They will be re-pushed onto the stack after the parent node is | ||
| 857 | // created and pushed. | ||
| 858 | SubtreeArray children = slice.subtrees; | ||
| 859 | ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); | ||
| 860 | |||
| 861 | MutableSubtree parent = ts_subtree_new_node( | ||
| 862 | symbol, &children, production_id, self->language | ||
| 863 | ); | ||
| 864 | |||
| 865 | // This pop operation may have caused multiple stack versions to collapse | ||
| 866 | // into one, because they all diverged from a common state. In that case, | ||
| 867 | // choose one of the arrays of trees to be the parent node's children, and | ||
| 868 | // delete the rest of the tree arrays. | ||
| 869 | while (i + 1 < pop.size) { | ||
| 870 | StackSlice next_slice = pop.contents[i + 1]; | ||
| 871 | if (next_slice.version != slice.version) break; | ||
| 872 | i++; | ||
| 873 | |||
| 874 | SubtreeArray next_slice_children = next_slice.subtrees; | ||
| 875 | ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2); | ||
| 876 | |||
| 877 | if (ts_parser__select_children( | ||
| 878 | self, | ||
| 879 | ts_subtree_from_mut(parent), | ||
| 880 | &next_slice_children | ||
| 881 | )) { | ||
| 882 | ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); | ||
| 883 | ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); | ||
| 884 | array_swap(&self->trailing_extras, &self->trailing_extras2); | ||
| 885 | parent = ts_subtree_new_node( | ||
| 886 | symbol, &next_slice_children, production_id, self->language | ||
| 887 | ); | ||
| 888 | } else { | ||
| 889 | array_clear(&self->trailing_extras2); | ||
| 890 | ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); | ||
| 891 | } | ||
| 892 | } | ||
| 893 | |||
| 894 | TSStateId state = ts_stack_state(self->stack, slice_version); | ||
| 895 | TSStateId next_state = ts_language_next_state(self->language, state, symbol); | ||
| 896 | if (end_of_non_terminal_extra && next_state == state) { | ||
| 897 | parent.ptr->extra = true; | ||
| 898 | } | ||
| 899 | if (is_fragile || pop.size > 1 || initial_version_count > 1) { | ||
| 900 | parent.ptr->fragile_left = true; | ||
| 901 | parent.ptr->fragile_right = true; | ||
| 902 | parent.ptr->parse_state = TS_TREE_STATE_NONE; | ||
| 903 | } else { | ||
| 904 | parent.ptr->parse_state = state; | ||
| 905 | } | ||
| 906 | parent.ptr->dynamic_precedence += dynamic_precedence; | ||
| 907 | |||
| 908 | // Push the parent node onto the stack, along with any extra tokens that | ||
| 909 | // were previously on top of the stack. | ||
| 910 | ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); | ||
| 911 | for (uint32_t j = 0; j < self->trailing_extras.size; j++) { | ||
| 912 | ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); | ||
| 913 | } | ||
| 914 | |||
| 915 | for (StackVersion j = 0; j < slice_version; j++) { | ||
| 916 | if (j == version) continue; | ||
| 917 | if (ts_stack_merge(self->stack, j, slice_version)) { | ||
| 918 | removed_version_count++; | ||
| 919 | break; | ||
| 920 | } | ||
| 921 | } | ||
| 922 | } | ||
| 923 | |||
| 924 | // Return the first new stack version that was created. | ||
| 925 | return ts_stack_version_count(self->stack) > initial_version_count | ||
| 926 | ? initial_version_count | ||
| 927 | : STACK_VERSION_NONE; | ||
| 928 | } | ||
| 929 | |||
| 930 | static void ts_parser__accept( | ||
| 931 | TSParser *self, | ||
| 932 | StackVersion version, | ||
| 933 | Subtree lookahead | ||
| 934 | ) { | ||
| 935 | assert(ts_subtree_is_eof(lookahead)); | ||
| 936 | ts_stack_push(self->stack, version, lookahead, false, 1); | ||
| 937 | |||
| 938 | StackSliceArray pop = ts_stack_pop_all(self->stack, version); | ||
| 939 | for (uint32_t i = 0; i < pop.size; i++) { | ||
| 940 | SubtreeArray trees = pop.contents[i].subtrees; | ||
| 941 | |||
| 942 | Subtree root = NULL_SUBTREE; | ||
| 943 | for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { | ||
| 944 | Subtree tree = trees.contents[j]; | ||
| 945 | if (!ts_subtree_extra(tree)) { | ||
| 946 | assert(!tree.data.is_inline); | ||
| 947 | uint32_t child_count = ts_subtree_child_count(tree); | ||
| 948 | const Subtree *children = ts_subtree_children(tree); | ||
| 949 | for (uint32_t k = 0; k < child_count; k++) { | ||
| 950 | ts_subtree_retain(children[k]); | ||
| 951 | } | ||
| 952 | array_splice(&trees, j, 1, child_count, children); | ||
| 953 | root = ts_subtree_from_mut(ts_subtree_new_node( | ||
| 954 | ts_subtree_symbol(tree), | ||
| 955 | &trees, | ||
| 956 | tree.ptr->production_id, | ||
| 957 | self->language | ||
| 958 | )); | ||
| 959 | ts_subtree_release(&self->tree_pool, tree); | ||
| 960 | break; | ||
| 961 | } | ||
| 962 | } | ||
| 963 | |||
| 964 | assert(root.ptr); | ||
| 965 | self->accept_count++; | ||
| 966 | |||
| 967 | if (self->finished_tree.ptr) { | ||
| 968 | if (ts_parser__select_tree(self, self->finished_tree, root)) { | ||
| 969 | ts_subtree_release(&self->tree_pool, self->finished_tree); | ||
| 970 | self->finished_tree = root; | ||
| 971 | } else { | ||
| 972 | ts_subtree_release(&self->tree_pool, root); | ||
| 973 | } | ||
| 974 | } else { | ||
| 975 | self->finished_tree = root; | ||
| 976 | } | ||
| 977 | } | ||
| 978 | |||
| 979 | ts_stack_remove_version(self->stack, pop.contents[0].version); | ||
| 980 | ts_stack_halt(self->stack, version); | ||
| 981 | } | ||
| 982 | |||
| 983 | static bool ts_parser__do_all_potential_reductions( | ||
| 984 | TSParser *self, | ||
| 985 | StackVersion starting_version, | ||
| 986 | TSSymbol lookahead_symbol | ||
| 987 | ) { | ||
| 988 | uint32_t initial_version_count = ts_stack_version_count(self->stack); | ||
| 989 | |||
| 990 | bool can_shift_lookahead_symbol = false; | ||
| 991 | StackVersion version = starting_version; | ||
| 992 | for (unsigned i = 0; true; i++) { | ||
| 993 | uint32_t version_count = ts_stack_version_count(self->stack); | ||
| 994 | if (version >= version_count) break; | ||
| 995 | |||
| 996 | bool merged = false; | ||
| 997 | for (StackVersion j = initial_version_count; j < version; j++) { | ||
| 998 | if (ts_stack_merge(self->stack, j, version)) { | ||
| 999 | merged = true; | ||
| 1000 | break; | ||
| 1001 | } | ||
| 1002 | } | ||
| 1003 | if (merged) continue; | ||
| 1004 | |||
| 1005 | TSStateId state = ts_stack_state(self->stack, version); | ||
| 1006 | bool has_shift_action = false; | ||
| 1007 | array_clear(&self->reduce_actions); | ||
| 1008 | |||
| 1009 | TSSymbol first_symbol, end_symbol; | ||
| 1010 | if (lookahead_symbol != 0) { | ||
| 1011 | first_symbol = lookahead_symbol; | ||
| 1012 | end_symbol = lookahead_symbol + 1; | ||
| 1013 | } else { | ||
| 1014 | first_symbol = 1; | ||
| 1015 | end_symbol = self->language->token_count; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) { | ||
| 1019 | TableEntry entry; | ||
| 1020 | ts_language_table_entry(self->language, state, symbol, &entry); | ||
| 1021 | for (uint32_t j = 0; j < entry.action_count; j++) { | ||
| 1022 | TSParseAction action = entry.actions[j]; | ||
| 1023 | switch (action.type) { | ||
| 1024 | case TSParseActionTypeShift: | ||
| 1025 | case TSParseActionTypeRecover: | ||
| 1026 | if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; | ||
| 1027 | break; | ||
| 1028 | case TSParseActionTypeReduce: | ||
| 1029 | if (action.reduce.child_count > 0) | ||
| 1030 | ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) { | ||
| 1031 | .symbol = action.reduce.symbol, | ||
| 1032 | .count = action.reduce.child_count, | ||
| 1033 | .dynamic_precedence = action.reduce.dynamic_precedence, | ||
| 1034 | .production_id = action.reduce.production_id, | ||
| 1035 | }); | ||
| 1036 | break; | ||
| 1037 | default: | ||
| 1038 | break; | ||
| 1039 | } | ||
| 1040 | } | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | StackVersion reduction_version = STACK_VERSION_NONE; | ||
| 1044 | for (uint32_t j = 0; j < self->reduce_actions.size; j++) { | ||
| 1045 | ReduceAction action = self->reduce_actions.contents[j]; | ||
| 1046 | |||
| 1047 | reduction_version = ts_parser__reduce( | ||
| 1048 | self, version, action.symbol, action.count, | ||
| 1049 | action.dynamic_precedence, action.production_id, | ||
| 1050 | true, false | ||
| 1051 | ); | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | if (has_shift_action) { | ||
| 1055 | can_shift_lookahead_symbol = true; | ||
| 1056 | } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) { | ||
| 1057 | ts_stack_renumber_version(self->stack, reduction_version, version); | ||
| 1058 | continue; | ||
| 1059 | } else if (lookahead_symbol != 0) { | ||
| 1060 | ts_stack_remove_version(self->stack, version); | ||
| 1061 | } | ||
| 1062 | |||
| 1063 | if (version == starting_version) { | ||
| 1064 | version = version_count; | ||
| 1065 | } else { | ||
| 1066 | version++; | ||
| 1067 | } | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | return can_shift_lookahead_symbol; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | static bool ts_parser__recover_to_state( | ||
| 1074 | TSParser *self, | ||
| 1075 | StackVersion version, | ||
| 1076 | unsigned depth, | ||
| 1077 | TSStateId goal_state | ||
| 1078 | ) { | ||
| 1079 | StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); | ||
| 1080 | StackVersion previous_version = STACK_VERSION_NONE; | ||
| 1081 | |||
| 1082 | for (unsigned i = 0; i < pop.size; i++) { | ||
| 1083 | StackSlice slice = pop.contents[i]; | ||
| 1084 | |||
| 1085 | if (slice.version == previous_version) { | ||
| 1086 | ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); | ||
| 1087 | array_erase(&pop, i--); | ||
| 1088 | continue; | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | if (ts_stack_state(self->stack, slice.version) != goal_state) { | ||
| 1092 | ts_stack_halt(self->stack, slice.version); | ||
| 1093 | ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); | ||
| 1094 | array_erase(&pop, i--); | ||
| 1095 | continue; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); | ||
| 1099 | if (error_trees.size > 0) { | ||
| 1100 | assert(error_trees.size == 1); | ||
| 1101 | Subtree error_tree = error_trees.contents[0]; | ||
| 1102 | uint32_t error_child_count = ts_subtree_child_count(error_tree); | ||
| 1103 | if (error_child_count > 0) { | ||
| 1104 | array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); | ||
| 1105 | for (unsigned j = 0; j < error_child_count; j++) { | ||
| 1106 | ts_subtree_retain(slice.subtrees.contents[j]); | ||
| 1107 | } | ||
| 1108 | } | ||
| 1109 | ts_subtree_array_delete(&self->tree_pool, &error_trees); | ||
| 1110 | } | ||
| 1111 | |||
| 1112 | ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); | ||
| 1113 | |||
| 1114 | if (slice.subtrees.size > 0) { | ||
| 1115 | Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); | ||
| 1116 | ts_stack_push(self->stack, slice.version, error, false, goal_state); | ||
| 1117 | } else { | ||
| 1118 | array_delete(&slice.subtrees); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | for (unsigned j = 0; j < self->trailing_extras.size; j++) { | ||
| 1122 | Subtree tree = self->trailing_extras.contents[j]; | ||
| 1123 | ts_stack_push(self->stack, slice.version, tree, false, goal_state); | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | previous_version = slice.version; | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | return previous_version != STACK_VERSION_NONE; | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | static void ts_parser__recover( | ||
| 1133 | TSParser *self, | ||
| 1134 | StackVersion version, | ||
| 1135 | Subtree lookahead | ||
| 1136 | ) { | ||
| 1137 | bool did_recover = false; | ||
| 1138 | unsigned previous_version_count = ts_stack_version_count(self->stack); | ||
| 1139 | Length position = ts_stack_position(self->stack, version); | ||
| 1140 | StackSummary *summary = ts_stack_get_summary(self->stack, version); | ||
| 1141 | unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); | ||
| 1142 | unsigned current_error_cost = ts_stack_error_cost(self->stack, version); | ||
| 1143 | |||
| 1144 | // When the parser is in the error state, there are two strategies for recovering with a | ||
| 1145 | // given lookahead token: | ||
| 1146 | // 1. Find a previous state on the stack in which that lookahead token would be valid. Then, | ||
| 1147 | // create a new stack version that is in that state again. This entails popping all of the | ||
| 1148 | // subtrees that have been pushed onto the stack since that previous state, and wrapping | ||
| 1149 | // them in an ERROR node. | ||
| 1150 | // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and | ||
| 1151 | // move on to the next lookahead token, remaining in the error state. | ||
| 1152 | // | ||
| 1153 | // First, try the strategy 1. Upon entering the error state, the parser recorded a summary | ||
| 1154 | // of the previous parse states and their depths. Look at each state in the summary, to see | ||
| 1155 | // if the current lookahead token would be valid in that state. | ||
| 1156 | if (summary && !ts_subtree_is_error(lookahead)) { | ||
| 1157 | for (unsigned i = 0; i < summary->size; i++) { | ||
| 1158 | StackSummaryEntry entry = summary->contents[i]; | ||
| 1159 | |||
| 1160 | if (entry.state == ERROR_STATE) continue; | ||
| 1161 | if (entry.position.bytes == position.bytes) continue; | ||
| 1162 | unsigned depth = entry.depth; | ||
| 1163 | if (node_count_since_error > 0) depth++; | ||
| 1164 | |||
| 1165 | // Do not recover in ways that create redundant stack versions. | ||
| 1166 | bool would_merge = false; | ||
| 1167 | for (unsigned j = 0; j < previous_version_count; j++) { | ||
| 1168 | if ( | ||
| 1169 | ts_stack_state(self->stack, j) == entry.state && | ||
| 1170 | ts_stack_position(self->stack, j).bytes == position.bytes | ||
| 1171 | ) { | ||
| 1172 | would_merge = true; | ||
| 1173 | break; | ||
| 1174 | } | ||
| 1175 | } | ||
| 1176 | if (would_merge) continue; | ||
| 1177 | |||
| 1178 | // Do not recover if the result would clearly be worse than some existing stack version. | ||
| 1179 | unsigned new_cost = | ||
| 1180 | current_error_cost + | ||
| 1181 | entry.depth * ERROR_COST_PER_SKIPPED_TREE + | ||
| 1182 | (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + | ||
| 1183 | (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; | ||
| 1184 | if (ts_parser__better_version_exists(self, version, false, new_cost)) break; | ||
| 1185 | |||
| 1186 | // If the current lookahead token is valid in some previous state, recover to that state. | ||
| 1187 | // Then stop looking for further recoveries. | ||
| 1188 | if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) { | ||
| 1189 | if (ts_parser__recover_to_state(self, version, depth, entry.state)) { | ||
| 1190 | did_recover = true; | ||
| 1191 | LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); | ||
| 1192 | LOG_STACK(); | ||
| 1193 | break; | ||
| 1194 | } | ||
| 1195 | } | ||
| 1196 | } | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | // In the process of attempting to recover, some stack versions may have been created | ||
| 1200 | // and subsequently halted. Remove those versions. | ||
| 1201 | for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { | ||
| 1202 | if (!ts_stack_is_active(self->stack, i)) { | ||
| 1203 | ts_stack_remove_version(self->stack, i--); | ||
| 1204 | } | ||
| 1205 | } | ||
| 1206 | |||
| 1207 | // If strategy 1 succeeded, a new stack version will have been created which is able to handle | ||
| 1208 | // the current lookahead token. Now, in addition, try strategy 2 described above: skip the | ||
| 1209 | // current lookahead token by wrapping it in an ERROR node. | ||
| 1210 | |||
| 1211 | // Don't pursue this additional strategy if there are already too many stack versions. | ||
| 1212 | if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { | ||
| 1213 | ts_stack_halt(self->stack, version); | ||
| 1214 | ts_subtree_release(&self->tree_pool, lookahead); | ||
| 1215 | return; | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | if ( | ||
| 1219 | did_recover && | ||
| 1220 | ts_subtree_has_external_scanner_state_change(lookahead) | ||
| 1221 | ) { | ||
| 1222 | ts_stack_halt(self->stack, version); | ||
| 1223 | ts_subtree_release(&self->tree_pool, lookahead); | ||
| 1224 | return; | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | // If the parser is still in the error state at the end of the file, just wrap everything | ||
| 1228 | // in an ERROR node and terminate. | ||
| 1229 | if (ts_subtree_is_eof(lookahead)) { | ||
| 1230 | LOG("recover_eof"); | ||
| 1231 | SubtreeArray children = array_new(); | ||
| 1232 | Subtree parent = ts_subtree_new_error_node(&children, false, self->language); | ||
| 1233 | ts_stack_push(self->stack, version, parent, false, 1); | ||
| 1234 | ts_parser__accept(self, version, lookahead); | ||
| 1235 | return; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | // Do not recover if the result would clearly be worse than some existing stack version. | ||
| 1239 | unsigned new_cost = | ||
| 1240 | current_error_cost + ERROR_COST_PER_SKIPPED_TREE + | ||
| 1241 | ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + | ||
| 1242 | ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; | ||
| 1243 | if (ts_parser__better_version_exists(self, version, false, new_cost)) { | ||
| 1244 | ts_stack_halt(self->stack, version); | ||
| 1245 | ts_subtree_release(&self->tree_pool, lookahead); | ||
| 1246 | return; | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | // If the current lookahead token is an extra token, mark it as extra. This means it won't | ||
| 1250 | // be counted in error cost calculations. | ||
| 1251 | unsigned n; | ||
| 1252 | const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); | ||
| 1253 | if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { | ||
| 1254 | MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); | ||
| 1255 | ts_subtree_set_extra(&mutable_lookahead, true); | ||
| 1256 | lookahead = ts_subtree_from_mut(mutable_lookahead); | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | // Wrap the lookahead token in an ERROR. | ||
| 1260 | LOG("skip_token symbol:%s", TREE_NAME(lookahead)); | ||
| 1261 | SubtreeArray children = array_new(); | ||
| 1262 | array_reserve(&children, 1); | ||
| 1263 | array_push(&children, lookahead); | ||
| 1264 | MutableSubtree error_repeat = ts_subtree_new_node( | ||
| 1265 | ts_builtin_sym_error_repeat, | ||
| 1266 | &children, | ||
| 1267 | 0, | ||
| 1268 | self->language | ||
| 1269 | ); | ||
| 1270 | |||
| 1271 | // If other tokens have already been skipped, so there is already an ERROR at the top of the | ||
| 1272 | // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger | ||
| 1273 | // ERROR. | ||
| 1274 | if (node_count_since_error > 0) { | ||
| 1275 | StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); | ||
| 1276 | |||
| 1277 | // TODO: Figure out how to make this condition occur. | ||
| 1278 | // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 | ||
| 1279 | // If multiple stack versions have merged at this point, just pick one of the errors | ||
| 1280 | // arbitrarily and discard the rest. | ||
| 1281 | if (pop.size > 1) { | ||
| 1282 | for (unsigned i = 1; i < pop.size; i++) { | ||
| 1283 | ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees); | ||
| 1284 | } | ||
| 1285 | while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) { | ||
| 1286 | ts_stack_remove_version(self->stack, pop.contents[0].version + 1); | ||
| 1287 | } | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | ts_stack_renumber_version(self->stack, pop.contents[0].version, version); | ||
| 1291 | array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat)); | ||
| 1292 | error_repeat = ts_subtree_new_node( | ||
| 1293 | ts_builtin_sym_error_repeat, | ||
| 1294 | &pop.contents[0].subtrees, | ||
| 1295 | 0, | ||
| 1296 | self->language | ||
| 1297 | ); | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | // Push the new ERROR onto the stack. | ||
| 1301 | ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); | ||
| 1302 | if (ts_subtree_has_external_tokens(lookahead)) { | ||
| 1303 | ts_stack_set_last_external_token( | ||
| 1304 | self->stack, version, ts_subtree_last_external_token(lookahead) | ||
| 1305 | ); | ||
| 1306 | } | ||
| 1307 | } | ||
| 1308 | |||
| 1309 | static void ts_parser__handle_error( | ||
| 1310 | TSParser *self, | ||
| 1311 | StackVersion version, | ||
| 1312 | Subtree lookahead | ||
| 1313 | ) { | ||
| 1314 | uint32_t previous_version_count = ts_stack_version_count(self->stack); | ||
| 1315 | |||
| 1316 | // Perform any reductions that can happen in this state, regardless of the lookahead. After | ||
| 1317 | // skipping one or more invalid tokens, the parser might find a token that would have allowed | ||
| 1318 | // a reduction to take place. | ||
| 1319 | ts_parser__do_all_potential_reductions(self, version, 0); | ||
| 1320 | uint32_t version_count = ts_stack_version_count(self->stack); | ||
| 1321 | Length position = ts_stack_position(self->stack, version); | ||
| 1322 | |||
| 1323 | // Push a discontinuity onto the stack. Merge all of the stack versions that | ||
| 1324 | // were created in the previous step. | ||
| 1325 | bool did_insert_missing_token = false; | ||
| 1326 | for (StackVersion v = version; v < version_count;) { | ||
| 1327 | if (!did_insert_missing_token) { | ||
| 1328 | TSStateId state = ts_stack_state(self->stack, v); | ||
| 1329 | for ( | ||
| 1330 | TSSymbol missing_symbol = 1; | ||
| 1331 | missing_symbol < (uint16_t)self->language->token_count; | ||
| 1332 | missing_symbol++ | ||
| 1333 | ) { | ||
| 1334 | TSStateId state_after_missing_symbol = ts_language_next_state( | ||
| 1335 | self->language, state, missing_symbol | ||
| 1336 | ); | ||
| 1337 | if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) { | ||
| 1338 | continue; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | if (ts_language_has_reduce_action( | ||
| 1342 | self->language, | ||
| 1343 | state_after_missing_symbol, | ||
| 1344 | ts_subtree_leaf_symbol(lookahead) | ||
| 1345 | )) { | ||
| 1346 | // In case the parser is currently outside of any included range, the lexer will | ||
| 1347 | // snap to the beginning of the next included range. The missing token's padding | ||
| 1348 | // must be assigned to position it within the next included range. | ||
| 1349 | ts_lexer_reset(&self->lexer, position); | ||
| 1350 | ts_lexer_mark_end(&self->lexer); | ||
| 1351 | Length padding = length_sub(self->lexer.token_end_position, position); | ||
| 1352 | uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead); | ||
| 1353 | |||
| 1354 | StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); | ||
| 1355 | Subtree missing_tree = ts_subtree_new_missing_leaf( | ||
| 1356 | &self->tree_pool, missing_symbol, | ||
| 1357 | padding, lookahead_bytes, | ||
| 1358 | self->language | ||
| 1359 | ); | ||
| 1360 | ts_stack_push( | ||
| 1361 | self->stack, version_with_missing_tree, | ||
| 1362 | missing_tree, false, | ||
| 1363 | state_after_missing_symbol | ||
| 1364 | ); | ||
| 1365 | |||
| 1366 | if (ts_parser__do_all_potential_reductions( | ||
| 1367 | self, version_with_missing_tree, | ||
| 1368 | ts_subtree_leaf_symbol(lookahead) | ||
| 1369 | )) { | ||
| 1370 | LOG( | ||
| 1371 | "recover_with_missing symbol:%s, state:%u", | ||
| 1372 | SYM_NAME(missing_symbol), | ||
| 1373 | ts_stack_state(self->stack, version_with_missing_tree) | ||
| 1374 | ); | ||
| 1375 | did_insert_missing_token = true; | ||
| 1376 | break; | ||
| 1377 | } | ||
| 1378 | } | ||
| 1379 | } | ||
| 1380 | } | ||
| 1381 | |||
| 1382 | ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); | ||
| 1383 | v = (v == version) ? previous_version_count : v + 1; | ||
| 1384 | } | ||
| 1385 | |||
| 1386 | for (unsigned i = previous_version_count; i < version_count; i++) { | ||
| 1387 | bool did_merge = ts_stack_merge(self->stack, version, previous_version_count); | ||
| 1388 | assert(did_merge); | ||
| 1389 | (void)did_merge; // fix warning/error with clang -Os | ||
| 1390 | } | ||
| 1391 | |||
| 1392 | ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); | ||
| 1393 | |||
| 1394 | // Begin recovery with the current lookahead node, rather than waiting for the | ||
| 1395 | // next turn of the parse loop. This ensures that the tree accounts for the the | ||
| 1396 | // current lookahead token's "lookahead bytes" value, which describes how far | ||
| 1397 | // the lexer needed to look ahead beyond the content of the token in order to | ||
| 1398 | // recognize it. | ||
| 1399 | if (ts_subtree_child_count(lookahead) > 0) { | ||
| 1400 | ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); | ||
| 1401 | } | ||
| 1402 | ts_parser__recover(self, version, lookahead); | ||
| 1403 | |||
| 1404 | LOG_STACK(); | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | static bool ts_parser__advance( | ||
| 1408 | TSParser *self, | ||
| 1409 | StackVersion version, | ||
| 1410 | bool allow_node_reuse | ||
| 1411 | ) { | ||
| 1412 | TSStateId state = ts_stack_state(self->stack, version); | ||
| 1413 | uint32_t position = ts_stack_position(self->stack, version).bytes; | ||
| 1414 | Subtree last_external_token = ts_stack_last_external_token(self->stack, version); | ||
| 1415 | |||
| 1416 | bool did_reuse = true; | ||
| 1417 | Subtree lookahead = NULL_SUBTREE; | ||
| 1418 | TableEntry table_entry = {.action_count = 0}; | ||
| 1419 | |||
| 1420 | // If possible, reuse a node from the previous syntax tree. | ||
| 1421 | if (allow_node_reuse) { | ||
| 1422 | lookahead = ts_parser__reuse_node( | ||
| 1423 | self, version, &state, position, last_external_token, &table_entry | ||
| 1424 | ); | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | // If no node from the previous syntax tree could be reused, then try to | ||
| 1428 | // reuse the token previously returned by the lexer. | ||
| 1429 | if (!lookahead.ptr) { | ||
| 1430 | did_reuse = false; | ||
| 1431 | lookahead = ts_parser__get_cached_token( | ||
| 1432 | self, state, position, last_external_token, &table_entry | ||
| 1433 | ); | ||
| 1434 | } | ||
| 1435 | |||
| 1436 | bool needs_lex = !lookahead.ptr; | ||
| 1437 | for (;;) { | ||
| 1438 | // Otherwise, re-run the lexer. | ||
| 1439 | if (needs_lex) { | ||
| 1440 | needs_lex = false; | ||
| 1441 | lookahead = ts_parser__lex(self, version, state); | ||
| 1442 | |||
| 1443 | if (lookahead.ptr) { | ||
| 1444 | ts_parser__set_cached_token(self, position, last_external_token, lookahead); | ||
| 1445 | ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | // When parsing a non-terminal extra, a null lookahead indicates the | ||
| 1449 | // end of the rule. The reduction is stored in the EOF table entry. | ||
| 1450 | // After the reduction, the lexer needs to be run again. | ||
| 1451 | else { | ||
| 1452 | ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); | ||
| 1453 | } | ||
| 1454 | } | ||
| 1455 | |||
| 1456 | // If a cancellation flag or a timeout was provided, then check every | ||
| 1457 | // time a fixed number of parse actions has been processed. | ||
| 1458 | if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { | ||
| 1459 | self->operation_count = 0; | ||
| 1460 | } | ||
| 1461 | if ( | ||
| 1462 | self->operation_count == 0 && | ||
| 1463 | ((self->cancellation_flag && atomic_load(self->cancellation_flag)) || | ||
| 1464 | (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))) | ||
| 1465 | ) { | ||
| 1466 | if (lookahead.ptr) { | ||
| 1467 | ts_subtree_release(&self->tree_pool, lookahead); | ||
| 1468 | } | ||
| 1469 | return false; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | // Process each parse action for the current lookahead token in | ||
| 1473 | // the current state. If there are multiple actions, then this is | ||
| 1474 | // an ambiguous state. REDUCE actions always create a new stack | ||
| 1475 | // version, whereas SHIFT actions update the existing stack version | ||
| 1476 | // and terminate this loop. | ||
| 1477 | StackVersion last_reduction_version = STACK_VERSION_NONE; | ||
| 1478 | for (uint32_t i = 0; i < table_entry.action_count; i++) { | ||
| 1479 | TSParseAction action = table_entry.actions[i]; | ||
| 1480 | |||
| 1481 | switch (action.type) { | ||
| 1482 | case TSParseActionTypeShift: { | ||
| 1483 | if (action.shift.repetition) break; | ||
| 1484 | TSStateId next_state; | ||
| 1485 | if (action.shift.extra) { | ||
| 1486 | next_state = state; | ||
| 1487 | LOG("shift_extra"); | ||
| 1488 | } else { | ||
| 1489 | next_state = action.shift.state; | ||
| 1490 | LOG("shift state:%u", next_state); | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | if (ts_subtree_child_count(lookahead) > 0) { | ||
| 1494 | ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node); | ||
| 1495 | next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); | ||
| 1496 | } | ||
| 1497 | |||
| 1498 | ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); | ||
| 1499 | if (did_reuse) reusable_node_advance(&self->reusable_node); | ||
| 1500 | return true; | ||
| 1501 | } | ||
| 1502 | |||
| 1503 | case TSParseActionTypeReduce: { | ||
| 1504 | bool is_fragile = table_entry.action_count > 1; | ||
| 1505 | bool end_of_non_terminal_extra = lookahead.ptr == NULL; | ||
| 1506 | LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); | ||
| 1507 | StackVersion reduction_version = ts_parser__reduce( | ||
| 1508 | self, version, action.reduce.symbol, action.reduce.child_count, | ||
| 1509 | action.reduce.dynamic_precedence, action.reduce.production_id, | ||
| 1510 | is_fragile, end_of_non_terminal_extra | ||
| 1511 | ); | ||
| 1512 | if (reduction_version != STACK_VERSION_NONE) { | ||
| 1513 | last_reduction_version = reduction_version; | ||
| 1514 | } | ||
| 1515 | break; | ||
| 1516 | } | ||
| 1517 | |||
| 1518 | case TSParseActionTypeAccept: { | ||
| 1519 | LOG("accept"); | ||
| 1520 | ts_parser__accept(self, version, lookahead); | ||
| 1521 | return true; | ||
| 1522 | } | ||
| 1523 | |||
| 1524 | case TSParseActionTypeRecover: { | ||
| 1525 | if (ts_subtree_child_count(lookahead) > 0) { | ||
| 1526 | ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); | ||
| 1527 | } | ||
| 1528 | |||
| 1529 | ts_parser__recover(self, version, lookahead); | ||
| 1530 | if (did_reuse) reusable_node_advance(&self->reusable_node); | ||
| 1531 | return true; | ||
| 1532 | } | ||
| 1533 | } | ||
| 1534 | } | ||
| 1535 | |||
| 1536 | // If a reduction was performed, then replace the current stack version | ||
| 1537 | // with one of the stack versions created by a reduction, and continue | ||
| 1538 | // processing this version of the stack with the same lookahead symbol. | ||
| 1539 | if (last_reduction_version != STACK_VERSION_NONE) { | ||
| 1540 | ts_stack_renumber_version(self->stack, last_reduction_version, version); | ||
| 1541 | LOG_STACK(); | ||
| 1542 | state = ts_stack_state(self->stack, version); | ||
| 1543 | |||
| 1544 | // At the end of a non-terminal extra rule, the lexer will return a | ||
| 1545 | // null subtree, because the parser needs to perform a fixed reduction | ||
| 1546 | // regardless of the lookahead node. After performing that reduction, | ||
| 1547 | // (and completing the non-terminal extra rule) run the lexer again based | ||
| 1548 | // on the current parse state. | ||
| 1549 | if (!lookahead.ptr) { | ||
| 1550 | needs_lex = true; | ||
| 1551 | } else { | ||
| 1552 | ts_language_table_entry( | ||
| 1553 | self->language, | ||
| 1554 | state, | ||
| 1555 | ts_subtree_leaf_symbol(lookahead), | ||
| 1556 | &table_entry | ||
| 1557 | ); | ||
| 1558 | } | ||
| 1559 | |||
| 1560 | continue; | ||
| 1561 | } | ||
| 1562 | |||
| 1563 | // A non-terminal extra rule was reduced and merged into an existing | ||
| 1564 | // stack version. This version can be discarded. | ||
| 1565 | if (!lookahead.ptr) { | ||
| 1566 | ts_stack_halt(self->stack, version); | ||
| 1567 | return true; | ||
| 1568 | } | ||
| 1569 | |||
| 1570 | // If there were no parse actions for the current lookahead token, then | ||
| 1571 | // it is not valid in this state. If the current lookahead token is a | ||
| 1572 | // keyword, then switch to treating it as the normal word token if that | ||
| 1573 | // token is valid in this state. | ||
| 1574 | if ( | ||
| 1575 | ts_subtree_is_keyword(lookahead) && | ||
| 1576 | ts_subtree_symbol(lookahead) != self->language->keyword_capture_token | ||
| 1577 | ) { | ||
| 1578 | ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry); | ||
| 1579 | if (table_entry.action_count > 0) { | ||
| 1580 | LOG( | ||
| 1581 | "switch from_keyword:%s, to_word_token:%s", | ||
| 1582 | TREE_NAME(lookahead), | ||
| 1583 | SYM_NAME(self->language->keyword_capture_token) | ||
| 1584 | ); | ||
| 1585 | |||
| 1586 | MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); | ||
| 1587 | ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); | ||
| 1588 | lookahead = ts_subtree_from_mut(mutable_lookahead); | ||
| 1589 | continue; | ||
| 1590 | } | ||
| 1591 | } | ||
| 1592 | |||
| 1593 | // If the current lookahead token is not valid and the parser is | ||
| 1594 | // already in the error state, restart the error recovery process. | ||
| 1595 | // TODO - can this be unified with the other `RECOVER` case above? | ||
| 1596 | if (state == ERROR_STATE) { | ||
| 1597 | ts_parser__recover(self, version, lookahead); | ||
| 1598 | return true; | ||
| 1599 | } | ||
| 1600 | |||
| 1601 | // If the current lookahead token is not valid and the previous | ||
| 1602 | // subtree on the stack was reused from an old tree, it isn't actually | ||
| 1603 | // valid to reuse it. Remove it from the stack, and in its place, | ||
| 1604 | // push each of its children. Then try again to process the current | ||
| 1605 | // lookahead. | ||
| 1606 | if (ts_parser__breakdown_top_of_stack(self, version)) { | ||
| 1607 | state = ts_stack_state(self->stack, version); | ||
| 1608 | ts_subtree_release(&self->tree_pool, lookahead); | ||
| 1609 | needs_lex = true; | ||
| 1610 | continue; | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | // At this point, the current lookahead token is definitely not valid | ||
| 1614 | // for this parse stack version. Mark this version as paused and continue | ||
| 1615 | // processing any other stack versions that might exist. If some other | ||
| 1616 | // version advances successfully, then this version can simply be removed. | ||
| 1617 | // But if all versions end up paused, then error recovery is needed. | ||
| 1618 | LOG("detect_error"); | ||
| 1619 | ts_stack_pause(self->stack, version, lookahead); | ||
| 1620 | return true; | ||
| 1621 | } | ||
| 1622 | } | ||
| 1623 | |||
| 1624 | static unsigned ts_parser__condense_stack(TSParser *self) { | ||
| 1625 | bool made_changes = false; | ||
| 1626 | unsigned min_error_cost = UINT_MAX; | ||
| 1627 | for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { | ||
| 1628 | // Prune any versions that have been marked for removal. | ||
| 1629 | if (ts_stack_is_halted(self->stack, i)) { | ||
| 1630 | ts_stack_remove_version(self->stack, i); | ||
| 1631 | i--; | ||
| 1632 | continue; | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | // Keep track of the minimum error cost of any stack version so | ||
| 1636 | // that it can be returned. | ||
| 1637 | ErrorStatus status_i = ts_parser__version_status(self, i); | ||
| 1638 | if (!status_i.is_in_error && status_i.cost < min_error_cost) { | ||
| 1639 | min_error_cost = status_i.cost; | ||
| 1640 | } | ||
| 1641 | |||
| 1642 | // Examine each pair of stack versions, removing any versions that | ||
| 1643 | // are clearly worse than another version. Ensure that the versions | ||
| 1644 | // are ordered from most promising to least promising. | ||
| 1645 | for (StackVersion j = 0; j < i; j++) { | ||
| 1646 | ErrorStatus status_j = ts_parser__version_status(self, j); | ||
| 1647 | |||
| 1648 | switch (ts_parser__compare_versions(self, status_j, status_i)) { | ||
| 1649 | case ErrorComparisonTakeLeft: | ||
| 1650 | made_changes = true; | ||
| 1651 | ts_stack_remove_version(self->stack, i); | ||
| 1652 | i--; | ||
| 1653 | j = i; | ||
| 1654 | break; | ||
| 1655 | |||
| 1656 | case ErrorComparisonPreferLeft: | ||
| 1657 | case ErrorComparisonNone: | ||
| 1658 | if (ts_stack_merge(self->stack, j, i)) { | ||
| 1659 | made_changes = true; | ||
| 1660 | i--; | ||
| 1661 | j = i; | ||
| 1662 | } | ||
| 1663 | break; | ||
| 1664 | |||
| 1665 | case ErrorComparisonPreferRight: | ||
| 1666 | made_changes = true; | ||
| 1667 | if (ts_stack_merge(self->stack, j, i)) { | ||
| 1668 | i--; | ||
| 1669 | j = i; | ||
| 1670 | } else { | ||
| 1671 | ts_stack_swap_versions(self->stack, i, j); | ||
| 1672 | } | ||
| 1673 | break; | ||
| 1674 | |||
| 1675 | case ErrorComparisonTakeRight: | ||
| 1676 | made_changes = true; | ||
| 1677 | ts_stack_remove_version(self->stack, j); | ||
| 1678 | i--; | ||
| 1679 | j--; | ||
| 1680 | break; | ||
| 1681 | } | ||
| 1682 | } | ||
| 1683 | } | ||
| 1684 | |||
| 1685 | // Enfore a hard upper bound on the number of stack versions by | ||
| 1686 | // discarding the least promising versions. | ||
| 1687 | while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { | ||
| 1688 | ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); | ||
| 1689 | made_changes = true; | ||
| 1690 | } | ||
| 1691 | |||
| 1692 | // If the best-performing stack version is currently paused, or all | ||
| 1693 | // versions are paused, then resume the best paused version and begin | ||
| 1694 | // the error recovery process. Otherwise, remove the paused versions. | ||
| 1695 | if (ts_stack_version_count(self->stack) > 0) { | ||
| 1696 | bool has_unpaused_version = false; | ||
| 1697 | for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { | ||
| 1698 | if (ts_stack_is_paused(self->stack, i)) { | ||
| 1699 | if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) { | ||
| 1700 | LOG("resume version:%u", i); | ||
| 1701 | min_error_cost = ts_stack_error_cost(self->stack, i); | ||
| 1702 | Subtree lookahead = ts_stack_resume(self->stack, i); | ||
| 1703 | ts_parser__handle_error(self, i, lookahead); | ||
| 1704 | has_unpaused_version = true; | ||
| 1705 | } else { | ||
| 1706 | ts_stack_remove_version(self->stack, i); | ||
| 1707 | i--; | ||
| 1708 | n--; | ||
| 1709 | } | ||
| 1710 | } else { | ||
| 1711 | has_unpaused_version = true; | ||
| 1712 | } | ||
| 1713 | } | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | if (made_changes) { | ||
| 1717 | LOG("condense"); | ||
| 1718 | LOG_STACK(); | ||
| 1719 | } | ||
| 1720 | |||
| 1721 | return min_error_cost; | ||
| 1722 | } | ||
| 1723 | |||
| 1724 | static bool ts_parser_has_outstanding_parse(TSParser *self) { | ||
| 1725 | return ( | ||
| 1726 | ts_stack_state(self->stack, 0) != 1 || | ||
| 1727 | ts_stack_node_count_since_error(self->stack, 0) != 0 | ||
| 1728 | ); | ||
| 1729 | } | ||
| 1730 | |||
| 1731 | // Parser - Public | ||
| 1732 | |||
| 1733 | TSParser *ts_parser_new(void) { | ||
| 1734 | TSParser *self = ts_calloc(1, sizeof(TSParser)); | ||
| 1735 | ts_lexer_init(&self->lexer); | ||
| 1736 | array_init(&self->reduce_actions); | ||
| 1737 | array_reserve(&self->reduce_actions, 4); | ||
| 1738 | self->tree_pool = ts_subtree_pool_new(32); | ||
| 1739 | self->stack = ts_stack_new(&self->tree_pool); | ||
| 1740 | self->finished_tree = NULL_SUBTREE; | ||
| 1741 | self->reusable_node = reusable_node_new(); | ||
| 1742 | self->dot_graph_file = NULL; | ||
| 1743 | self->cancellation_flag = NULL; | ||
| 1744 | self->timeout_duration = 0; | ||
| 1745 | self->end_clock = clock_null(); | ||
| 1746 | self->operation_count = 0; | ||
| 1747 | self->old_tree = NULL_SUBTREE; | ||
| 1748 | self->included_range_differences = (TSRangeArray) array_new(); | ||
| 1749 | self->included_range_difference_index = 0; | ||
| 1750 | ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); | ||
| 1751 | return self; | ||
| 1752 | } | ||
| 1753 | |||
| 1754 | void ts_parser_delete(TSParser *self) { | ||
| 1755 | if (!self) return; | ||
| 1756 | |||
| 1757 | ts_parser_set_language(self, NULL); | ||
| 1758 | ts_stack_delete(self->stack); | ||
| 1759 | if (self->reduce_actions.contents) { | ||
| 1760 | array_delete(&self->reduce_actions); | ||
| 1761 | } | ||
| 1762 | if (self->included_range_differences.contents) { | ||
| 1763 | array_delete(&self->included_range_differences); | ||
| 1764 | } | ||
| 1765 | if (self->old_tree.ptr) { | ||
| 1766 | ts_subtree_release(&self->tree_pool, self->old_tree); | ||
| 1767 | self->old_tree = NULL_SUBTREE; | ||
| 1768 | } | ||
| 1769 | ts_lexer_delete(&self->lexer); | ||
| 1770 | ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); | ||
| 1771 | ts_subtree_pool_delete(&self->tree_pool); | ||
| 1772 | reusable_node_delete(&self->reusable_node); | ||
| 1773 | array_delete(&self->trailing_extras); | ||
| 1774 | array_delete(&self->trailing_extras2); | ||
| 1775 | array_delete(&self->scratch_trees); | ||
| 1776 | ts_free(self); | ||
| 1777 | } | ||
| 1778 | |||
| 1779 | const TSLanguage *ts_parser_language(const TSParser *self) { | ||
| 1780 | return self->language; | ||
| 1781 | } | ||
| 1782 | |||
| 1783 | bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { | ||
| 1784 | if (language) { | ||
| 1785 | if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false; | ||
| 1786 | if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false; | ||
| 1787 | } | ||
| 1788 | |||
| 1789 | if (self->external_scanner_payload && self->language->external_scanner.destroy) { | ||
| 1790 | self->language->external_scanner.destroy(self->external_scanner_payload); | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | if (language && language->external_scanner.create) { | ||
| 1794 | self->external_scanner_payload = language->external_scanner.create(); | ||
| 1795 | } else { | ||
| 1796 | self->external_scanner_payload = NULL; | ||
| 1797 | } | ||
| 1798 | |||
| 1799 | self->language = language; | ||
| 1800 | ts_parser_reset(self); | ||
| 1801 | return true; | ||
| 1802 | } | ||
| 1803 | |||
| 1804 | TSLogger ts_parser_logger(const TSParser *self) { | ||
| 1805 | return self->lexer.logger; | ||
| 1806 | } | ||
| 1807 | |||
| 1808 | void ts_parser_set_logger(TSParser *self, TSLogger logger) { | ||
| 1809 | self->lexer.logger = logger; | ||
| 1810 | } | ||
| 1811 | |||
| 1812 | void ts_parser_print_dot_graphs(TSParser *self, int fd) { | ||
| 1813 | if (self->dot_graph_file) { | ||
| 1814 | fclose(self->dot_graph_file); | ||
| 1815 | } | ||
| 1816 | |||
| 1817 | if (fd >= 0) { | ||
| 1818 | #ifdef _WIN32 | ||
| 1819 | self->dot_graph_file = _fdopen(fd, "a"); | ||
| 1820 | #else | ||
| 1821 | self->dot_graph_file = fdopen(fd, "a"); | ||
| 1822 | #endif | ||
| 1823 | } else { | ||
| 1824 | self->dot_graph_file = NULL; | ||
| 1825 | } | ||
| 1826 | } | ||
| 1827 | |||
| 1828 | const size_t *ts_parser_cancellation_flag(const TSParser *self) { | ||
| 1829 | return (const size_t *)self->cancellation_flag; | ||
| 1830 | } | ||
| 1831 | |||
| 1832 | void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) { | ||
| 1833 | self->cancellation_flag = (const volatile size_t *)flag; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | uint64_t ts_parser_timeout_micros(const TSParser *self) { | ||
| 1837 | return duration_to_micros(self->timeout_duration); | ||
| 1838 | } | ||
| 1839 | |||
| 1840 | void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { | ||
| 1841 | self->timeout_duration = duration_from_micros(timeout_micros); | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | bool ts_parser_set_included_ranges( | ||
| 1845 | TSParser *self, | ||
| 1846 | const TSRange *ranges, | ||
| 1847 | uint32_t count | ||
| 1848 | ) { | ||
| 1849 | return ts_lexer_set_included_ranges(&self->lexer, ranges, count); | ||
| 1850 | } | ||
| 1851 | |||
| 1852 | const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) { | ||
| 1853 | return ts_lexer_included_ranges(&self->lexer, count); | ||
| 1854 | } | ||
| 1855 | |||
| 1856 | void ts_parser_reset(TSParser *self) { | ||
| 1857 | if (self->language && self->language->external_scanner.deserialize) { | ||
| 1858 | self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0); | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | if (self->old_tree.ptr) { | ||
| 1862 | ts_subtree_release(&self->tree_pool, self->old_tree); | ||
| 1863 | self->old_tree = NULL_SUBTREE; | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | reusable_node_clear(&self->reusable_node); | ||
| 1867 | ts_lexer_reset(&self->lexer, length_zero()); | ||
| 1868 | ts_stack_clear(self->stack); | ||
| 1869 | ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); | ||
| 1870 | if (self->finished_tree.ptr) { | ||
| 1871 | ts_subtree_release(&self->tree_pool, self->finished_tree); | ||
| 1872 | self->finished_tree = NULL_SUBTREE; | ||
| 1873 | } | ||
| 1874 | self->accept_count = 0; | ||
| 1875 | } | ||
| 1876 | |||
| 1877 | TSTree *ts_parser_parse( | ||
| 1878 | TSParser *self, | ||
| 1879 | const TSTree *old_tree, | ||
| 1880 | TSInput input | ||
| 1881 | ) { | ||
| 1882 | if (!self->language || !input.read) return NULL; | ||
| 1883 | |||
| 1884 | ts_lexer_set_input(&self->lexer, input); | ||
| 1885 | |||
| 1886 | array_clear(&self->included_range_differences); | ||
| 1887 | self->included_range_difference_index = 0; | ||
| 1888 | |||
| 1889 | if (ts_parser_has_outstanding_parse(self)) { | ||
| 1890 | LOG("resume_parsing"); | ||
| 1891 | } else if (old_tree) { | ||
| 1892 | ts_subtree_retain(old_tree->root); | ||
| 1893 | self->old_tree = old_tree->root; | ||
| 1894 | ts_range_array_get_changed_ranges( | ||
| 1895 | old_tree->included_ranges, old_tree->included_range_count, | ||
| 1896 | self->lexer.included_ranges, self->lexer.included_range_count, | ||
| 1897 | &self->included_range_differences | ||
| 1898 | ); | ||
| 1899 | reusable_node_reset(&self->reusable_node, old_tree->root); | ||
| 1900 | LOG("parse_after_edit"); | ||
| 1901 | LOG_TREE(self->old_tree); | ||
| 1902 | for (unsigned i = 0; i < self->included_range_differences.size; i++) { | ||
| 1903 | TSRange *range = &self->included_range_differences.contents[i]; | ||
| 1904 | LOG("different_included_range %u - %u", range->start_byte, range->end_byte); | ||
| 1905 | } | ||
| 1906 | } else { | ||
| 1907 | reusable_node_clear(&self->reusable_node); | ||
| 1908 | LOG("new_parse"); | ||
| 1909 | } | ||
| 1910 | |||
| 1911 | self->operation_count = 0; | ||
| 1912 | if (self->timeout_duration) { | ||
| 1913 | self->end_clock = clock_after(clock_now(), self->timeout_duration); | ||
| 1914 | } else { | ||
| 1915 | self->end_clock = clock_null(); | ||
| 1916 | } | ||
| 1917 | |||
| 1918 | uint32_t position = 0, last_position = 0, version_count = 0; | ||
| 1919 | do { | ||
| 1920 | for ( | ||
| 1921 | StackVersion version = 0; | ||
| 1922 | version_count = ts_stack_version_count(self->stack), | ||
| 1923 | version < version_count; | ||
| 1924 | version++ | ||
| 1925 | ) { | ||
| 1926 | bool allow_node_reuse = version_count == 1; | ||
| 1927 | while (ts_stack_is_active(self->stack, version)) { | ||
| 1928 | LOG( | ||
| 1929 | "process version:%d, version_count:%u, state:%d, row:%u, col:%u", | ||
| 1930 | version, | ||
| 1931 | ts_stack_version_count(self->stack), | ||
| 1932 | ts_stack_state(self->stack, version), | ||
| 1933 | ts_stack_position(self->stack, version).extent.row, | ||
| 1934 | ts_stack_position(self->stack, version).extent.column | ||
| 1935 | ); | ||
| 1936 | |||
| 1937 | if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL; | ||
| 1938 | LOG_STACK(); | ||
| 1939 | |||
| 1940 | position = ts_stack_position(self->stack, version).bytes; | ||
| 1941 | if (position > last_position || (version > 0 && position == last_position)) { | ||
| 1942 | last_position = position; | ||
| 1943 | break; | ||
| 1944 | } | ||
| 1945 | } | ||
| 1946 | } | ||
| 1947 | |||
| 1948 | // After advancing each version of the stack, re-sort the versions by their cost, | ||
| 1949 | // removing any versions that are no longer worth pursuing. | ||
| 1950 | unsigned min_error_cost = ts_parser__condense_stack(self); | ||
| 1951 | |||
| 1952 | // If there's already a finished parse tree that's better than any in-progress version, | ||
| 1953 | // then terminate parsing. Clear the parse stack to remove any extra references to subtrees | ||
| 1954 | // within the finished tree, ensuring that these subtrees can be safely mutated in-place | ||
| 1955 | // for rebalancing. | ||
| 1956 | if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { | ||
| 1957 | ts_stack_clear(self->stack); | ||
| 1958 | break; | ||
| 1959 | } | ||
| 1960 | |||
| 1961 | while (self->included_range_difference_index < self->included_range_differences.size) { | ||
| 1962 | TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index]; | ||
| 1963 | if (range->end_byte <= position) { | ||
| 1964 | self->included_range_difference_index++; | ||
| 1965 | } else { | ||
| 1966 | break; | ||
| 1967 | } | ||
| 1968 | } | ||
| 1969 | } while (version_count != 0); | ||
| 1970 | |||
| 1971 | assert(self->finished_tree.ptr); | ||
| 1972 | ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); | ||
| 1973 | LOG("done"); | ||
| 1974 | LOG_TREE(self->finished_tree); | ||
| 1975 | |||
| 1976 | TSTree *result = ts_tree_new( | ||
| 1977 | self->finished_tree, | ||
| 1978 | self->language, | ||
| 1979 | self->lexer.included_ranges, | ||
| 1980 | self->lexer.included_range_count | ||
| 1981 | ); | ||
| 1982 | self->finished_tree = NULL_SUBTREE; | ||
| 1983 | ts_parser_reset(self); | ||
| 1984 | return result; | ||
| 1985 | } | ||
| 1986 | |||
| 1987 | TSTree *ts_parser_parse_string( | ||
| 1988 | TSParser *self, | ||
| 1989 | const TSTree *old_tree, | ||
| 1990 | const char *string, | ||
| 1991 | uint32_t length | ||
| 1992 | ) { | ||
| 1993 | return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8); | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | TSTree *ts_parser_parse_string_encoding( | ||
| 1997 | TSParser *self, | ||
| 1998 | const TSTree *old_tree, | ||
| 1999 | const char *string, | ||
| 2000 | uint32_t length, | ||
| 2001 | TSInputEncoding encoding | ||
| 2002 | ) { | ||
| 2003 | TSStringInput input = {string, length}; | ||
| 2004 | return ts_parser_parse(self, old_tree, (TSInput) { | ||
| 2005 | &input, | ||
| 2006 | ts_string_input_read, | ||
| 2007 | encoding, | ||
| 2008 | }); | ||
| 2009 | } | ||
| 2010 | |||
| 2011 | #undef LOG | ||
diff --git a/vendor/tree-sitter/lib/src/point.h b/vendor/tree-sitter/lib/src/point.h new file mode 100644 index 0000000..37346c8 --- /dev/null +++ b/vendor/tree-sitter/lib/src/point.h | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | #ifndef TREE_SITTER_POINT_H_ | ||
| 2 | #define TREE_SITTER_POINT_H_ | ||
| 3 | |||
| 4 | #include "tree_sitter/api.h" | ||
| 5 | |||
| 6 | #define POINT_ZERO ((TSPoint) {0, 0}) | ||
| 7 | #define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX}) | ||
| 8 | |||
| 9 | static inline TSPoint point__new(unsigned row, unsigned column) { | ||
| 10 | TSPoint result = {row, column}; | ||
| 11 | return result; | ||
| 12 | } | ||
| 13 | |||
| 14 | static inline TSPoint point_add(TSPoint a, TSPoint b) { | ||
| 15 | if (b.row > 0) | ||
| 16 | return point__new(a.row + b.row, b.column); | ||
| 17 | else | ||
| 18 | return point__new(a.row, a.column + b.column); | ||
| 19 | } | ||
| 20 | |||
| 21 | static inline TSPoint point_sub(TSPoint a, TSPoint b) { | ||
| 22 | if (a.row > b.row) | ||
| 23 | return point__new(a.row - b.row, a.column); | ||
| 24 | else | ||
| 25 | return point__new(0, a.column - b.column); | ||
| 26 | } | ||
| 27 | |||
| 28 | static inline bool point_lte(TSPoint a, TSPoint b) { | ||
| 29 | return (a.row < b.row) || (a.row == b.row && a.column <= b.column); | ||
| 30 | } | ||
| 31 | |||
| 32 | static inline bool point_lt(TSPoint a, TSPoint b) { | ||
| 33 | return (a.row < b.row) || (a.row == b.row && a.column < b.column); | ||
| 34 | } | ||
| 35 | |||
| 36 | static inline bool point_gt(TSPoint a, TSPoint b) { | ||
| 37 | return (a.row > b.row) || (a.row == b.row && a.column > b.column); | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline bool point_gte(TSPoint a, TSPoint b) { | ||
| 41 | return (a.row > b.row) || (a.row == b.row && a.column >= b.column); | ||
| 42 | } | ||
| 43 | |||
| 44 | static inline bool point_eq(TSPoint a, TSPoint b) { | ||
| 45 | return a.row == b.row && a.column == b.column; | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline TSPoint point_min(TSPoint a, TSPoint b) { | ||
| 49 | if (a.row < b.row || (a.row == b.row && a.column < b.column)) | ||
| 50 | return a; | ||
| 51 | else | ||
| 52 | return b; | ||
| 53 | } | ||
| 54 | |||
| 55 | static inline TSPoint point_max(TSPoint a, TSPoint b) { | ||
| 56 | if (a.row > b.row || (a.row == b.row && a.column > b.column)) | ||
| 57 | return a; | ||
| 58 | else | ||
| 59 | return b; | ||
| 60 | } | ||
| 61 | |||
| 62 | #endif | ||
diff --git a/vendor/tree-sitter/lib/src/query.c b/vendor/tree-sitter/lib/src/query.c new file mode 100644 index 0000000..4e623ae --- /dev/null +++ b/vendor/tree-sitter/lib/src/query.c | |||
| @@ -0,0 +1,4130 @@ | |||
| 1 | #include "tree_sitter/api.h" | ||
| 2 | #include "./alloc.h" | ||
| 3 | #include "./array.h" | ||
| 4 | #include "./language.h" | ||
| 5 | #include "./point.h" | ||
| 6 | #include "./tree_cursor.h" | ||
| 7 | #include "./unicode.h" | ||
| 8 | #include <wctype.h> | ||
| 9 | |||
| 10 | // #define DEBUG_ANALYZE_QUERY | ||
| 11 | // #define DEBUG_EXECUTE_QUERY | ||
| 12 | |||
| 13 | #define MAX_STEP_CAPTURE_COUNT 3 | ||
| 14 | #define MAX_NEGATED_FIELD_COUNT 8 | ||
| 15 | #define MAX_STATE_PREDECESSOR_COUNT 256 | ||
| 16 | #define MAX_ANALYSIS_STATE_DEPTH 8 | ||
| 17 | #define MAX_ANALYSIS_ITERATION_COUNT 256 | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Stream - A sequence of unicode characters derived from a UTF8 string. | ||
| 21 | * This struct is used in parsing queries from S-expressions. | ||
| 22 | */ | ||
| 23 | typedef struct { | ||
| 24 | const char *input; | ||
| 25 | const char *start; | ||
| 26 | const char *end; | ||
| 27 | int32_t next; | ||
| 28 | uint8_t next_size; | ||
| 29 | } Stream; | ||
| 30 | |||
| 31 | /* | ||
| 32 | * QueryStep - A step in the process of matching a query. Each node within | ||
| 33 | * a query S-expression corresponds to one of these steps. An entire pattern | ||
| 34 | * is represented as a sequence of these steps. The basic properties of a | ||
| 35 | * node are represented by these fields: | ||
| 36 | * - `symbol` - The grammar symbol to match. A zero value represents the | ||
| 37 | * wildcard symbol, '_'. | ||
| 38 | * - `field` - The field name to match. A zero value means that a field name | ||
| 39 | * was not specified. | ||
| 40 | * - `capture_ids` - An array of integers representing the names of captures | ||
| 41 | * associated with this node in the pattern, terminated by a `NONE` value. | ||
| 42 | * - `depth` - The depth where this node occurs in the pattern. The root node | ||
| 43 | * of the pattern has depth zero. | ||
| 44 | * - `negated_field_list_id` - An id representing a set of fields that must | ||
| 45 | * not be present on a node matching this step. | ||
| 46 | * | ||
| 47 | * Steps have some additional fields in order to handle the `.` (or "anchor") operator, | ||
| 48 | * which forbids additional child nodes: | ||
| 49 | * - `is_immediate` - Indicates that the node matching this step cannot be preceded | ||
| 50 | * by other sibling nodes that weren't specified in the pattern. | ||
| 51 | * - `is_last_child` - Indicates that the node matching this step cannot have any | ||
| 52 | * subsequent named siblings. | ||
| 53 | * | ||
| 54 | * For simple patterns, steps are matched in sequential order. But in order to | ||
| 55 | * handle alternative/repeated/optional sub-patterns, query steps are not always | ||
| 56 | * structured as a linear sequence; they sometimes need to split and merge. This | ||
| 57 | * is done using the following fields: | ||
| 58 | * - `alternative_index` - The index of a different query step that serves as | ||
| 59 | * an alternative to this step. A `NONE` value represents no alternative. | ||
| 60 | * When a query state reaches a step with an alternative index, the state | ||
| 61 | * is duplicated, with one copy remaining at the original step, and one copy | ||
| 62 | * moving to the alternative step. The alternative may have its own alternative | ||
| 63 | * step, so this splitting is an iterative process. | ||
| 64 | * - `is_dead_end` - Indicates that this state cannot be passed directly, and | ||
| 65 | * exists only in order to redirect to an alternative index, with no splitting. | ||
| 66 | * - `is_pass_through` - Indicates that this state has no matching logic of its own, | ||
| 67 | * and exists only to split a state. One copy of the state advances immediately | ||
| 68 | * to the next step, and one moves to the alternative step. | ||
| 69 | * - `alternative_is_immediate` - Indicates that this step's alternative step | ||
| 70 | * should be treated as if `is_immediate` is true. | ||
| 71 | * | ||
| 72 | * Steps also store some derived state that summarizes how they relate to other | ||
| 73 | * steps within the same pattern. This is used to optimize the matching process: | ||
| 74 | * - `contains_captures` - Indicates that this step or one of its child steps | ||
| 75 | * has a non-empty `capture_ids` list. | ||
| 76 | * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then | ||
| 77 | * it and all of its subsequent sibling steps within the same parent pattern | ||
| 78 | * are guaranteed to match. | ||
| 79 | * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but | ||
| 80 | * for the entire top-level pattern. When iterating through a query's | ||
| 81 | * captures using `ts_query_cursor_next_capture`, this field is used to | ||
| 82 | * detect that a capture can safely be returned from a match that has not | ||
| 83 | * even completed yet. | ||
| 84 | */ | ||
| 85 | typedef struct { | ||
| 86 | TSSymbol symbol; | ||
| 87 | TSSymbol supertype_symbol; | ||
| 88 | TSFieldId field; | ||
| 89 | uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; | ||
| 90 | uint16_t depth; | ||
| 91 | uint16_t alternative_index; | ||
| 92 | uint16_t negated_field_list_id; | ||
| 93 | bool is_named: 1; | ||
| 94 | bool is_immediate: 1; | ||
| 95 | bool is_last_child: 1; | ||
| 96 | bool is_pass_through: 1; | ||
| 97 | bool is_dead_end: 1; | ||
| 98 | bool alternative_is_immediate: 1; | ||
| 99 | bool contains_captures: 1; | ||
| 100 | bool root_pattern_guaranteed: 1; | ||
| 101 | bool parent_pattern_guaranteed: 1; | ||
| 102 | } QueryStep; | ||
| 103 | |||
| 104 | /* | ||
| 105 | * Slice - A slice of an external array. Within a query, capture names, | ||
| 106 | * literal string values, and predicate step information are stored in three | ||
| 107 | * contiguous arrays. Individual captures, string values, and predicates are | ||
| 108 | * represented as slices of these three arrays. | ||
| 109 | */ | ||
| 110 | typedef struct { | ||
| 111 | uint32_t offset; | ||
| 112 | uint32_t length; | ||
| 113 | } Slice; | ||
| 114 | |||
| 115 | /* | ||
| 116 | * SymbolTable - a two-way mapping of strings to ids. | ||
| 117 | */ | ||
| 118 | typedef struct { | ||
| 119 | Array(char) characters; | ||
| 120 | Array(Slice) slices; | ||
| 121 | } SymbolTable; | ||
| 122 | |||
| 123 | /** | ||
| 124 | * CaptureQuantifiers - a data structure holding the quantifiers of pattern captures. | ||
| 125 | */ | ||
| 126 | typedef Array(uint8_t) CaptureQuantifiers; | ||
| 127 | |||
| 128 | /* | ||
| 129 | * PatternEntry - Information about the starting point for matching a particular | ||
| 130 | * pattern. These entries are stored in a 'pattern map' - a sorted array that | ||
| 131 | * makes it possible to efficiently lookup patterns based on the symbol for their | ||
| 132 | * first step. The entry consists of the following fields: | ||
| 133 | * - `pattern_index` - the index of the pattern within the query | ||
| 134 | * - `step_index` - the index of the pattern's first step in the shared `steps` array | ||
| 135 | * - `is_rooted` - whether or not the pattern has a single root node. This property | ||
| 136 | * affects decisions about whether or not to start the pattern for nodes outside | ||
| 137 | * of a QueryCursor's range restriction. | ||
| 138 | */ | ||
| 139 | typedef struct { | ||
| 140 | uint16_t step_index; | ||
| 141 | uint16_t pattern_index; | ||
| 142 | bool is_rooted; | ||
| 143 | } PatternEntry; | ||
| 144 | |||
| 145 | typedef struct { | ||
| 146 | Slice step; | ||
| 147 | Slice predicate_step; | ||
| 148 | uint32_t start_byte; | ||
| 149 | bool is_non_local; | ||
| 150 | } QueryPattern; | ||
| 151 | |||
| 152 | typedef struct { | ||
| 153 | uint32_t byte_offset; | ||
| 154 | uint16_t step_index; | ||
| 155 | } StepOffset; | ||
| 156 | |||
| 157 | /* | ||
| 158 | * QueryState - The state of an in-progress match of a particular pattern | ||
| 159 | * in a query. While executing, a `TSQueryCursor` must keep track of a number | ||
| 160 | * of possible in-progress matches. Each of those possible matches is | ||
| 161 | * represented as one of these states. Fields: | ||
| 162 | * - `id` - A numeric id that is exposed to the public API. This allows the | ||
| 163 | * caller to remove a given match, preventing any more of its captures | ||
| 164 | * from being returned. | ||
| 165 | * - `start_depth` - The depth in the tree where the first step of the state's | ||
| 166 | * pattern was matched. | ||
| 167 | * - `pattern_index` - The pattern that the state is matching. | ||
| 168 | * - `consumed_capture_count` - The number of captures from this match that | ||
| 169 | * have already been returned. | ||
| 170 | * - `capture_list_id` - A numeric id that can be used to retrieve the state's | ||
| 171 | * list of captures from the `CaptureListPool`. | ||
| 172 | * - `seeking_immediate_match` - A flag that indicates that the state's next | ||
| 173 | * step must be matched by the very next sibling. This is used when | ||
| 174 | * processing repetitions. | ||
| 175 | * - `has_in_progress_alternatives` - A flag that indicates that there are | ||
| 176 | * other states that have the same captures as this state, but are at | ||
| 177 | * different steps in their pattern. This means that in order to obey the | ||
| 178 | * 'longest-match' rule, this state should not be returned as a match until | ||
| 179 | * it is clear that there can be no other alternative match with more captures. | ||
| 180 | */ | ||
| 181 | typedef struct { | ||
| 182 | uint32_t id; | ||
| 183 | uint32_t capture_list_id; | ||
| 184 | uint16_t start_depth; | ||
| 185 | uint16_t step_index; | ||
| 186 | uint16_t pattern_index; | ||
| 187 | uint16_t consumed_capture_count: 12; | ||
| 188 | bool seeking_immediate_match: 1; | ||
| 189 | bool has_in_progress_alternatives: 1; | ||
| 190 | bool dead: 1; | ||
| 191 | bool needs_parent: 1; | ||
| 192 | } QueryState; | ||
| 193 | |||
| 194 | typedef Array(TSQueryCapture) CaptureList; | ||
| 195 | |||
| 196 | /* | ||
| 197 | * CaptureListPool - A collection of *lists* of captures. Each query state needs | ||
| 198 | * to maintain its own list of captures. To avoid repeated allocations, this struct | ||
| 199 | * maintains a fixed set of capture lists, and keeps track of which ones are | ||
| 200 | * currently in use by a query state. | ||
| 201 | */ | ||
| 202 | typedef struct { | ||
| 203 | Array(CaptureList) list; | ||
| 204 | CaptureList empty_list; | ||
| 205 | // The maximum number of capture lists that we are allowed to allocate. We | ||
| 206 | // never allow `list` to allocate more entries than this, dropping pending | ||
| 207 | // matches if needed to stay under the limit. | ||
| 208 | uint32_t max_capture_list_count; | ||
| 209 | // The number of capture lists allocated in `list` that are not currently in | ||
| 210 | // use. We reuse those existing-but-unused capture lists before trying to | ||
| 211 | // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture | ||
| 212 | // list's length to indicate that it's not in use. | ||
| 213 | uint32_t free_capture_list_count; | ||
| 214 | } CaptureListPool; | ||
| 215 | |||
| 216 | /* | ||
| 217 | * AnalysisState - The state needed for walking the parse table when analyzing | ||
| 218 | * a query pattern, to determine at which steps the pattern might fail to match. | ||
| 219 | */ | ||
| 220 | typedef struct { | ||
| 221 | TSStateId parse_state; | ||
| 222 | TSSymbol parent_symbol; | ||
| 223 | uint16_t child_index; | ||
| 224 | TSFieldId field_id: 15; | ||
| 225 | bool done: 1; | ||
| 226 | } AnalysisStateEntry; | ||
| 227 | |||
| 228 | typedef struct { | ||
| 229 | AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; | ||
| 230 | uint16_t depth; | ||
| 231 | uint16_t step_index; | ||
| 232 | TSSymbol root_symbol; | ||
| 233 | } AnalysisState; | ||
| 234 | |||
| 235 | typedef Array(AnalysisState *) AnalysisStateSet; | ||
| 236 | |||
| 237 | typedef struct { | ||
| 238 | AnalysisStateSet states; | ||
| 239 | AnalysisStateSet next_states; | ||
| 240 | AnalysisStateSet deeper_states; | ||
| 241 | AnalysisStateSet state_pool; | ||
| 242 | Array(uint16_t) final_step_indices; | ||
| 243 | Array(TSSymbol) finished_parent_symbols; | ||
| 244 | bool did_abort; | ||
| 245 | } QueryAnalysis; | ||
| 246 | |||
| 247 | /* | ||
| 248 | * AnalysisSubgraph - A subset of the states in the parse table that are used | ||
| 249 | * in constructing nodes with a certain symbol. Each state is accompanied by | ||
| 250 | * some information about the possible node that could be produced in | ||
| 251 | * downstream states. | ||
| 252 | */ | ||
| 253 | typedef struct { | ||
| 254 | TSStateId state; | ||
| 255 | uint16_t production_id; | ||
| 256 | uint8_t child_index: 7; | ||
| 257 | bool done: 1; | ||
| 258 | } AnalysisSubgraphNode; | ||
| 259 | |||
| 260 | typedef struct { | ||
| 261 | TSSymbol symbol; | ||
| 262 | Array(TSStateId) start_states; | ||
| 263 | Array(AnalysisSubgraphNode) nodes; | ||
| 264 | } AnalysisSubgraph; | ||
| 265 | |||
| 266 | typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; | ||
| 267 | |||
| 268 | /* | ||
| 269 | * StatePredecessorMap - A map that stores the predecessors of each parse state. | ||
| 270 | * This is used during query analysis to determine which parse states can lead | ||
| 271 | * to which reduce actions. | ||
| 272 | */ | ||
| 273 | typedef struct { | ||
| 274 | TSStateId *contents; | ||
| 275 | } StatePredecessorMap; | ||
| 276 | |||
| 277 | /* | ||
| 278 | * TSQuery - A tree query, compiled from a string of S-expressions. The query | ||
| 279 | * itself is immutable. The mutable state used in the process of executing the | ||
| 280 | * query is stored in a `TSQueryCursor`. | ||
| 281 | */ | ||
| 282 | struct TSQuery { | ||
| 283 | SymbolTable captures; | ||
| 284 | SymbolTable predicate_values; | ||
| 285 | Array(CaptureQuantifiers) capture_quantifiers; | ||
| 286 | Array(QueryStep) steps; | ||
| 287 | Array(PatternEntry) pattern_map; | ||
| 288 | Array(TSQueryPredicateStep) predicate_steps; | ||
| 289 | Array(QueryPattern) patterns; | ||
| 290 | Array(StepOffset) step_offsets; | ||
| 291 | Array(TSFieldId) negated_fields; | ||
| 292 | Array(char) string_buffer; | ||
| 293 | Array(TSSymbol) repeat_symbols_with_rootless_patterns; | ||
| 294 | const TSLanguage *language; | ||
| 295 | uint16_t wildcard_root_pattern_count; | ||
| 296 | }; | ||
| 297 | |||
| 298 | /* | ||
| 299 | * TSQueryCursor - A stateful struct used to execute a query on a tree. | ||
| 300 | */ | ||
| 301 | struct TSQueryCursor { | ||
| 302 | const TSQuery *query; | ||
| 303 | TSTreeCursor cursor; | ||
| 304 | Array(QueryState) states; | ||
| 305 | Array(QueryState) finished_states; | ||
| 306 | CaptureListPool capture_list_pool; | ||
| 307 | uint32_t depth; | ||
| 308 | uint32_t max_start_depth; | ||
| 309 | uint32_t start_byte; | ||
| 310 | uint32_t end_byte; | ||
| 311 | TSPoint start_point; | ||
| 312 | TSPoint end_point; | ||
| 313 | uint32_t next_state_id; | ||
| 314 | bool on_visible_node; | ||
| 315 | bool ascending; | ||
| 316 | bool halted; | ||
| 317 | bool did_exceed_match_limit; | ||
| 318 | }; | ||
| 319 | |||
| 320 | static const TSQueryError PARENT_DONE = -1; | ||
| 321 | static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; | ||
| 322 | static const uint16_t NONE = UINT16_MAX; | ||
| 323 | static const TSSymbol WILDCARD_SYMBOL = 0; | ||
| 324 | |||
| 325 | /********** | ||
| 326 | * Stream | ||
| 327 | **********/ | ||
| 328 | |||
| 329 | // Advance to the next unicode code point in the stream. | ||
| 330 | static bool stream_advance(Stream *self) { | ||
| 331 | self->input += self->next_size; | ||
| 332 | if (self->input < self->end) { | ||
| 333 | uint32_t size = ts_decode_utf8( | ||
| 334 | (const uint8_t *)self->input, | ||
| 335 | (uint32_t)(self->end - self->input), | ||
| 336 | &self->next | ||
| 337 | ); | ||
| 338 | if (size > 0) { | ||
| 339 | self->next_size = size; | ||
| 340 | return true; | ||
| 341 | } | ||
| 342 | } else { | ||
| 343 | self->next_size = 0; | ||
| 344 | self->next = '\0'; | ||
| 345 | } | ||
| 346 | return false; | ||
| 347 | } | ||
| 348 | |||
| 349 | // Reset the stream to the given input position, represented as a pointer | ||
| 350 | // into the input string. | ||
| 351 | static void stream_reset(Stream *self, const char *input) { | ||
| 352 | self->input = input; | ||
| 353 | self->next_size = 0; | ||
| 354 | stream_advance(self); | ||
| 355 | } | ||
| 356 | |||
| 357 | static Stream stream_new(const char *string, uint32_t length) { | ||
| 358 | Stream self = { | ||
| 359 | .next = 0, | ||
| 360 | .input = string, | ||
| 361 | .start = string, | ||
| 362 | .end = string + length, | ||
| 363 | }; | ||
| 364 | stream_advance(&self); | ||
| 365 | return self; | ||
| 366 | } | ||
| 367 | |||
| 368 | static void stream_skip_whitespace(Stream *self) { | ||
| 369 | for (;;) { | ||
| 370 | if (iswspace(self->next)) { | ||
| 371 | stream_advance(self); | ||
| 372 | } else if (self->next == ';') { | ||
| 373 | // skip over comments | ||
| 374 | stream_advance(self); | ||
| 375 | while (self->next && self->next != '\n') { | ||
| 376 | if (!stream_advance(self)) break; | ||
| 377 | } | ||
| 378 | } else { | ||
| 379 | break; | ||
| 380 | } | ||
| 381 | } | ||
| 382 | } | ||
| 383 | |||
| 384 | static bool stream_is_ident_start(Stream *self) { | ||
| 385 | return iswalnum(self->next) || self->next == '_' || self->next == '-'; | ||
| 386 | } | ||
| 387 | |||
| 388 | static void stream_scan_identifier(Stream *stream) { | ||
| 389 | do { | ||
| 390 | stream_advance(stream); | ||
| 391 | } while ( | ||
| 392 | iswalnum(stream->next) || | ||
| 393 | stream->next == '_' || | ||
| 394 | stream->next == '-' || | ||
| 395 | stream->next == '.' || | ||
| 396 | stream->next == '?' || | ||
| 397 | stream->next == '!' | ||
| 398 | ); | ||
| 399 | } | ||
| 400 | |||
| 401 | static uint32_t stream_offset(Stream *self) { | ||
| 402 | return (uint32_t)(self->input - self->start); | ||
| 403 | } | ||
| 404 | |||
| 405 | /****************** | ||
| 406 | * CaptureListPool | ||
| 407 | ******************/ | ||
| 408 | |||
| 409 | static CaptureListPool capture_list_pool_new(void) { | ||
| 410 | return (CaptureListPool) { | ||
| 411 | .list = array_new(), | ||
| 412 | .empty_list = array_new(), | ||
| 413 | .max_capture_list_count = UINT32_MAX, | ||
| 414 | .free_capture_list_count = 0, | ||
| 415 | }; | ||
| 416 | } | ||
| 417 | |||
| 418 | static void capture_list_pool_reset(CaptureListPool *self) { | ||
| 419 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { | ||
| 420 | // This invalid size means that the list is not in use. | ||
| 421 | self->list.contents[i].size = UINT32_MAX; | ||
| 422 | } | ||
| 423 | self->free_capture_list_count = self->list.size; | ||
| 424 | } | ||
| 425 | |||
| 426 | static void capture_list_pool_delete(CaptureListPool *self) { | ||
| 427 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { | ||
| 428 | array_delete(&self->list.contents[i]); | ||
| 429 | } | ||
| 430 | array_delete(&self->list); | ||
| 431 | } | ||
| 432 | |||
| 433 | static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { | ||
| 434 | if (id >= self->list.size) return &self->empty_list; | ||
| 435 | return &self->list.contents[id]; | ||
| 436 | } | ||
| 437 | |||
| 438 | static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { | ||
| 439 | assert(id < self->list.size); | ||
| 440 | return &self->list.contents[id]; | ||
| 441 | } | ||
| 442 | |||
| 443 | static bool capture_list_pool_is_empty(const CaptureListPool *self) { | ||
| 444 | // The capture list pool is empty if all allocated lists are in use, and we | ||
| 445 | // have reached the maximum allowed number of allocated lists. | ||
| 446 | return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; | ||
| 447 | } | ||
| 448 | |||
| 449 | static uint16_t capture_list_pool_acquire(CaptureListPool *self) { | ||
| 450 | // First see if any already allocated capture list is currently unused. | ||
| 451 | if (self->free_capture_list_count > 0) { | ||
| 452 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { | ||
| 453 | if (self->list.contents[i].size == UINT32_MAX) { | ||
| 454 | array_clear(&self->list.contents[i]); | ||
| 455 | self->free_capture_list_count--; | ||
| 456 | return i; | ||
| 457 | } | ||
| 458 | } | ||
| 459 | } | ||
| 460 | |||
| 461 | // Otherwise allocate and initialize a new capture list, as long as that | ||
| 462 | // doesn't put us over the requested maximum. | ||
| 463 | uint32_t i = self->list.size; | ||
| 464 | if (i >= self->max_capture_list_count) { | ||
| 465 | return NONE; | ||
| 466 | } | ||
| 467 | CaptureList list; | ||
| 468 | array_init(&list); | ||
| 469 | array_push(&self->list, list); | ||
| 470 | return i; | ||
| 471 | } | ||
| 472 | |||
| 473 | static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { | ||
| 474 | if (id >= self->list.size) return; | ||
| 475 | self->list.contents[id].size = UINT32_MAX; | ||
| 476 | self->free_capture_list_count++; | ||
| 477 | } | ||
| 478 | |||
| 479 | /************** | ||
| 480 | * Quantifiers | ||
| 481 | **************/ | ||
| 482 | |||
| 483 | static TSQuantifier quantifier_mul( | ||
| 484 | TSQuantifier left, | ||
| 485 | TSQuantifier right | ||
| 486 | ) { | ||
| 487 | switch (left) | ||
| 488 | { | ||
| 489 | case TSQuantifierZero: | ||
| 490 | return TSQuantifierZero; | ||
| 491 | case TSQuantifierZeroOrOne: | ||
| 492 | switch (right) { | ||
| 493 | case TSQuantifierZero: | ||
| 494 | return TSQuantifierZero; | ||
| 495 | case TSQuantifierZeroOrOne: | ||
| 496 | case TSQuantifierOne: | ||
| 497 | return TSQuantifierZeroOrOne; | ||
| 498 | case TSQuantifierZeroOrMore: | ||
| 499 | case TSQuantifierOneOrMore: | ||
| 500 | return TSQuantifierZeroOrMore; | ||
| 501 | }; | ||
| 502 | break; | ||
| 503 | case TSQuantifierZeroOrMore: | ||
| 504 | switch (right) { | ||
| 505 | case TSQuantifierZero: | ||
| 506 | return TSQuantifierZero; | ||
| 507 | case TSQuantifierZeroOrOne: | ||
| 508 | case TSQuantifierZeroOrMore: | ||
| 509 | case TSQuantifierOne: | ||
| 510 | case TSQuantifierOneOrMore: | ||
| 511 | return TSQuantifierZeroOrMore; | ||
| 512 | }; | ||
| 513 | break; | ||
| 514 | case TSQuantifierOne: | ||
| 515 | return right; | ||
| 516 | case TSQuantifierOneOrMore: | ||
| 517 | switch (right) { | ||
| 518 | case TSQuantifierZero: | ||
| 519 | return TSQuantifierZero; | ||
| 520 | case TSQuantifierZeroOrOne: | ||
| 521 | case TSQuantifierZeroOrMore: | ||
| 522 | return TSQuantifierZeroOrMore; | ||
| 523 | case TSQuantifierOne: | ||
| 524 | case TSQuantifierOneOrMore: | ||
| 525 | return TSQuantifierOneOrMore; | ||
| 526 | }; | ||
| 527 | break; | ||
| 528 | } | ||
| 529 | return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! | ||
| 530 | } | ||
| 531 | |||
| 532 | static TSQuantifier quantifier_join( | ||
| 533 | TSQuantifier left, | ||
| 534 | TSQuantifier right | ||
| 535 | ) { | ||
| 536 | switch (left) | ||
| 537 | { | ||
| 538 | case TSQuantifierZero: | ||
| 539 | switch (right) { | ||
| 540 | case TSQuantifierZero: | ||
| 541 | return TSQuantifierZero; | ||
| 542 | case TSQuantifierZeroOrOne: | ||
| 543 | case TSQuantifierOne: | ||
| 544 | return TSQuantifierZeroOrOne; | ||
| 545 | case TSQuantifierZeroOrMore: | ||
| 546 | case TSQuantifierOneOrMore: | ||
| 547 | return TSQuantifierZeroOrMore; | ||
| 548 | }; | ||
| 549 | break; | ||
| 550 | case TSQuantifierZeroOrOne: | ||
| 551 | switch (right) { | ||
| 552 | case TSQuantifierZero: | ||
| 553 | case TSQuantifierZeroOrOne: | ||
| 554 | case TSQuantifierOne: | ||
| 555 | return TSQuantifierZeroOrOne; | ||
| 556 | break; | ||
| 557 | case TSQuantifierZeroOrMore: | ||
| 558 | case TSQuantifierOneOrMore: | ||
| 559 | return TSQuantifierZeroOrMore; | ||
| 560 | break; | ||
| 561 | }; | ||
| 562 | break; | ||
| 563 | case TSQuantifierZeroOrMore: | ||
| 564 | return TSQuantifierZeroOrMore; | ||
| 565 | case TSQuantifierOne: | ||
| 566 | switch (right) { | ||
| 567 | case TSQuantifierZero: | ||
| 568 | case TSQuantifierZeroOrOne: | ||
| 569 | return TSQuantifierZeroOrOne; | ||
| 570 | case TSQuantifierZeroOrMore: | ||
| 571 | return TSQuantifierZeroOrMore; | ||
| 572 | case TSQuantifierOne: | ||
| 573 | return TSQuantifierOne; | ||
| 574 | case TSQuantifierOneOrMore: | ||
| 575 | return TSQuantifierOneOrMore; | ||
| 576 | }; | ||
| 577 | break; | ||
| 578 | case TSQuantifierOneOrMore: | ||
| 579 | switch (right) { | ||
| 580 | case TSQuantifierZero: | ||
| 581 | case TSQuantifierZeroOrOne: | ||
| 582 | case TSQuantifierZeroOrMore: | ||
| 583 | return TSQuantifierZeroOrMore; | ||
| 584 | case TSQuantifierOne: | ||
| 585 | case TSQuantifierOneOrMore: | ||
| 586 | return TSQuantifierOneOrMore; | ||
| 587 | }; | ||
| 588 | break; | ||
| 589 | } | ||
| 590 | return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! | ||
| 591 | } | ||
| 592 | |||
| 593 | static TSQuantifier quantifier_add( | ||
| 594 | TSQuantifier left, | ||
| 595 | TSQuantifier right | ||
| 596 | ) { | ||
| 597 | switch (left) | ||
| 598 | { | ||
| 599 | case TSQuantifierZero: | ||
| 600 | return right; | ||
| 601 | case TSQuantifierZeroOrOne: | ||
| 602 | switch (right) { | ||
| 603 | case TSQuantifierZero: | ||
| 604 | return TSQuantifierZeroOrOne; | ||
| 605 | case TSQuantifierZeroOrOne: | ||
| 606 | case TSQuantifierZeroOrMore: | ||
| 607 | return TSQuantifierZeroOrMore; | ||
| 608 | case TSQuantifierOne: | ||
| 609 | case TSQuantifierOneOrMore: | ||
| 610 | return TSQuantifierOneOrMore; | ||
| 611 | }; | ||
| 612 | break; | ||
| 613 | case TSQuantifierZeroOrMore: | ||
| 614 | switch (right) { | ||
| 615 | case TSQuantifierZero: | ||
| 616 | return TSQuantifierZeroOrMore; | ||
| 617 | case TSQuantifierZeroOrOne: | ||
| 618 | case TSQuantifierZeroOrMore: | ||
| 619 | return TSQuantifierZeroOrMore; | ||
| 620 | case TSQuantifierOne: | ||
| 621 | case TSQuantifierOneOrMore: | ||
| 622 | return TSQuantifierOneOrMore; | ||
| 623 | }; | ||
| 624 | break; | ||
| 625 | case TSQuantifierOne: | ||
| 626 | switch (right) { | ||
| 627 | case TSQuantifierZero: | ||
| 628 | return TSQuantifierOne; | ||
| 629 | case TSQuantifierZeroOrOne: | ||
| 630 | case TSQuantifierZeroOrMore: | ||
| 631 | case TSQuantifierOne: | ||
| 632 | case TSQuantifierOneOrMore: | ||
| 633 | return TSQuantifierOneOrMore; | ||
| 634 | }; | ||
| 635 | break; | ||
| 636 | case TSQuantifierOneOrMore: | ||
| 637 | return TSQuantifierOneOrMore; | ||
| 638 | } | ||
| 639 | return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! | ||
| 640 | } | ||
| 641 | |||
| 642 | // Create new capture quantifiers structure | ||
| 643 | static CaptureQuantifiers capture_quantifiers_new(void) { | ||
| 644 | return (CaptureQuantifiers) array_new(); | ||
| 645 | } | ||
| 646 | |||
| 647 | // Delete capture quantifiers structure | ||
| 648 | static void capture_quantifiers_delete( | ||
| 649 | CaptureQuantifiers *self | ||
| 650 | ) { | ||
| 651 | array_delete(self); | ||
| 652 | } | ||
| 653 | |||
| 654 | // Clear capture quantifiers structure | ||
| 655 | static void capture_quantifiers_clear( | ||
| 656 | CaptureQuantifiers *self | ||
| 657 | ) { | ||
| 658 | array_clear(self); | ||
| 659 | } | ||
| 660 | |||
| 661 | // Replace capture quantifiers with the given quantifiers | ||
| 662 | static void capture_quantifiers_replace( | ||
| 663 | CaptureQuantifiers *self, | ||
| 664 | CaptureQuantifiers *quantifiers | ||
| 665 | ) { | ||
| 666 | array_clear(self); | ||
| 667 | array_push_all(self, quantifiers); | ||
| 668 | } | ||
| 669 | |||
| 670 | // Return capture quantifier for the given capture id | ||
| 671 | static TSQuantifier capture_quantifier_for_id( | ||
| 672 | const CaptureQuantifiers *self, | ||
| 673 | uint16_t id | ||
| 674 | ) { | ||
| 675 | return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id); | ||
| 676 | } | ||
| 677 | |||
| 678 | // Add the given quantifier to the current value for id | ||
| 679 | static void capture_quantifiers_add_for_id( | ||
| 680 | CaptureQuantifiers *self, | ||
| 681 | uint16_t id, | ||
| 682 | TSQuantifier quantifier | ||
| 683 | ) { | ||
| 684 | if (self->size <= id) { | ||
| 685 | array_grow_by(self, id + 1 - self->size); | ||
| 686 | } | ||
| 687 | uint8_t *own_quantifier = array_get(self, id); | ||
| 688 | *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); | ||
| 689 | } | ||
| 690 | |||
| 691 | // Point-wise add the given quantifiers to the current values | ||
| 692 | static void capture_quantifiers_add_all( | ||
| 693 | CaptureQuantifiers *self, | ||
| 694 | CaptureQuantifiers *quantifiers | ||
| 695 | ) { | ||
| 696 | if (self->size < quantifiers->size) { | ||
| 697 | array_grow_by(self, quantifiers->size - self->size); | ||
| 698 | } | ||
| 699 | for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { | ||
| 700 | uint8_t *quantifier = array_get(quantifiers, id); | ||
| 701 | uint8_t *own_quantifier = array_get(self, id); | ||
| 702 | *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); | ||
| 703 | } | ||
| 704 | } | ||
| 705 | |||
| 706 | // Join the given quantifier with the current values | ||
| 707 | static void capture_quantifiers_mul( | ||
| 708 | CaptureQuantifiers *self, | ||
| 709 | TSQuantifier quantifier | ||
| 710 | ) { | ||
| 711 | for (uint16_t id = 0; id < (uint16_t)self->size; id++) { | ||
| 712 | uint8_t *own_quantifier = array_get(self, id); | ||
| 713 | *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); | ||
| 714 | } | ||
| 715 | } | ||
| 716 | |||
| 717 | // Point-wise join the quantifiers from a list of alternatives with the current values | ||
| 718 | static void capture_quantifiers_join_all( | ||
| 719 | CaptureQuantifiers *self, | ||
| 720 | CaptureQuantifiers *quantifiers | ||
| 721 | ) { | ||
| 722 | if (self->size < quantifiers->size) { | ||
| 723 | array_grow_by(self, quantifiers->size - self->size); | ||
| 724 | } | ||
| 725 | for (uint32_t id = 0; id < quantifiers->size; id++) { | ||
| 726 | uint8_t *quantifier = array_get(quantifiers, id); | ||
| 727 | uint8_t *own_quantifier = array_get(self, id); | ||
| 728 | *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); | ||
| 729 | } | ||
| 730 | for (uint32_t id = quantifiers->size; id < self->size; id++) { | ||
| 731 | uint8_t *own_quantifier = array_get(self, id); | ||
| 732 | *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); | ||
| 733 | } | ||
| 734 | } | ||
| 735 | |||
| 736 | /************** | ||
| 737 | * SymbolTable | ||
| 738 | **************/ | ||
| 739 | |||
| 740 | static SymbolTable symbol_table_new(void) { | ||
| 741 | return (SymbolTable) { | ||
| 742 | .characters = array_new(), | ||
| 743 | .slices = array_new(), | ||
| 744 | }; | ||
| 745 | } | ||
| 746 | |||
| 747 | static void symbol_table_delete(SymbolTable *self) { | ||
| 748 | array_delete(&self->characters); | ||
| 749 | array_delete(&self->slices); | ||
| 750 | } | ||
| 751 | |||
| 752 | static int symbol_table_id_for_name( | ||
| 753 | const SymbolTable *self, | ||
| 754 | const char *name, | ||
| 755 | uint32_t length | ||
| 756 | ) { | ||
| 757 | for (unsigned i = 0; i < self->slices.size; i++) { | ||
| 758 | Slice slice = self->slices.contents[i]; | ||
| 759 | if ( | ||
| 760 | slice.length == length && | ||
| 761 | !strncmp(&self->characters.contents[slice.offset], name, length) | ||
| 762 | ) return i; | ||
| 763 | } | ||
| 764 | return -1; | ||
| 765 | } | ||
| 766 | |||
| 767 | static const char *symbol_table_name_for_id( | ||
| 768 | const SymbolTable *self, | ||
| 769 | uint16_t id, | ||
| 770 | uint32_t *length | ||
| 771 | ) { | ||
| 772 | Slice slice = self->slices.contents[id]; | ||
| 773 | *length = slice.length; | ||
| 774 | return &self->characters.contents[slice.offset]; | ||
| 775 | } | ||
| 776 | |||
| 777 | static uint16_t symbol_table_insert_name( | ||
| 778 | SymbolTable *self, | ||
| 779 | const char *name, | ||
| 780 | uint32_t length | ||
| 781 | ) { | ||
| 782 | int id = symbol_table_id_for_name(self, name, length); | ||
| 783 | if (id >= 0) return (uint16_t)id; | ||
| 784 | Slice slice = { | ||
| 785 | .offset = self->characters.size, | ||
| 786 | .length = length, | ||
| 787 | }; | ||
| 788 | array_grow_by(&self->characters, length + 1); | ||
| 789 | memcpy(&self->characters.contents[slice.offset], name, length); | ||
| 790 | self->characters.contents[self->characters.size - 1] = 0; | ||
| 791 | array_push(&self->slices, slice); | ||
| 792 | return self->slices.size - 1; | ||
| 793 | } | ||
| 794 | |||
| 795 | /************ | ||
| 796 | * QueryStep | ||
| 797 | ************/ | ||
| 798 | |||
| 799 | static QueryStep query_step__new( | ||
| 800 | TSSymbol symbol, | ||
| 801 | uint16_t depth, | ||
| 802 | bool is_immediate | ||
| 803 | ) { | ||
| 804 | QueryStep step = { | ||
| 805 | .symbol = symbol, | ||
| 806 | .depth = depth, | ||
| 807 | .field = 0, | ||
| 808 | .alternative_index = NONE, | ||
| 809 | .negated_field_list_id = 0, | ||
| 810 | .contains_captures = false, | ||
| 811 | .is_last_child = false, | ||
| 812 | .is_named = false, | ||
| 813 | .is_pass_through = false, | ||
| 814 | .is_dead_end = false, | ||
| 815 | .root_pattern_guaranteed = false, | ||
| 816 | .is_immediate = is_immediate, | ||
| 817 | .alternative_is_immediate = false, | ||
| 818 | }; | ||
| 819 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { | ||
| 820 | step.capture_ids[i] = NONE; | ||
| 821 | } | ||
| 822 | return step; | ||
| 823 | } | ||
| 824 | |||
| 825 | static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { | ||
| 826 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { | ||
| 827 | if (self->capture_ids[i] == NONE) { | ||
| 828 | self->capture_ids[i] = capture_id; | ||
| 829 | break; | ||
| 830 | } | ||
| 831 | } | ||
| 832 | } | ||
| 833 | |||
| 834 | static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { | ||
| 835 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { | ||
| 836 | if (self->capture_ids[i] == capture_id) { | ||
| 837 | self->capture_ids[i] = NONE; | ||
| 838 | while (i + 1 < MAX_STEP_CAPTURE_COUNT) { | ||
| 839 | if (self->capture_ids[i + 1] == NONE) break; | ||
| 840 | self->capture_ids[i] = self->capture_ids[i + 1]; | ||
| 841 | self->capture_ids[i + 1] = NONE; | ||
| 842 | i++; | ||
| 843 | } | ||
| 844 | break; | ||
| 845 | } | ||
| 846 | } | ||
| 847 | } | ||
| 848 | |||
| 849 | /********************** | ||
| 850 | * StatePredecessorMap | ||
| 851 | **********************/ | ||
| 852 | |||
| 853 | static inline StatePredecessorMap state_predecessor_map_new( | ||
| 854 | const TSLanguage *language | ||
| 855 | ) { | ||
| 856 | return (StatePredecessorMap) { | ||
| 857 | .contents = ts_calloc( | ||
| 858 | (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), | ||
| 859 | sizeof(TSStateId) | ||
| 860 | ), | ||
| 861 | }; | ||
| 862 | } | ||
| 863 | |||
| 864 | static inline void state_predecessor_map_delete(StatePredecessorMap *self) { | ||
| 865 | ts_free(self->contents); | ||
| 866 | } | ||
| 867 | |||
| 868 | static inline void state_predecessor_map_add( | ||
| 869 | StatePredecessorMap *self, | ||
| 870 | TSStateId state, | ||
| 871 | TSStateId predecessor | ||
| 872 | ) { | ||
| 873 | size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); | ||
| 874 | TSStateId *count = &self->contents[index]; | ||
| 875 | if ( | ||
| 876 | *count == 0 || | ||
| 877 | (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) | ||
| 878 | ) { | ||
| 879 | (*count)++; | ||
| 880 | self->contents[index + *count] = predecessor; | ||
| 881 | } | ||
| 882 | } | ||
| 883 | |||
| 884 | static inline const TSStateId *state_predecessor_map_get( | ||
| 885 | const StatePredecessorMap *self, | ||
| 886 | TSStateId state, | ||
| 887 | unsigned *count | ||
| 888 | ) { | ||
| 889 | size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); | ||
| 890 | *count = self->contents[index]; | ||
| 891 | return &self->contents[index + 1]; | ||
| 892 | } | ||
| 893 | |||
| 894 | /**************** | ||
| 895 | * AnalysisState | ||
| 896 | ****************/ | ||
| 897 | |||
| 898 | static unsigned analysis_state__recursion_depth(const AnalysisState *self) { | ||
| 899 | unsigned result = 0; | ||
| 900 | for (unsigned i = 0; i < self->depth; i++) { | ||
| 901 | TSSymbol symbol = self->stack[i].parent_symbol; | ||
| 902 | for (unsigned j = 0; j < i; j++) { | ||
| 903 | if (self->stack[j].parent_symbol == symbol) { | ||
| 904 | result++; | ||
| 905 | break; | ||
| 906 | } | ||
| 907 | } | ||
| 908 | } | ||
| 909 | return result; | ||
| 910 | } | ||
| 911 | |||
| 912 | static inline int analysis_state__compare_position( | ||
| 913 | AnalysisState *const *self, | ||
| 914 | AnalysisState *const *other | ||
| 915 | ) { | ||
| 916 | for (unsigned i = 0; i < (*self)->depth; i++) { | ||
| 917 | if (i >= (*other)->depth) return -1; | ||
| 918 | if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1; | ||
| 919 | if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1; | ||
| 920 | } | ||
| 921 | if ((*self)->depth < (*other)->depth) return 1; | ||
| 922 | if ((*self)->step_index < (*other)->step_index) return -1; | ||
| 923 | if ((*self)->step_index > (*other)->step_index) return 1; | ||
| 924 | return 0; | ||
| 925 | } | ||
| 926 | |||
| 927 | static inline int analysis_state__compare( | ||
| 928 | AnalysisState *const *self, | ||
| 929 | AnalysisState *const *other | ||
| 930 | ) { | ||
| 931 | int result = analysis_state__compare_position(self, other); | ||
| 932 | if (result != 0) return result; | ||
| 933 | for (unsigned i = 0; i < (*self)->depth; i++) { | ||
| 934 | if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1; | ||
| 935 | if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1; | ||
| 936 | if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1; | ||
| 937 | if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1; | ||
| 938 | if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1; | ||
| 939 | if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1; | ||
| 940 | } | ||
| 941 | return 0; | ||
| 942 | } | ||
| 943 | |||
| 944 | static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { | ||
| 945 | if (self->depth == 0) { | ||
| 946 | return &self->stack[0]; | ||
| 947 | } | ||
| 948 | return &self->stack[self->depth - 1]; | ||
| 949 | } | ||
| 950 | |||
| 951 | static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { | ||
| 952 | for (unsigned i = 0; i < self->depth; i++) { | ||
| 953 | if (self->stack[i].parent_symbol == symbol) return true; | ||
| 954 | } | ||
| 955 | return false; | ||
| 956 | } | ||
| 957 | |||
| 958 | /****************** | ||
| 959 | * AnalysisStateSet | ||
| 960 | ******************/ | ||
| 961 | |||
| 962 | // Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by | ||
| 963 | // cloning one from scratch. | ||
| 964 | static inline AnalysisState *analysis_state_pool__clone_or_reuse( | ||
| 965 | AnalysisStateSet *self, | ||
| 966 | AnalysisState *borrowed_item | ||
| 967 | ) { | ||
| 968 | AnalysisState *new_item; | ||
| 969 | if (self->size) { | ||
| 970 | new_item = array_pop(self); | ||
| 971 | } else { | ||
| 972 | new_item = ts_malloc(sizeof(AnalysisState)); | ||
| 973 | } | ||
| 974 | *new_item = *borrowed_item; | ||
| 975 | return new_item; | ||
| 976 | } | ||
| 977 | |||
| 978 | // Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this | ||
| 979 | // set. The set does not contain duplicates, so if the item is already present, it will not be | ||
| 980 | // inserted, and no clone will be made. | ||
| 981 | // | ||
| 982 | // The caller retains ownership of the passed-in memory. However, the clone that is created by this | ||
| 983 | // function will be managed by the state set. | ||
| 984 | static inline void analysis_state_set__insert_sorted( | ||
| 985 | AnalysisStateSet *self, | ||
| 986 | AnalysisStateSet *pool, | ||
| 987 | AnalysisState *borrowed_item | ||
| 988 | ) { | ||
| 989 | unsigned index, exists; | ||
| 990 | array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); | ||
| 991 | if (!exists) { | ||
| 992 | AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); | ||
| 993 | array_insert(self, index, new_item); | ||
| 994 | } | ||
| 995 | } | ||
| 996 | |||
| 997 | // Inserts a clone of the passed-in item at the end position of this list. | ||
| 998 | // | ||
| 999 | // IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function | ||
| 1000 | // `analysis_state__compare`) than largest item already in this set. If items are inserted in the | ||
| 1001 | // wrong order, the set will not function properly for future use. | ||
| 1002 | // | ||
| 1003 | // The caller retains ownership of the passed-in memory. However, the clone that is created by this | ||
| 1004 | // function will be managed by the state set. | ||
| 1005 | static inline void analysis_state_set__push( | ||
| 1006 | AnalysisStateSet *self, | ||
| 1007 | AnalysisStateSet *pool, | ||
| 1008 | AnalysisState *borrowed_item | ||
| 1009 | ) { | ||
| 1010 | AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); | ||
| 1011 | array_push(self, new_item); | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | // Removes all items from this set, returning it to an empty state. | ||
| 1015 | static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { | ||
| 1016 | array_push_all(pool, self); | ||
| 1017 | array_clear(self); | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | // Releases all memory that is managed with this state set, including any items currently present. | ||
| 1021 | // After calling this function, the set is no longer suitable for use. | ||
| 1022 | static inline void analysis_state_set__delete(AnalysisStateSet *self) { | ||
| 1023 | for (unsigned i = 0; i < self->size; i++) { | ||
| 1024 | ts_free(self->contents[i]); | ||
| 1025 | } | ||
| 1026 | array_delete(self); | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | /**************** | ||
| 1030 | * QueryAnalyzer | ||
| 1031 | ****************/ | ||
| 1032 | |||
| 1033 | static inline QueryAnalysis query_analysis__new() { | ||
| 1034 | return (QueryAnalysis) { | ||
| 1035 | .states = array_new(), | ||
| 1036 | .next_states = array_new(), | ||
| 1037 | .deeper_states = array_new(), | ||
| 1038 | .state_pool = array_new(), | ||
| 1039 | .final_step_indices = array_new(), | ||
| 1040 | .finished_parent_symbols = array_new(), | ||
| 1041 | .did_abort = false, | ||
| 1042 | }; | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | static inline void query_analysis__delete(QueryAnalysis *self) { | ||
| 1046 | analysis_state_set__delete(&self->states); | ||
| 1047 | analysis_state_set__delete(&self->next_states); | ||
| 1048 | analysis_state_set__delete(&self->deeper_states); | ||
| 1049 | analysis_state_set__delete(&self->state_pool); | ||
| 1050 | array_delete(&self->final_step_indices); | ||
| 1051 | array_delete(&self->finished_parent_symbols); | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | /*********************** | ||
| 1055 | * AnalysisSubgraphNode | ||
| 1056 | ***********************/ | ||
| 1057 | |||
| 1058 | static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { | ||
| 1059 | if (self->state < other->state) return -1; | ||
| 1060 | if (self->state > other->state) return 1; | ||
| 1061 | if (self->child_index < other->child_index) return -1; | ||
| 1062 | if (self->child_index > other->child_index) return 1; | ||
| 1063 | if (self->done < other->done) return -1; | ||
| 1064 | if (self->done > other->done) return 1; | ||
| 1065 | if (self->production_id < other->production_id) return -1; | ||
| 1066 | if (self->production_id > other->production_id) return 1; | ||
| 1067 | return 0; | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | /********* | ||
| 1071 | * Query | ||
| 1072 | *********/ | ||
| 1073 | |||
| 1074 | // The `pattern_map` contains a mapping from TSSymbol values to indices in the | ||
| 1075 | // `steps` array. For a given syntax node, the `pattern_map` makes it possible | ||
| 1076 | // to quickly find the starting steps of all of the patterns whose root matches | ||
| 1077 | // that node. Each entry has two fields: a `pattern_index`, which identifies one | ||
| 1078 | // of the patterns in the query, and a `step_index`, which indicates the start | ||
| 1079 | // offset of that pattern's steps within the `steps` array. | ||
| 1080 | // | ||
| 1081 | // The entries are sorted by the patterns' root symbols, and lookups use a | ||
| 1082 | // binary search. This ensures that the cost of this initial lookup step | ||
| 1083 | // scales logarithmically with the number of patterns in the query. | ||
| 1084 | // | ||
| 1085 | // This returns `true` if the symbol is present and `false` otherwise. | ||
| 1086 | // If the symbol is not present `*result` is set to the index where the | ||
| 1087 | // symbol should be inserted. | ||
| 1088 | static inline bool ts_query__pattern_map_search( | ||
| 1089 | const TSQuery *self, | ||
| 1090 | TSSymbol needle, | ||
| 1091 | uint32_t *result | ||
| 1092 | ) { | ||
| 1093 | uint32_t base_index = self->wildcard_root_pattern_count; | ||
| 1094 | uint32_t size = self->pattern_map.size - base_index; | ||
| 1095 | if (size == 0) { | ||
| 1096 | *result = base_index; | ||
| 1097 | return false; | ||
| 1098 | } | ||
| 1099 | while (size > 1) { | ||
| 1100 | uint32_t half_size = size / 2; | ||
| 1101 | uint32_t mid_index = base_index + half_size; | ||
| 1102 | TSSymbol mid_symbol = self->steps.contents[ | ||
| 1103 | self->pattern_map.contents[mid_index].step_index | ||
| 1104 | ].symbol; | ||
| 1105 | if (needle > mid_symbol) base_index = mid_index; | ||
| 1106 | size -= half_size; | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | TSSymbol symbol = self->steps.contents[ | ||
| 1110 | self->pattern_map.contents[base_index].step_index | ||
| 1111 | ].symbol; | ||
| 1112 | |||
| 1113 | if (needle > symbol) { | ||
| 1114 | base_index++; | ||
| 1115 | if (base_index < self->pattern_map.size) { | ||
| 1116 | symbol = self->steps.contents[ | ||
| 1117 | self->pattern_map.contents[base_index].step_index | ||
| 1118 | ].symbol; | ||
| 1119 | } | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | *result = base_index; | ||
| 1123 | return needle == symbol; | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | // Insert a new pattern's start index into the pattern map, maintaining | ||
| 1127 | // the pattern map's ordering invariant. | ||
| 1128 | static inline void ts_query__pattern_map_insert( | ||
| 1129 | TSQuery *self, | ||
| 1130 | TSSymbol symbol, | ||
| 1131 | PatternEntry new_entry | ||
| 1132 | ) { | ||
| 1133 | uint32_t index; | ||
| 1134 | ts_query__pattern_map_search(self, symbol, &index); | ||
| 1135 | |||
| 1136 | // Ensure that the entries are sorted not only by symbol, but also | ||
| 1137 | // by pattern_index. This way, states for earlier patterns will be | ||
| 1138 | // initiated first, which allows the ordering of the states array | ||
| 1139 | // to be maintained more efficiently. | ||
| 1140 | while (index < self->pattern_map.size) { | ||
| 1141 | PatternEntry *entry = &self->pattern_map.contents[index]; | ||
| 1142 | if ( | ||
| 1143 | self->steps.contents[entry->step_index].symbol == symbol && | ||
| 1144 | entry->pattern_index < new_entry.pattern_index | ||
| 1145 | ) { | ||
| 1146 | index++; | ||
| 1147 | } else { | ||
| 1148 | break; | ||
| 1149 | } | ||
| 1150 | } | ||
| 1151 | |||
| 1152 | array_insert(&self->pattern_map, index, new_entry); | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | // Walk the subgraphs starting from the states already queued in `analysis->states`, tracking all of the possible | ||
| 1156 | // sequences of progress within the pattern. Results are written into `analysis`: `final_step_indices` receives the steps at which a state ran out of stack before the pattern finished, `finished_parent_symbols` receives the root symbol of every state that finished the pattern, and `did_abort` is set when the iteration limit or the stack depth limit is hit. | ||
| 1157 | static void ts_query__perform_analysis( | ||
| 1158 | TSQuery *self, | ||
| 1159 | const AnalysisSubgraphArray *subgraphs, | ||
| 1160 | QueryAnalysis *analysis | ||
| 1161 | ) { | ||
| 1162 | unsigned recursion_depth_limit = 0; | ||
| 1163 | unsigned prev_final_step_count = 0; | ||
| 1164 | array_clear(&analysis->final_step_indices); | ||
| 1165 | array_clear(&analysis->finished_parent_symbols); | ||
| 1166 | |||
| 1167 | for (unsigned iteration = 0;; iteration++) { | ||
| 1168 | if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { | ||
| 1169 | analysis->did_abort = true; | ||
| 1170 | break; | ||
| 1171 | } | ||
| 1172 | |||
| 1173 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1174 | printf("Iteration: %u. Final step indices:", iteration); | ||
| 1175 | for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { | ||
| 1176 | printf(" %4u", analysis->final_step_indices.contents[j]); | ||
| 1177 | } | ||
| 1178 | printf("\n"); | ||
| 1179 | for (unsigned j = 0; j < analysis->states.size; j++) { | ||
| 1180 | AnalysisState *state = analysis->states.contents[j]; | ||
| 1181 | printf(" %3u: step: %u, stack: [", j, state->step_index); | ||
| 1182 | for (unsigned k = 0; k < state->depth; k++) { | ||
| 1183 | printf( | ||
| 1184 | " {%s, child: %u, state: %4u", | ||
| 1185 | self->language->symbol_names[state->stack[k].parent_symbol], | ||
| 1186 | state->stack[k].child_index, | ||
| 1187 | state->stack[k].parse_state | ||
| 1188 | ); | ||
| 1189 | if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); | ||
| 1190 | if (state->stack[k].done) printf(", DONE"); | ||
| 1191 | printf("}"); | ||
| 1192 | } | ||
| 1193 | printf(" ]\n"); | ||
| 1194 | } | ||
| 1195 | #endif | ||
| 1196 | |||
| 1197 | // If no further progress can be made within the current recursion depth limit, then | ||
| 1198 | // bump the depth limit by one, and continue to process the states that exceeded the | ||
| 1199 | // limit. But only allow this if progress has been made since the last time the depth | ||
| 1200 | // limit was increased. | ||
| 1201 | if (analysis->states.size == 0) { | ||
| 1202 | if ( | ||
| 1203 | analysis->deeper_states.size > 0 && | ||
| 1204 | analysis->final_step_indices.size > prev_final_step_count | ||
| 1205 | ) { | ||
| 1206 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1207 | printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); | ||
| 1208 | #endif | ||
| 1209 | |||
| 1210 | prev_final_step_count = analysis->final_step_indices.size; | ||
| 1211 | recursion_depth_limit++; | ||
| 1212 | AnalysisStateSet _states = analysis->states; | ||
| 1213 | analysis->states = analysis->deeper_states; | ||
| 1214 | analysis->deeper_states = _states; | ||
| 1215 | continue; | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | break; | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); | ||
| 1222 | for (unsigned j = 0; j < analysis->states.size; j++) { | ||
| 1223 | AnalysisState * const state = analysis->states.contents[j]; | ||
| 1224 | |||
| 1225 | // For efficiency, it's important to avoid processing the same analysis state more | ||
| 1226 | // than once. To achieve this, keep the states in order of ascending position within | ||
| 1227 | // their hypothetical syntax trees. In each iteration of this loop, start by advancing | ||
| 1228 | // the states that have made the least progress. Avoid advancing states that have already | ||
| 1229 | // made more progress. | ||
| 1230 | if (analysis->next_states.size > 0) { | ||
| 1231 | int comparison = analysis_state__compare_position( | ||
| 1232 | &state, | ||
| 1233 | array_back(&analysis->next_states) | ||
| 1234 | ); | ||
| 1235 | if (comparison == 0) { | ||
| 1236 | analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); | ||
| 1237 | continue; | ||
| 1238 | } else if (comparison > 0) { | ||
| 1239 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1240 | printf("Terminate iteration at state %u\n", j); | ||
| 1241 | #endif | ||
| 1242 | while (j < analysis->states.size) { | ||
| 1243 | analysis_state_set__push( | ||
| 1244 | &analysis->next_states, | ||
| 1245 | &analysis->state_pool, | ||
| 1246 | analysis->states.contents[j] | ||
| 1247 | ); | ||
| 1248 | j++; | ||
| 1249 | } | ||
| 1250 | break; | ||
| 1251 | } | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | const TSStateId parse_state = analysis_state__top(state)->parse_state; | ||
| 1255 | const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; | ||
| 1256 | const TSFieldId parent_field_id = analysis_state__top(state)->field_id; | ||
| 1257 | const unsigned child_index = analysis_state__top(state)->child_index; | ||
| 1258 | const QueryStep * const step = &self->steps.contents[state->step_index]; | ||
| 1259 | |||
| 1260 | unsigned subgraph_index, exists; | ||
| 1261 | array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); | ||
| 1262 | if (!exists) continue; | ||
| 1263 | const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index]; | ||
| 1264 | |||
| 1265 | // Follow every possible path in the parse table, but only visit states that | ||
| 1266 | // are part of the subgraph for the current symbol. | ||
| 1267 | LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); | ||
| 1268 | while (ts_lookahead_iterator__next(&lookahead_iterator)) { | ||
| 1269 | TSSymbol sym = lookahead_iterator.symbol; | ||
| 1270 | |||
| 1271 | AnalysisSubgraphNode successor = { | ||
| 1272 | .state = parse_state, | ||
| 1273 | .child_index = child_index, | ||
| 1274 | }; | ||
| 1275 | if (lookahead_iterator.action_count) { | ||
| 1276 | const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; | ||
| 1277 | if (action->type == TSParseActionTypeShift) { | ||
| 1278 | if (!action->shift.extra) { | ||
| 1279 | successor.state = action->shift.state; | ||
| 1280 | successor.child_index++; | ||
| 1281 | } | ||
| 1282 | } else { | ||
| 1283 | continue; | ||
| 1284 | } | ||
| 1285 | } else if (lookahead_iterator.next_state != 0) { | ||
| 1286 | successor.state = lookahead_iterator.next_state; | ||
| 1287 | successor.child_index++; | ||
| 1288 | } else { | ||
| 1289 | continue; | ||
| 1290 | } | ||
| 1291 | |||
| 1292 | unsigned node_index; | ||
| 1293 | array_search_sorted_with( | ||
| 1294 | &subgraph->nodes, | ||
| 1295 | analysis_subgraph_node__compare, &successor, | ||
| 1296 | &node_index, &exists | ||
| 1297 | ); | ||
| 1298 | while (node_index < subgraph->nodes.size) { | ||
| 1299 | AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; | ||
| 1300 | if (node->state != successor.state || node->child_index != successor.child_index) break; | ||
| 1301 | |||
| 1302 | // Use the subgraph to determine what alias and field will eventually be applied | ||
| 1303 | // to this child node. | ||
| 1304 | TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); | ||
| 1305 | TSSymbol visible_symbol = alias | ||
| 1306 | ? alias | ||
| 1307 | : self->language->symbol_metadata[sym].visible | ||
| 1308 | ? self->language->public_symbol_map[sym] | ||
| 1309 | : 0; | ||
| 1310 | TSFieldId field_id = parent_field_id; | ||
| 1311 | if (!field_id) { | ||
| 1312 | const TSFieldMapEntry *field_map, *field_map_end; | ||
| 1313 | ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); | ||
| 1314 | for (; field_map != field_map_end; field_map++) { | ||
| 1315 | if (!field_map->inherited && field_map->child_index == child_index) { | ||
| 1316 | field_id = field_map->field_id; | ||
| 1317 | break; | ||
| 1318 | } | ||
| 1319 | } | ||
| 1320 | } | ||
| 1321 | |||
| 1322 | // Create a new state that has advanced past this hypothetical subtree. | ||
| 1323 | AnalysisState next_state = *state; | ||
| 1324 | AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); | ||
| 1325 | next_state_top->child_index = successor.child_index; | ||
| 1326 | next_state_top->parse_state = successor.state; | ||
| 1327 | if (node->done) next_state_top->done = true; | ||
| 1328 | |||
| 1329 | // Determine if this hypothetical child node would match the current step | ||
| 1330 | // of the query pattern. | ||
| 1331 | bool does_match = false; | ||
| 1332 | if (visible_symbol) { | ||
| 1333 | does_match = true; | ||
| 1334 | if (step->symbol == WILDCARD_SYMBOL) { | ||
| 1335 | if ( | ||
| 1336 | step->is_named && | ||
| 1337 | !self->language->symbol_metadata[visible_symbol].named | ||
| 1338 | ) does_match = false; | ||
| 1339 | } else if (step->symbol != visible_symbol) { | ||
| 1340 | does_match = false; | ||
| 1341 | } | ||
| 1342 | if (step->field && step->field != field_id) { | ||
| 1343 | does_match = false; | ||
| 1344 | } | ||
| 1345 | if ( | ||
| 1346 | step->supertype_symbol && | ||
| 1347 | !analysis_state__has_supertype(state, step->supertype_symbol) | ||
| 1348 | ) does_match = false; | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | // If this child is hidden, then descend into it and walk through its children. | ||
| 1352 | // If the top entry of the stack is at the end of its rule, then that entry can | ||
| 1353 | // be replaced. Otherwise, push a new entry onto the stack. | ||
| 1354 | else if (sym >= self->language->token_count) { | ||
| 1355 | if (!next_state_top->done) { | ||
| 1356 | if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { | ||
| 1357 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1358 | printf("Exceeded depth limit for state %u\n", j); | ||
| 1359 | #endif | ||
| 1360 | |||
| 1361 | analysis->did_abort = true; | ||
| 1362 | continue; | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | next_state.depth++; | ||
| 1366 | next_state_top = analysis_state__top(&next_state); | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | *next_state_top = (AnalysisStateEntry) { | ||
| 1370 | .parse_state = parse_state, | ||
| 1371 | .parent_symbol = sym, | ||
| 1372 | .child_index = 0, | ||
| 1373 | .field_id = field_id, | ||
| 1374 | .done = false, | ||
| 1375 | }; | ||
| 1376 | |||
| 1377 | if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { | ||
| 1378 | analysis_state_set__insert_sorted( | ||
| 1379 | &analysis->deeper_states, | ||
| 1380 | &analysis->state_pool, | ||
| 1381 | &next_state | ||
| 1382 | ); | ||
| 1383 | continue; | ||
| 1384 | } | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | // Pop from the stack when this state reached the end of its current syntax node. | ||
| 1388 | while (next_state.depth > 0 && next_state_top->done) { | ||
| 1389 | next_state.depth--; | ||
| 1390 | next_state_top = analysis_state__top(&next_state); | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | // If this hypothetical child did match the current step of the query pattern, | ||
| 1394 | // then advance to the next step at the current depth. This involves skipping | ||
| 1395 | // over any descendant steps of the current child. | ||
| 1396 | const QueryStep *next_step = step; | ||
| 1397 | if (does_match) { | ||
| 1398 | for (;;) { | ||
| 1399 | next_state.step_index++; | ||
| 1400 | next_step = &self->steps.contents[next_state.step_index]; | ||
| 1401 | if ( | ||
| 1402 | next_step->depth == PATTERN_DONE_MARKER || | ||
| 1403 | next_step->depth <= step->depth | ||
| 1404 | ) break; | ||
| 1405 | } | ||
| 1406 | } else if (successor.state == parse_state) { | ||
| 1407 | continue; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | for (;;) { | ||
| 1411 | // Skip pass-through states. Although these states have alternatives, they are only | ||
| 1412 | // used to implement repetitions, and query analysis does not need to process | ||
| 1413 | // repetitions in order to determine whether steps are possible and definite. | ||
| 1414 | if (next_step->is_pass_through) { | ||
| 1415 | next_state.step_index++; | ||
| 1416 | next_step++; | ||
| 1417 | continue; | ||
| 1418 | } | ||
| 1419 | |||
| 1420 | // If the pattern is finished or hypothetical parent node is complete, then | ||
| 1421 | // record that matching can terminate at this step of the pattern. Otherwise, | ||
| 1422 | // add this state to the list of states to process on the next iteration. | ||
| 1423 | if (!next_step->is_dead_end) { | ||
| 1424 | bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth; | ||
| 1425 | if (did_finish_pattern) { | ||
| 1426 | array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); | ||
| 1427 | } else if (next_state.depth == 0) { | ||
| 1428 | array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); | ||
| 1429 | } else { | ||
| 1430 | analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); | ||
| 1431 | } | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | // If the state has advanced to a step with an alternative step, then add another state | ||
| 1435 | // at that alternative step. This process is simpler than the process of actually matching a | ||
| 1436 | // pattern during query execution, because for the purposes of query analysis, there is no | ||
| 1437 | // need to process repetitions. | ||
| 1438 | if ( | ||
| 1439 | does_match && | ||
| 1440 | next_step->alternative_index != NONE && | ||
| 1441 | next_step->alternative_index > next_state.step_index | ||
| 1442 | ) { | ||
| 1443 | next_state.step_index = next_step->alternative_index; | ||
| 1444 | next_step = &self->steps.contents[next_state.step_index]; | ||
| 1445 | } else { | ||
| 1446 | break; | ||
| 1447 | } | ||
| 1448 | } | ||
| 1449 | } | ||
| 1450 | } | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | AnalysisStateSet _states = analysis->states; | ||
| 1454 | analysis->states = analysis->next_states; | ||
| 1455 | analysis->next_states = _states; | ||
| 1456 | } | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { | ||
| 1460 | Array(uint16_t) non_rooted_pattern_start_steps = array_new(); | ||
| 1461 | for (unsigned i = 0; i < self->pattern_map.size; i++) { | ||
| 1462 | PatternEntry *pattern = &self->pattern_map.contents[i]; | ||
| 1463 | if (!pattern->is_rooted) { | ||
| 1464 | QueryStep *step = &self->steps.contents[pattern->step_index]; | ||
| 1465 | if (step->symbol != WILDCARD_SYMBOL) { | ||
| 1466 | array_push(&non_rooted_pattern_start_steps, i); | ||
| 1467 | } | ||
| 1468 | } | ||
| 1469 | } | ||
| 1470 | |||
| 1471 | // Walk forward through all of the steps in the query, computing some | ||
| 1472 | // basic information about each step. Mark all of the steps that contain | ||
| 1473 | // captures, and record the indices of all of the steps that have child steps. | ||
| 1474 | Array(uint32_t) parent_step_indices = array_new(); | ||
| 1475 | for (unsigned i = 0; i < self->steps.size; i++) { | ||
| 1476 | QueryStep *step = &self->steps.contents[i]; | ||
| 1477 | if (step->depth == PATTERN_DONE_MARKER) { | ||
| 1478 | step->parent_pattern_guaranteed = true; | ||
| 1479 | step->root_pattern_guaranteed = true; | ||
| 1480 | continue; | ||
| 1481 | } | ||
| 1482 | |||
| 1483 | bool has_children = false; | ||
| 1484 | bool is_wildcard = step->symbol == WILDCARD_SYMBOL; | ||
| 1485 | step->contains_captures = step->capture_ids[0] != NONE; | ||
| 1486 | for (unsigned j = i + 1; j < self->steps.size; j++) { | ||
| 1487 | QueryStep *next_step = &self->steps.contents[j]; | ||
| 1488 | if ( | ||
| 1489 | next_step->depth == PATTERN_DONE_MARKER || | ||
| 1490 | next_step->depth <= step->depth | ||
| 1491 | ) break; | ||
| 1492 | if (next_step->capture_ids[0] != NONE) { | ||
| 1493 | step->contains_captures = true; | ||
| 1494 | } | ||
| 1495 | if (!is_wildcard) { | ||
| 1496 | next_step->root_pattern_guaranteed = true; | ||
| 1497 | next_step->parent_pattern_guaranteed = true; | ||
| 1498 | } | ||
| 1499 | has_children = true; | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | if (has_children && !is_wildcard) { | ||
| 1503 | array_push(&parent_step_indices, i); | ||
| 1504 | } | ||
| 1505 | } | ||
| 1506 | |||
| 1507 | // For every parent symbol in the query, initialize an 'analysis subgraph'. | ||
| 1508 | // This subgraph lists all of the states in the parse table that are directly | ||
| 1509 | // involved in building subtrees for this symbol. | ||
| 1510 | // | ||
| 1511 | // In addition to the parent symbols in the query, construct subgraphs for all | ||
| 1512 | // of the hidden symbols in the grammar, because these might occur within | ||
| 1513 | // one of the parent nodes, such that their children appear to belong to the | ||
| 1514 | // parent. | ||
| 1515 | AnalysisSubgraphArray subgraphs = array_new(); | ||
| 1516 | for (unsigned i = 0; i < parent_step_indices.size; i++) { | ||
| 1517 | uint32_t parent_step_index = parent_step_indices.contents[i]; | ||
| 1518 | TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; | ||
| 1519 | AnalysisSubgraph subgraph = { .symbol = parent_symbol }; | ||
| 1520 | array_insert_sorted_by(&subgraphs, .symbol, subgraph); | ||
| 1521 | } | ||
| 1522 | for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { | ||
| 1523 | if (!ts_language_symbol_metadata(self->language, sym).visible) { | ||
| 1524 | AnalysisSubgraph subgraph = { .symbol = sym }; | ||
| 1525 | array_insert_sorted_by(&subgraphs, .symbol, subgraph); | ||
| 1526 | } | ||
| 1527 | } | ||
| 1528 | |||
| 1529 | // Scan the parse table to find the data needed to populate these subgraphs. | ||
| 1530 | // Collect three things during this scan: | ||
| 1531 | // 1) All of the parse states where one of these symbols can start. | ||
| 1532 | // 2) All of the parse states where one of these symbols can end, along | ||
| 1533 | // with information about the node that would be created. | ||
| 1534 | // 3) A list of predecessor states for each state. | ||
| 1535 | StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); | ||
| 1536 | for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) { | ||
| 1537 | unsigned subgraph_index, exists; | ||
| 1538 | LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); | ||
| 1539 | while (ts_lookahead_iterator__next(&lookahead_iterator)) { | ||
| 1540 | if (lookahead_iterator.action_count) { | ||
| 1541 | for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { | ||
| 1542 | const TSParseAction *action = &lookahead_iterator.actions[i]; | ||
| 1543 | if (action->type == TSParseActionTypeReduce) { | ||
| 1544 | const TSSymbol *aliases, *aliases_end; | ||
| 1545 | ts_language_aliases_for_symbol( | ||
| 1546 | self->language, | ||
| 1547 | action->reduce.symbol, | ||
| 1548 | &aliases, | ||
| 1549 | &aliases_end | ||
| 1550 | ); | ||
| 1551 | for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { | ||
| 1552 | array_search_sorted_by( | ||
| 1553 | &subgraphs, | ||
| 1554 | .symbol, | ||
| 1555 | *symbol, | ||
| 1556 | &subgraph_index, | ||
| 1557 | &exists | ||
| 1558 | ); | ||
| 1559 | if (exists) { | ||
| 1560 | AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; | ||
| 1561 | if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { | ||
| 1562 | array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { | ||
| 1563 | .state = state, | ||
| 1564 | .production_id = action->reduce.production_id, | ||
| 1565 | .child_index = action->reduce.child_count, | ||
| 1566 | .done = true, | ||
| 1567 | })); | ||
| 1568 | } | ||
| 1569 | } | ||
| 1570 | } | ||
| 1571 | } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { | ||
| 1572 | TSStateId next_state = action->shift.state; | ||
| 1573 | state_predecessor_map_add(&predecessor_map, next_state, state); | ||
| 1574 | } | ||
| 1575 | } | ||
| 1576 | } else if (lookahead_iterator.next_state != 0) { | ||
| 1577 | if (lookahead_iterator.next_state != state) { | ||
| 1578 | state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); | ||
| 1579 | } | ||
| 1580 | if (ts_language_state_is_primary(self->language, state)) { | ||
| 1581 | const TSSymbol *aliases, *aliases_end; | ||
| 1582 | ts_language_aliases_for_symbol( | ||
| 1583 | self->language, | ||
| 1584 | lookahead_iterator.symbol, | ||
| 1585 | &aliases, | ||
| 1586 | &aliases_end | ||
| 1587 | ); | ||
| 1588 | for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { | ||
| 1589 | array_search_sorted_by( | ||
| 1590 | &subgraphs, | ||
| 1591 | .symbol, | ||
| 1592 | *symbol, | ||
| 1593 | &subgraph_index, | ||
| 1594 | &exists | ||
| 1595 | ); | ||
| 1596 | if (exists) { | ||
| 1597 | AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; | ||
| 1598 | if ( | ||
| 1599 | subgraph->start_states.size == 0 || | ||
| 1600 | *array_back(&subgraph->start_states) != state | ||
| 1601 | ) | ||
| 1602 | array_push(&subgraph->start_states, state); | ||
| 1603 | } | ||
| 1604 | } | ||
| 1605 | } | ||
| 1606 | } | ||
| 1607 | } | ||
| 1608 | } | ||
| 1609 | |||
| 1610 | // For each subgraph, compute the preceding states by walking backward | ||
| 1611 | // from the end states using the predecessor map. | ||
| 1612 | Array(AnalysisSubgraphNode) next_nodes = array_new(); | ||
| 1613 | for (unsigned i = 0; i < subgraphs.size; i++) { | ||
| 1614 | AnalysisSubgraph *subgraph = &subgraphs.contents[i]; | ||
| 1615 | if (subgraph->nodes.size == 0) { | ||
| 1616 | array_delete(&subgraph->start_states); | ||
| 1617 | array_erase(&subgraphs, i); | ||
| 1618 | i--; | ||
| 1619 | continue; | ||
| 1620 | } | ||
| 1621 | array_assign(&next_nodes, &subgraph->nodes); | ||
| 1622 | while (next_nodes.size > 0) { | ||
| 1623 | AnalysisSubgraphNode node = array_pop(&next_nodes); | ||
| 1624 | if (node.child_index > 1) { | ||
| 1625 | unsigned predecessor_count; | ||
| 1626 | const TSStateId *predecessors = state_predecessor_map_get( | ||
| 1627 | &predecessor_map, | ||
| 1628 | node.state, | ||
| 1629 | &predecessor_count | ||
| 1630 | ); | ||
| 1631 | for (unsigned j = 0; j < predecessor_count; j++) { | ||
| 1632 | AnalysisSubgraphNode predecessor_node = { | ||
| 1633 | .state = predecessors[j], | ||
| 1634 | .child_index = node.child_index - 1, | ||
| 1635 | .production_id = node.production_id, | ||
| 1636 | .done = false, | ||
| 1637 | }; | ||
| 1638 | unsigned index, exists; | ||
| 1639 | array_search_sorted_with( | ||
| 1640 | &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node, | ||
| 1641 | &index, &exists | ||
| 1642 | ); | ||
| 1643 | if (!exists) { | ||
| 1644 | array_insert(&subgraph->nodes, index, predecessor_node); | ||
| 1645 | array_push(&next_nodes, predecessor_node); | ||
| 1646 | } | ||
| 1647 | } | ||
| 1648 | } | ||
| 1649 | } | ||
| 1650 | } | ||
| 1651 | |||
| 1652 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1653 | printf("\nSubgraphs:\n"); | ||
| 1654 | for (unsigned i = 0; i < subgraphs.size; i++) { | ||
| 1655 | AnalysisSubgraph *subgraph = &subgraphs.contents[i]; | ||
| 1656 | printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); | ||
| 1657 | for (unsigned j = 0; j < subgraph->start_states.size; j++) { | ||
| 1658 | printf( | ||
| 1659 | " {state: %u}\n", | ||
| 1660 | subgraph->start_states.contents[j] | ||
| 1661 | ); | ||
| 1662 | } | ||
| 1663 | for (unsigned j = 0; j < subgraph->nodes.size; j++) { | ||
| 1664 | AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; | ||
| 1665 | printf( | ||
| 1666 | " {state: %u, child_index: %u, production_id: %u, done: %d}\n", | ||
| 1667 | node->state, node->child_index, node->production_id, node->done | ||
| 1668 | ); | ||
| 1669 | } | ||
| 1670 | printf("\n"); | ||
| 1671 | } | ||
| 1672 | #endif | ||
| 1673 | |||
| 1674 | // For each non-terminal pattern, determine if the pattern can successfully match, | ||
| 1675 | // and identify all of the possible children within the pattern where matching could fail. | ||
| 1676 | bool all_patterns_are_valid = true; | ||
| 1677 | QueryAnalysis analysis = query_analysis__new(); | ||
| 1678 | for (unsigned i = 0; i < parent_step_indices.size; i++) { | ||
| 1679 | uint16_t parent_step_index = parent_step_indices.contents[i]; | ||
| 1680 | uint16_t parent_depth = self->steps.contents[parent_step_index].depth; | ||
| 1681 | TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; | ||
| 1682 | if (parent_symbol == ts_builtin_sym_error) continue; | ||
| 1683 | |||
| 1684 | // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's | ||
| 1685 | // root symbol is a terminal, then return an error. | ||
| 1686 | unsigned subgraph_index, exists; | ||
| 1687 | array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); | ||
| 1688 | if (!exists) { | ||
| 1689 | unsigned first_child_step_index = parent_step_index + 1; | ||
| 1690 | uint32_t j, child_exists; | ||
| 1691 | array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); | ||
| 1692 | assert(child_exists); | ||
| 1693 | *error_offset = self->step_offsets.contents[j].byte_offset; | ||
| 1694 | all_patterns_are_valid = false; | ||
| 1695 | break; | ||
| 1696 | } | ||
| 1697 | |||
| 1698 | // Initialize an analysis state at every parse state in the table where | ||
| 1699 | // this parent symbol can occur. | ||
| 1700 | AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; | ||
| 1701 | analysis_state_set__clear(&analysis.states, &analysis.state_pool); | ||
| 1702 | analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); | ||
| 1703 | for (unsigned j = 0; j < subgraph->start_states.size; j++) { | ||
| 1704 | TSStateId parse_state = subgraph->start_states.contents[j]; | ||
| 1705 | analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { | ||
| 1706 | .step_index = parent_step_index + 1, | ||
| 1707 | .stack = { | ||
| 1708 | [0] = { | ||
| 1709 | .parse_state = parse_state, | ||
| 1710 | .parent_symbol = parent_symbol, | ||
| 1711 | .child_index = 0, | ||
| 1712 | .field_id = 0, | ||
| 1713 | .done = false, | ||
| 1714 | }, | ||
| 1715 | }, | ||
| 1716 | .depth = 1, | ||
| 1717 | .root_symbol = parent_symbol, | ||
| 1718 | })); | ||
| 1719 | } | ||
| 1720 | |||
| 1721 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1722 | printf( | ||
| 1723 | "\nWalk states for %s:\n", | ||
| 1724 | ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol) | ||
| 1725 | ); | ||
| 1726 | #endif | ||
| 1727 | |||
| 1728 | analysis.did_abort = false; | ||
| 1729 | ts_query__perform_analysis(self, &subgraphs, &analysis); | ||
| 1730 | |||
| 1731 | // If this pattern could not be fully analyzed, then every step should | ||
| 1732 | // be considered fallible. | ||
| 1733 | if (analysis.did_abort) { | ||
| 1734 | for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { | ||
| 1735 | QueryStep *step = &self->steps.contents[j]; | ||
| 1736 | if ( | ||
| 1737 | step->depth <= parent_depth || | ||
| 1738 | step->depth == PATTERN_DONE_MARKER | ||
| 1739 | ) break; | ||
| 1740 | if (!step->is_dead_end) { | ||
| 1741 | step->parent_pattern_guaranteed = false; | ||
| 1742 | step->root_pattern_guaranteed = false; | ||
| 1743 | } | ||
| 1744 | } | ||
| 1745 | continue; | ||
| 1746 | } | ||
| 1747 | |||
| 1748 | // If this pattern cannot match, store the pattern index so that it can be | ||
| 1749 | // returned to the caller. | ||
| 1750 | if (analysis.finished_parent_symbols.size == 0) { | ||
| 1751 | assert(analysis.final_step_indices.size > 0); | ||
| 1752 | uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); | ||
| 1753 | uint32_t j, impossible_exists; | ||
| 1754 | array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); | ||
| 1755 | if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; | ||
| 1756 | *error_offset = self->step_offsets.contents[j].byte_offset; | ||
| 1757 | all_patterns_are_valid = false; | ||
| 1758 | break; | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | // Mark as fallible any step where a match terminated. | ||
| 1762 | // Later, this property will be propagated to all of the step's predecessors. | ||
| 1763 | for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { | ||
| 1764 | uint32_t final_step_index = analysis.final_step_indices.contents[j]; | ||
| 1765 | QueryStep *step = &self->steps.contents[final_step_index]; | ||
| 1766 | if ( | ||
| 1767 | step->depth != PATTERN_DONE_MARKER && | ||
| 1768 | step->depth > parent_depth && | ||
| 1769 | !step->is_dead_end | ||
| 1770 | ) { | ||
| 1771 | step->parent_pattern_guaranteed = false; | ||
| 1772 | step->root_pattern_guaranteed = false; | ||
| 1773 | } | ||
| 1774 | } | ||
| 1775 | } | ||
| 1776 | |||
| 1777 | // Mark as indefinite any step with captures that are used in predicates. | ||
| 1778 | Array(uint16_t) predicate_capture_ids = array_new(); | ||
| 1779 | for (unsigned i = 0; i < self->patterns.size; i++) { | ||
| 1780 | QueryPattern *pattern = &self->patterns.contents[i]; | ||
| 1781 | |||
| 1782 | // Gather all of the captures that are used in predicates for this pattern. | ||
| 1783 | array_clear(&predicate_capture_ids); | ||
| 1784 | for ( | ||
| 1785 | unsigned start = pattern->predicate_step.offset, | ||
| 1786 | end = start + pattern->predicate_step.length, | ||
| 1787 | j = start; j < end; j++ | ||
| 1788 | ) { | ||
| 1789 | TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; | ||
| 1790 | if (step->type == TSQueryPredicateStepTypeCapture) { | ||
| 1791 | uint16_t value_id = step->value_id; | ||
| 1792 | array_insert_sorted_by(&predicate_capture_ids, , value_id); | ||
| 1793 | } | ||
| 1794 | } | ||
| 1795 | |||
| 1796 | // Find all of the steps that have these captures. | ||
| 1797 | for ( | ||
| 1798 | unsigned start = pattern->step.offset, | ||
| 1799 | end = start + pattern->step.length, | ||
| 1800 | j = start; j < end; j++ | ||
| 1801 | ) { | ||
| 1802 | QueryStep *step = &self->steps.contents[j]; | ||
| 1803 | for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { | ||
| 1804 | uint16_t capture_id = step->capture_ids[k]; | ||
| 1805 | if (capture_id == NONE) break; | ||
| 1806 | unsigned index, exists; | ||
| 1807 | array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); | ||
| 1808 | if (exists) { | ||
| 1809 | step->root_pattern_guaranteed = false; | ||
| 1810 | break; | ||
| 1811 | } | ||
| 1812 | } | ||
| 1813 | } | ||
| 1814 | } | ||
| 1815 | |||
| 1816 | // Propagate fallibility. If a pattern is fallible at a given step, then it is | ||
| 1817 | // fallible at all of its preceding steps. | ||
| 1818 | bool done = self->steps.size == 0; | ||
| 1819 | while (!done) { | ||
| 1820 | done = true; | ||
| 1821 | for (unsigned i = self->steps.size - 1; i > 0; i--) { | ||
| 1822 | QueryStep *step = &self->steps.contents[i]; | ||
| 1823 | if (step->depth == PATTERN_DONE_MARKER) continue; | ||
| 1824 | |||
| 1825 | // Determine if this step is definite or has definite alternatives. | ||
| 1826 | bool parent_pattern_guaranteed = false; | ||
| 1827 | for (;;) { | ||
| 1828 | if (step->root_pattern_guaranteed) { | ||
| 1829 | parent_pattern_guaranteed = true; | ||
| 1830 | break; | ||
| 1831 | } | ||
| 1832 | if (step->alternative_index == NONE || step->alternative_index < i) { | ||
| 1833 | break; | ||
| 1834 | } | ||
| 1835 | step = &self->steps.contents[step->alternative_index]; | ||
| 1836 | } | ||
| 1837 | |||
| 1838 | // If not, mark its predecessor as indefinite. | ||
| 1839 | if (!parent_pattern_guaranteed) { | ||
| 1840 | QueryStep *prev_step = &self->steps.contents[i - 1]; | ||
| 1841 | if ( | ||
| 1842 | !prev_step->is_dead_end && | ||
| 1843 | prev_step->depth != PATTERN_DONE_MARKER && | ||
| 1844 | prev_step->root_pattern_guaranteed | ||
| 1845 | ) { | ||
| 1846 | prev_step->root_pattern_guaranteed = false; | ||
| 1847 | done = false; | ||
| 1848 | } | ||
| 1849 | } | ||
| 1850 | } | ||
| 1851 | } | ||
| 1852 | |||
| 1853 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1854 | printf("Steps:\n"); | ||
| 1855 | for (unsigned i = 0; i < self->steps.size; i++) { | ||
| 1856 | QueryStep *step = &self->steps.contents[i]; | ||
| 1857 | if (step->depth == PATTERN_DONE_MARKER) { | ||
| 1858 | printf(" %u: DONE\n", i); | ||
| 1859 | } else { | ||
| 1860 | printf( | ||
| 1861 | " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", | ||
| 1862 | i, | ||
| 1863 | (step->symbol == WILDCARD_SYMBOL) | ||
| 1864 | ? "ANY" | ||
| 1865 | : ts_language_symbol_name(self->language, step->symbol), | ||
| 1866 | (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), | ||
| 1867 | step->depth, | ||
| 1868 | step->parent_pattern_guaranteed, | ||
| 1869 | step->root_pattern_guaranteed | ||
| 1870 | ); | ||
| 1871 | } | ||
| 1872 | } | ||
| 1873 | #endif | ||
| 1874 | |||
| 1875 | // Determine which repetition symbols in this language have the possibility | ||
| 1876 | // of matching non-rooted patterns in this query. These repetition symbols | ||
| 1877 | // prevent certain optimizations with range restrictions. | ||
| 1878 | analysis.did_abort = false; | ||
| 1879 | for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { | ||
| 1880 | uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i]; | ||
| 1881 | PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index]; | ||
| 1882 | |||
| 1883 | analysis_state_set__clear(&analysis.states, &analysis.state_pool); | ||
| 1884 | analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); | ||
| 1885 | for (unsigned j = 0; j < subgraphs.size; j++) { | ||
| 1886 | AnalysisSubgraph *subgraph = &subgraphs.contents[j]; | ||
| 1887 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); | ||
| 1888 | if (metadata.visible || metadata.named) continue; | ||
| 1889 | |||
| 1890 | for (uint32_t k = 0; k < subgraph->start_states.size; k++) { | ||
| 1891 | TSStateId parse_state = subgraph->start_states.contents[k]; | ||
| 1892 | analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { | ||
| 1893 | .step_index = pattern_entry->step_index, | ||
| 1894 | .stack = { | ||
| 1895 | [0] = { | ||
| 1896 | .parse_state = parse_state, | ||
| 1897 | .parent_symbol = subgraph->symbol, | ||
| 1898 | .child_index = 0, | ||
| 1899 | .field_id = 0, | ||
| 1900 | .done = false, | ||
| 1901 | }, | ||
| 1902 | }, | ||
| 1903 | .root_symbol = subgraph->symbol, | ||
| 1904 | .depth = 1, | ||
| 1905 | })); | ||
| 1906 | } | ||
| 1907 | } | ||
| 1908 | |||
| 1909 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1910 | printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); | ||
| 1911 | #endif | ||
| 1912 | |||
| 1913 | ts_query__perform_analysis( | ||
| 1914 | self, | ||
| 1915 | &subgraphs, | ||
| 1916 | &analysis | ||
| 1917 | ); | ||
| 1918 | |||
| 1919 | if (analysis.finished_parent_symbols.size > 0) { | ||
| 1920 | self->patterns.contents[pattern_entry->pattern_index].is_non_local = true; | ||
| 1921 | } | ||
| 1922 | |||
| 1923 | for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) { | ||
| 1924 | TSSymbol symbol = analysis.finished_parent_symbols.contents[k]; | ||
| 1925 | array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); | ||
| 1926 | } | ||
| 1927 | } | ||
| 1928 | |||
| 1929 | #ifdef DEBUG_ANALYZE_QUERY | ||
| 1930 | if (self->repeat_symbols_with_rootless_patterns.size > 0) { | ||
| 1931 | printf("\nRepetition symbols with rootless patterns:\n"); | ||
| 1932 | printf("aborted analysis: %d\n", analysis.did_abort); | ||
| 1933 | for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { | ||
| 1934 | TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i]; | ||
| 1935 | printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); | ||
| 1936 | } | ||
| 1937 | printf("\n"); | ||
| 1938 | } | ||
| 1939 | #endif | ||
| 1940 | |||
| 1941 | // Cleanup | ||
| 1942 | for (unsigned i = 0; i < subgraphs.size; i++) { | ||
| 1943 | array_delete(&subgraphs.contents[i].start_states); | ||
| 1944 | array_delete(&subgraphs.contents[i].nodes); | ||
| 1945 | } | ||
| 1946 | array_delete(&subgraphs); | ||
| 1947 | query_analysis__delete(&analysis); | ||
| 1948 | array_delete(&next_nodes); | ||
| 1949 | array_delete(&non_rooted_pattern_start_steps); | ||
| 1950 | array_delete(&parent_step_indices); | ||
| 1951 | array_delete(&predicate_capture_ids); | ||
| 1952 | state_predecessor_map_delete(&predecessor_map); | ||
| 1953 | |||
| 1954 | return all_patterns_are_valid; | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | static void ts_query__add_negated_fields( | ||
| 1958 | TSQuery *self, | ||
| 1959 | uint16_t step_index, | ||
| 1960 | TSFieldId *field_ids, | ||
| 1961 | uint16_t field_count | ||
| 1962 | ) { | ||
| 1963 | QueryStep *step = &self->steps.contents[step_index]; | ||
| 1964 | |||
| 1965 | // The negated field array stores a list of field lists, separated by zeros. | ||
| 1966 | // Try to find the start index of an existing list that matches this new list. | ||
| 1967 | bool failed_match = false; | ||
| 1968 | unsigned match_count = 0; | ||
| 1969 | unsigned start_i = 0; | ||
| 1970 | for (unsigned i = 0; i < self->negated_fields.size; i++) { | ||
| 1971 | TSFieldId existing_field_id = self->negated_fields.contents[i]; | ||
| 1972 | |||
| 1973 | // At each zero value, terminate the match attempt. If we've exactly | ||
| 1974 | // matched the new field list, then reuse this index. Otherwise, | ||
| 1975 | // start over the matching process. | ||
| 1976 | if (existing_field_id == 0) { | ||
| 1977 | if (match_count == field_count) { | ||
| 1978 | step->negated_field_list_id = start_i; | ||
| 1979 | return; | ||
| 1980 | } else { | ||
| 1981 | start_i = i + 1; | ||
| 1982 | match_count = 0; | ||
| 1983 | failed_match = false; | ||
| 1984 | } | ||
| 1985 | } | ||
| 1986 | |||
| 1987 | // If the existing list matches our new list so far, then advance | ||
| 1988 | // to the next element of the new list. | ||
| 1989 | else if ( | ||
| 1990 | match_count < field_count && | ||
| 1991 | existing_field_id == field_ids[match_count] && | ||
| 1992 | !failed_match | ||
| 1993 | ) { | ||
| 1994 | match_count++; | ||
| 1995 | } | ||
| 1996 | |||
| 1997 | // Otherwise, this existing list has failed to match. | ||
| 1998 | else { | ||
| 1999 | match_count = 0; | ||
| 2000 | failed_match = true; | ||
| 2001 | } | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | step->negated_field_list_id = self->negated_fields.size; | ||
| 2005 | array_extend(&self->negated_fields, field_count, field_ids); | ||
| 2006 | array_push(&self->negated_fields, 0); | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | static TSQueryError ts_query__parse_string_literal( | ||
| 2010 | TSQuery *self, | ||
| 2011 | Stream *stream | ||
| 2012 | ) { | ||
| 2013 | const char *string_start = stream->input; | ||
| 2014 | if (stream->next != '"') return TSQueryErrorSyntax; | ||
| 2015 | stream_advance(stream); | ||
| 2016 | const char *prev_position = stream->input; | ||
| 2017 | |||
| 2018 | bool is_escaped = false; | ||
| 2019 | array_clear(&self->string_buffer); | ||
| 2020 | for (;;) { | ||
| 2021 | if (is_escaped) { | ||
| 2022 | is_escaped = false; | ||
| 2023 | switch (stream->next) { | ||
| 2024 | case 'n': | ||
| 2025 | array_push(&self->string_buffer, '\n'); | ||
| 2026 | break; | ||
| 2027 | case 'r': | ||
| 2028 | array_push(&self->string_buffer, '\r'); | ||
| 2029 | break; | ||
| 2030 | case 't': | ||
| 2031 | array_push(&self->string_buffer, '\t'); | ||
| 2032 | break; | ||
| 2033 | case '0': | ||
| 2034 | array_push(&self->string_buffer, '\0'); | ||
| 2035 | break; | ||
| 2036 | default: | ||
| 2037 | array_extend(&self->string_buffer, stream->next_size, stream->input); | ||
| 2038 | break; | ||
| 2039 | } | ||
| 2040 | prev_position = stream->input + stream->next_size; | ||
| 2041 | } else { | ||
| 2042 | if (stream->next == '\\') { | ||
| 2043 | array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); | ||
| 2044 | prev_position = stream->input + 1; | ||
| 2045 | is_escaped = true; | ||
| 2046 | } else if (stream->next == '"') { | ||
| 2047 | array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); | ||
| 2048 | stream_advance(stream); | ||
| 2049 | return TSQueryErrorNone; | ||
| 2050 | } else if (stream->next == '\n') { | ||
| 2051 | stream_reset(stream, string_start); | ||
| 2052 | return TSQueryErrorSyntax; | ||
| 2053 | } | ||
| 2054 | } | ||
| 2055 | if (!stream_advance(stream)) { | ||
| 2056 | stream_reset(stream, string_start); | ||
| 2057 | return TSQueryErrorSyntax; | ||
| 2058 | } | ||
| 2059 | } | ||
| 2060 | } | ||
| 2061 | |||
| 2062 | // Parse a single predicate associated with a pattern, adding it to the | ||
| 2063 | // query's internal `predicate_steps` array. Predicates are arbitrary | ||
| 2064 | // S-expressions associated with a pattern which are meant to be handled at | ||
| 2065 | // a higher level of abstraction, such as the Rust/JavaScript bindings. They | ||
| 2066 | // can contain '@'-prefixed capture names, double-quoted strings, and bare | ||
| 2067 | // symbols, which also represent strings. | ||
| 2068 | static TSQueryError ts_query__parse_predicate( | ||
| 2069 | TSQuery *self, | ||
| 2070 | Stream *stream | ||
| 2071 | ) { | ||
| 2072 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; | ||
| 2073 | const char *predicate_name = stream->input; | ||
| 2074 | stream_scan_identifier(stream); | ||
| 2075 | uint32_t length = (uint32_t)(stream->input - predicate_name); | ||
| 2076 | uint16_t id = symbol_table_insert_name( | ||
| 2077 | &self->predicate_values, | ||
| 2078 | predicate_name, | ||
| 2079 | length | ||
| 2080 | ); | ||
| 2081 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2082 | .type = TSQueryPredicateStepTypeString, | ||
| 2083 | .value_id = id, | ||
| 2084 | })); | ||
| 2085 | stream_skip_whitespace(stream); | ||
| 2086 | |||
| 2087 | for (;;) { | ||
| 2088 | if (stream->next == ')') { | ||
| 2089 | stream_advance(stream); | ||
| 2090 | stream_skip_whitespace(stream); | ||
| 2091 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2092 | .type = TSQueryPredicateStepTypeDone, | ||
| 2093 | .value_id = 0, | ||
| 2094 | })); | ||
| 2095 | break; | ||
| 2096 | } | ||
| 2097 | |||
| 2098 | // Parse an '@'-prefixed capture name | ||
| 2099 | else if (stream->next == '@') { | ||
| 2100 | stream_advance(stream); | ||
| 2101 | |||
| 2102 | // Parse the capture name | ||
| 2103 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; | ||
| 2104 | const char *capture_name = stream->input; | ||
| 2105 | stream_scan_identifier(stream); | ||
| 2106 | uint32_t capture_length = (uint32_t)(stream->input - capture_name); | ||
| 2107 | |||
| 2108 | // Add the capture id to the first step of the pattern | ||
| 2109 | int capture_id = symbol_table_id_for_name( | ||
| 2110 | &self->captures, | ||
| 2111 | capture_name, | ||
| 2112 | capture_length | ||
| 2113 | ); | ||
| 2114 | if (capture_id == -1) { | ||
| 2115 | stream_reset(stream, capture_name); | ||
| 2116 | return TSQueryErrorCapture; | ||
| 2117 | } | ||
| 2118 | |||
| 2119 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2120 | .type = TSQueryPredicateStepTypeCapture, | ||
| 2121 | .value_id = capture_id, | ||
| 2122 | })); | ||
| 2123 | } | ||
| 2124 | |||
| 2125 | // Parse a string literal | ||
| 2126 | else if (stream->next == '"') { | ||
| 2127 | TSQueryError e = ts_query__parse_string_literal(self, stream); | ||
| 2128 | if (e) return e; | ||
| 2129 | uint16_t query_id = symbol_table_insert_name( | ||
| 2130 | &self->predicate_values, | ||
| 2131 | self->string_buffer.contents, | ||
| 2132 | self->string_buffer.size | ||
| 2133 | ); | ||
| 2134 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2135 | .type = TSQueryPredicateStepTypeString, | ||
| 2136 | .value_id = query_id, | ||
| 2137 | })); | ||
| 2138 | } | ||
| 2139 | |||
| 2140 | // Parse a bare symbol | ||
| 2141 | else if (stream_is_ident_start(stream)) { | ||
| 2142 | const char *symbol_start = stream->input; | ||
| 2143 | stream_scan_identifier(stream); | ||
| 2144 | uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); | ||
| 2145 | uint16_t query_id = symbol_table_insert_name( | ||
| 2146 | &self->predicate_values, | ||
| 2147 | symbol_start, | ||
| 2148 | symbol_length | ||
| 2149 | ); | ||
| 2150 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2151 | .type = TSQueryPredicateStepTypeString, | ||
| 2152 | .value_id = query_id, | ||
| 2153 | })); | ||
| 2154 | } | ||
| 2155 | |||
| 2156 | else { | ||
| 2157 | return TSQueryErrorSyntax; | ||
| 2158 | } | ||
| 2159 | |||
| 2160 | stream_skip_whitespace(stream); | ||
| 2161 | } | ||
| 2162 | |||
| 2163 | return 0; | ||
| 2164 | } | ||
| 2165 | |||
| 2166 | // Read one S-expression pattern from the stream, and incorporate it into | ||
| 2167 | // the query's internal state machine representation. For nested patterns, | ||
| 2168 | // this function calls itself recursively. | ||
| 2169 | // | ||
| 2170 | // The caller is responsible for passing in a dedicated CaptureQuantifiers. | ||
| 2171 | // These should not be shared between different calls to ts_query__parse_pattern! | ||
| 2172 | static TSQueryError ts_query__parse_pattern( | ||
| 2173 | TSQuery *self, | ||
| 2174 | Stream *stream, | ||
| 2175 | uint32_t depth, | ||
| 2176 | bool is_immediate, | ||
| 2177 | CaptureQuantifiers *capture_quantifiers | ||
| 2178 | ) { | ||
| 2179 | if (stream->next == 0) return TSQueryErrorSyntax; | ||
| 2180 | if (stream->next == ')' || stream->next == ']') return PARENT_DONE; | ||
| 2181 | |||
| 2182 | const uint32_t starting_step_index = self->steps.size; | ||
| 2183 | |||
| 2184 | // Store the byte offset of each step in the query. | ||
| 2185 | if ( | ||
| 2186 | self->step_offsets.size == 0 || | ||
| 2187 | array_back(&self->step_offsets)->step_index != starting_step_index | ||
| 2188 | ) { | ||
| 2189 | array_push(&self->step_offsets, ((StepOffset) { | ||
| 2190 | .step_index = starting_step_index, | ||
| 2191 | .byte_offset = stream_offset(stream), | ||
| 2192 | })); | ||
| 2193 | } | ||
| 2194 | |||
| 2195 | // An open bracket is the start of an alternation. | ||
| 2196 | if (stream->next == '[') { | ||
| 2197 | stream_advance(stream); | ||
| 2198 | stream_skip_whitespace(stream); | ||
| 2199 | |||
| 2200 | // Parse each branch, and add a placeholder step in between the branches. | ||
| 2201 | Array(uint32_t) branch_step_indices = array_new(); | ||
| 2202 | CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); | ||
| 2203 | for (;;) { | ||
| 2204 | uint32_t start_index = self->steps.size; | ||
| 2205 | TSQueryError e = ts_query__parse_pattern( | ||
| 2206 | self, | ||
| 2207 | stream, | ||
| 2208 | depth, | ||
| 2209 | is_immediate, | ||
| 2210 | &branch_capture_quantifiers | ||
| 2211 | ); | ||
| 2212 | |||
| 2213 | if (e == PARENT_DONE) { | ||
| 2214 | if (stream->next == ']' && branch_step_indices.size > 0) { | ||
| 2215 | stream_advance(stream); | ||
| 2216 | break; | ||
| 2217 | } | ||
| 2218 | e = TSQueryErrorSyntax; | ||
| 2219 | } | ||
| 2220 | if (e) { | ||
| 2221 | capture_quantifiers_delete(&branch_capture_quantifiers); | ||
| 2222 | array_delete(&branch_step_indices); | ||
| 2223 | return e; | ||
| 2224 | } | ||
| 2225 | |||
| 2226 | if (start_index == starting_step_index) { | ||
| 2227 | capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); | ||
| 2228 | } else { | ||
| 2229 | capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); | ||
| 2230 | } | ||
| 2231 | |||
| 2232 | array_push(&branch_step_indices, start_index); | ||
| 2233 | array_push(&self->steps, query_step__new(0, depth, false)); | ||
| 2234 | capture_quantifiers_clear(&branch_capture_quantifiers); | ||
| 2235 | } | ||
| 2236 | (void)array_pop(&self->steps); | ||
| 2237 | |||
| 2238 | // For all of the branches except for the last one, add the subsequent branch as an | ||
| 2239 | // alternative, and link the end of the branch to the current end of the steps. | ||
| 2240 | for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { | ||
| 2241 | uint32_t step_index = branch_step_indices.contents[i]; | ||
| 2242 | uint32_t next_step_index = branch_step_indices.contents[i + 1]; | ||
| 2243 | QueryStep *start_step = &self->steps.contents[step_index]; | ||
| 2244 | QueryStep *end_step = &self->steps.contents[next_step_index - 1]; | ||
| 2245 | start_step->alternative_index = next_step_index; | ||
| 2246 | end_step->alternative_index = self->steps.size; | ||
| 2247 | end_step->is_dead_end = true; | ||
| 2248 | } | ||
| 2249 | |||
| 2250 | capture_quantifiers_delete(&branch_capture_quantifiers); | ||
| 2251 | array_delete(&branch_step_indices); | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | // An open parenthesis can be the start of three possible constructs: | ||
| 2255 | // * A grouped sequence | ||
| 2256 | // * A predicate | ||
| 2257 | // * A named node | ||
| 2258 | else if (stream->next == '(') { | ||
| 2259 | stream_advance(stream); | ||
| 2260 | stream_skip_whitespace(stream); | ||
| 2261 | |||
| 2262 | // If this parenthesis is followed by a node, then it represents a grouped sequence. | ||
| 2263 | if (stream->next == '(' || stream->next == '"' || stream->next == '[') { | ||
| 2264 | bool child_is_immediate = is_immediate; | ||
| 2265 | CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); | ||
| 2266 | for (;;) { | ||
| 2267 | if (stream->next == '.') { | ||
| 2268 | child_is_immediate = true; | ||
| 2269 | stream_advance(stream); | ||
| 2270 | stream_skip_whitespace(stream); | ||
| 2271 | } | ||
| 2272 | TSQueryError e = ts_query__parse_pattern( | ||
| 2273 | self, | ||
| 2274 | stream, | ||
| 2275 | depth, | ||
| 2276 | child_is_immediate, | ||
| 2277 | &child_capture_quantifiers | ||
| 2278 | ); | ||
| 2279 | if (e == PARENT_DONE) { | ||
| 2280 | if (stream->next == ')') { | ||
| 2281 | stream_advance(stream); | ||
| 2282 | break; | ||
| 2283 | } | ||
| 2284 | e = TSQueryErrorSyntax; | ||
| 2285 | } | ||
| 2286 | if (e) { | ||
| 2287 | capture_quantifiers_delete(&child_capture_quantifiers); | ||
| 2288 | return e; | ||
| 2289 | } | ||
| 2290 | |||
| 2291 | capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); | ||
| 2292 | capture_quantifiers_clear(&child_capture_quantifiers); | ||
| 2293 | child_is_immediate = false; | ||
| 2294 | } | ||
| 2295 | |||
| 2296 | capture_quantifiers_delete(&child_capture_quantifiers); | ||
| 2297 | } | ||
| 2298 | |||
| 2299 | // A dot/pound character indicates the start of a predicate. | ||
| 2300 | else if (stream->next == '.' || stream->next == '#') { | ||
| 2301 | stream_advance(stream); | ||
| 2302 | return ts_query__parse_predicate(self, stream); | ||
| 2303 | } | ||
| 2304 | |||
| 2305 | // Otherwise, this parenthesis is the start of a named node. | ||
| 2306 | else { | ||
| 2307 | TSSymbol symbol; | ||
| 2308 | |||
| 2309 | // Parse a normal node name | ||
| 2310 | if (stream_is_ident_start(stream)) { | ||
| 2311 | const char *node_name = stream->input; | ||
| 2312 | stream_scan_identifier(stream); | ||
| 2313 | uint32_t length = (uint32_t)(stream->input - node_name); | ||
| 2314 | |||
| 2315 | // TODO - remove. | ||
| 2316 | // For temporary backward compatibility, handle predicates without the leading '#' sign. | ||
| 2317 | if (length > 0 && (node_name[length - 1] == '!' || node_name[length - 1] == '?')) { | ||
| 2318 | stream_reset(stream, node_name); | ||
| 2319 | return ts_query__parse_predicate(self, stream); | ||
| 2320 | } | ||
| 2321 | |||
| 2322 | // Parse the wildcard symbol | ||
| 2323 | else if (length == 1 && node_name[0] == '_') { | ||
| 2324 | symbol = WILDCARD_SYMBOL; | ||
| 2325 | } | ||
| 2326 | |||
| 2327 | else { | ||
| 2328 | symbol = ts_language_symbol_for_name( | ||
| 2329 | self->language, | ||
| 2330 | node_name, | ||
| 2331 | length, | ||
| 2332 | true | ||
| 2333 | ); | ||
| 2334 | if (!symbol) { | ||
| 2335 | stream_reset(stream, node_name); | ||
| 2336 | return TSQueryErrorNodeType; | ||
| 2337 | } | ||
| 2338 | } | ||
| 2339 | } else { | ||
| 2340 | return TSQueryErrorSyntax; | ||
| 2341 | } | ||
| 2342 | |||
| 2343 | // Add a step for the node. | ||
| 2344 | array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); | ||
| 2345 | QueryStep *step = array_back(&self->steps); | ||
| 2346 | if (ts_language_symbol_metadata(self->language, symbol).supertype) { | ||
| 2347 | step->supertype_symbol = step->symbol; | ||
| 2348 | step->symbol = WILDCARD_SYMBOL; | ||
| 2349 | } | ||
| 2350 | if (symbol == WILDCARD_SYMBOL) { | ||
| 2351 | step->is_named = true; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | stream_skip_whitespace(stream); | ||
| 2355 | |||
| 2356 | if (stream->next == '/') { | ||
| 2357 | stream_advance(stream); | ||
| 2358 | if (!stream_is_ident_start(stream)) { | ||
| 2359 | return TSQueryErrorSyntax; | ||
| 2360 | } | ||
| 2361 | |||
| 2362 | const char *node_name = stream->input; | ||
| 2363 | stream_scan_identifier(stream); | ||
| 2364 | uint32_t length = (uint32_t)(stream->input - node_name); | ||
| 2365 | |||
| 2366 | step->symbol = ts_language_symbol_for_name( | ||
| 2367 | self->language, | ||
| 2368 | node_name, | ||
| 2369 | length, | ||
| 2370 | true | ||
| 2371 | ); | ||
| 2372 | if (!step->symbol) { | ||
| 2373 | stream_reset(stream, node_name); | ||
| 2374 | return TSQueryErrorNodeType; | ||
| 2375 | } | ||
| 2376 | |||
| 2377 | stream_skip_whitespace(stream); | ||
| 2378 | } | ||
| 2379 | |||
| 2380 | // Parse the child patterns | ||
| 2381 | bool child_is_immediate = false; | ||
| 2382 | uint16_t last_child_step_index = 0; | ||
| 2383 | uint16_t negated_field_count = 0; | ||
| 2384 | TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; | ||
| 2385 | CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); | ||
| 2386 | for (;;) { | ||
| 2387 | // Parse a negated field assertion | ||
| 2388 | if (stream->next == '!') { | ||
| 2389 | stream_advance(stream); | ||
| 2390 | stream_skip_whitespace(stream); | ||
| 2391 | if (!stream_is_ident_start(stream)) { | ||
| 2392 | capture_quantifiers_delete(&child_capture_quantifiers); | ||
| 2393 | return TSQueryErrorSyntax; | ||
| 2394 | } | ||
| 2395 | const char *field_name = stream->input; | ||
| 2396 | stream_scan_identifier(stream); | ||
| 2397 | uint32_t length = (uint32_t)(stream->input - field_name); | ||
| 2398 | stream_skip_whitespace(stream); | ||
| 2399 | |||
| 2400 | TSFieldId field_id = ts_language_field_id_for_name( | ||
| 2401 | self->language, | ||
| 2402 | field_name, | ||
| 2403 | length | ||
| 2404 | ); | ||
| 2405 | if (!field_id) { | ||
| 2406 | stream->input = field_name; | ||
| 2407 | capture_quantifiers_delete(&child_capture_quantifiers); | ||
| 2408 | return TSQueryErrorField; | ||
| 2409 | } | ||
| 2410 | |||
| 2411 | // Keep the field ids sorted. | ||
| 2412 | if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { | ||
| 2413 | negated_field_ids[negated_field_count] = field_id; | ||
| 2414 | negated_field_count++; | ||
| 2415 | } | ||
| 2416 | |||
| 2417 | continue; | ||
| 2418 | } | ||
| 2419 | |||
| 2420 | // Parse a sibling anchor | ||
| 2421 | if (stream->next == '.') { | ||
| 2422 | child_is_immediate = true; | ||
| 2423 | stream_advance(stream); | ||
| 2424 | stream_skip_whitespace(stream); | ||
| 2425 | } | ||
| 2426 | |||
| 2427 | uint16_t step_index = self->steps.size; | ||
| 2428 | TSQueryError e = ts_query__parse_pattern( | ||
| 2429 | self, | ||
| 2430 | stream, | ||
| 2431 | depth + 1, | ||
| 2432 | child_is_immediate, | ||
| 2433 | &child_capture_quantifiers | ||
| 2434 | ); | ||
| 2435 | if (e == PARENT_DONE) { | ||
| 2436 | if (stream->next == ')') { | ||
| 2437 | if (child_is_immediate) { | ||
| 2438 | if (last_child_step_index == 0) { | ||
| 2439 | capture_quantifiers_delete(&child_capture_quantifiers); | ||
| 2440 | return TSQueryErrorSyntax; | ||
| 2441 | } | ||
| 2442 | self->steps.contents[last_child_step_index].is_last_child = true; | ||
| 2443 | } | ||
| 2444 | |||
| 2445 | if (negated_field_count) { | ||
| 2446 | ts_query__add_negated_fields( | ||
| 2447 | self, | ||
| 2448 | starting_step_index, | ||
| 2449 | negated_field_ids, | ||
| 2450 | negated_field_count | ||
| 2451 | ); | ||
| 2452 | } | ||
| 2453 | |||
| 2454 | stream_advance(stream); | ||
| 2455 | break; | ||
| 2456 | } | ||
| 2457 | e = TSQueryErrorSyntax; | ||
| 2458 | } | ||
| 2459 | if (e) { | ||
| 2460 | capture_quantifiers_delete(&child_capture_quantifiers); | ||
| 2461 | return e; | ||
| 2462 | } | ||
| 2463 | |||
| 2464 | capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); | ||
| 2465 | |||
| 2466 | last_child_step_index = step_index; | ||
| 2467 | child_is_immediate = false; | ||
| 2468 | capture_quantifiers_clear(&child_capture_quantifiers); | ||
| 2469 | } | ||
| 2470 | capture_quantifiers_delete(&child_capture_quantifiers); | ||
| 2471 | } | ||
| 2472 | } | ||
| 2473 | |||
| 2474 | // Parse a wildcard pattern | ||
| 2475 | else if (stream->next == '_') { | ||
| 2476 | stream_advance(stream); | ||
| 2477 | stream_skip_whitespace(stream); | ||
| 2478 | |||
| 2479 | // Add a step that matches any kind of node | ||
| 2480 | array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); | ||
| 2481 | } | ||
| 2482 | |||
| 2483 | // Parse a double-quoted anonymous leaf node expression | ||
| 2484 | else if (stream->next == '"') { | ||
| 2485 | const char *string_start = stream->input; | ||
| 2486 | TSQueryError e = ts_query__parse_string_literal(self, stream); | ||
| 2487 | if (e) return e; | ||
| 2488 | |||
| 2489 | // Add a step for the node | ||
| 2490 | TSSymbol symbol = ts_language_symbol_for_name( | ||
| 2491 | self->language, | ||
| 2492 | self->string_buffer.contents, | ||
| 2493 | self->string_buffer.size, | ||
| 2494 | false | ||
| 2495 | ); | ||
| 2496 | if (!symbol) { | ||
| 2497 | stream_reset(stream, string_start + 1); | ||
| 2498 | return TSQueryErrorNodeType; | ||
| 2499 | } | ||
| 2500 | array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); | ||
| 2501 | } | ||
| 2502 | |||
| 2503 | // Parse a field-prefixed pattern | ||
| 2504 | else if (stream_is_ident_start(stream)) { | ||
| 2505 | // Parse the field name | ||
| 2506 | const char *field_name = stream->input; | ||
| 2507 | stream_scan_identifier(stream); | ||
| 2508 | uint32_t length = (uint32_t)(stream->input - field_name); | ||
| 2509 | stream_skip_whitespace(stream); | ||
| 2510 | |||
| 2511 | if (stream->next != ':') { | ||
| 2512 | stream_reset(stream, field_name); | ||
| 2513 | return TSQueryErrorSyntax; | ||
| 2514 | } | ||
| 2515 | stream_advance(stream); | ||
| 2516 | stream_skip_whitespace(stream); | ||
| 2517 | |||
| 2518 | // Parse the pattern | ||
| 2519 | CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); | ||
| 2520 | TSQueryError e = ts_query__parse_pattern( | ||
| 2521 | self, | ||
| 2522 | stream, | ||
| 2523 | depth, | ||
| 2524 | is_immediate, | ||
| 2525 | &field_capture_quantifiers | ||
| 2526 | ); | ||
| 2527 | if (e) { | ||
| 2528 | capture_quantifiers_delete(&field_capture_quantifiers); | ||
| 2529 | if (e == PARENT_DONE) e = TSQueryErrorSyntax; | ||
| 2530 | return e; | ||
| 2531 | } | ||
| 2532 | |||
| 2533 | // Add the field name to the first step of the pattern | ||
| 2534 | TSFieldId field_id = ts_language_field_id_for_name( | ||
| 2535 | self->language, | ||
| 2536 | field_name, | ||
| 2537 | length | ||
| 2538 | ); | ||
| 2539 | if (!field_id) { | ||
| 2540 | stream->input = field_name; | ||
| 2541 | return TSQueryErrorField; | ||
| 2542 | } | ||
| 2543 | |||
| 2544 | uint32_t step_index = starting_step_index; | ||
| 2545 | QueryStep *step = &self->steps.contents[step_index]; | ||
| 2546 | for (;;) { | ||
| 2547 | step->field = field_id; | ||
| 2548 | if ( | ||
| 2549 | step->alternative_index != NONE && | ||
| 2550 | step->alternative_index > step_index && | ||
| 2551 | step->alternative_index < self->steps.size | ||
| 2552 | ) { | ||
| 2553 | step_index = step->alternative_index; | ||
| 2554 | step = &self->steps.contents[step_index]; | ||
| 2555 | } else { | ||
| 2556 | break; | ||
| 2557 | } | ||
| 2558 | } | ||
| 2559 | |||
| 2560 | capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); | ||
| 2561 | capture_quantifiers_delete(&field_capture_quantifiers); | ||
| 2562 | } | ||
| 2563 | |||
| 2564 | else { | ||
| 2565 | return TSQueryErrorSyntax; | ||
| 2566 | } | ||
| 2567 | |||
| 2568 | stream_skip_whitespace(stream); | ||
| 2569 | |||
| 2570 | // Parse suffixes modifiers for this pattern | ||
| 2571 | TSQuantifier quantifier = TSQuantifierOne; | ||
| 2572 | for (;;) { | ||
| 2573 | // Parse the one-or-more operator. | ||
| 2574 | if (stream->next == '+') { | ||
| 2575 | quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); | ||
| 2576 | |||
| 2577 | stream_advance(stream); | ||
| 2578 | stream_skip_whitespace(stream); | ||
| 2579 | |||
| 2580 | QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); | ||
| 2581 | repeat_step.alternative_index = starting_step_index; | ||
| 2582 | repeat_step.is_pass_through = true; | ||
| 2583 | repeat_step.alternative_is_immediate = true; | ||
| 2584 | array_push(&self->steps, repeat_step); | ||
| 2585 | } | ||
| 2586 | |||
| 2587 | // Parse the zero-or-more repetition operator. | ||
| 2588 | else if (stream->next == '*') { | ||
| 2589 | quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); | ||
| 2590 | |||
| 2591 | stream_advance(stream); | ||
| 2592 | stream_skip_whitespace(stream); | ||
| 2593 | |||
| 2594 | QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); | ||
| 2595 | repeat_step.alternative_index = starting_step_index; | ||
| 2596 | repeat_step.is_pass_through = true; | ||
| 2597 | repeat_step.alternative_is_immediate = true; | ||
| 2598 | array_push(&self->steps, repeat_step); | ||
| 2599 | |||
| 2600 | // Stop when `step->alternative_index` is `NONE` or it points to | ||
| 2601 | // `repeat_step` or beyond. Note that having just been pushed, | ||
| 2602 | // `repeat_step` occupies slot `self->steps.size - 1`. | ||
| 2603 | QueryStep *step = &self->steps.contents[starting_step_index]; | ||
| 2604 | while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { | ||
| 2605 | step = &self->steps.contents[step->alternative_index]; | ||
| 2606 | } | ||
| 2607 | step->alternative_index = self->steps.size; | ||
| 2608 | } | ||
| 2609 | |||
| 2610 | // Parse the optional operator. | ||
| 2611 | else if (stream->next == '?') { | ||
| 2612 | quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); | ||
| 2613 | |||
| 2614 | stream_advance(stream); | ||
| 2615 | stream_skip_whitespace(stream); | ||
| 2616 | |||
| 2617 | QueryStep *step = &self->steps.contents[starting_step_index]; | ||
| 2618 | while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { | ||
| 2619 | step = &self->steps.contents[step->alternative_index]; | ||
| 2620 | } | ||
| 2621 | step->alternative_index = self->steps.size; | ||
| 2622 | } | ||
| 2623 | |||
| 2624 | // Parse an '@'-prefixed capture pattern | ||
| 2625 | else if (stream->next == '@') { | ||
| 2626 | stream_advance(stream); | ||
| 2627 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; | ||
| 2628 | const char *capture_name = stream->input; | ||
| 2629 | stream_scan_identifier(stream); | ||
| 2630 | uint32_t length = (uint32_t)(stream->input - capture_name); | ||
| 2631 | stream_skip_whitespace(stream); | ||
| 2632 | |||
| 2633 | // Add the capture id to the first step of the pattern | ||
| 2634 | uint16_t capture_id = symbol_table_insert_name( | ||
| 2635 | &self->captures, | ||
| 2636 | capture_name, | ||
| 2637 | length | ||
| 2638 | ); | ||
| 2639 | |||
| 2640 | // Add the capture quantifier | ||
| 2641 | capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); | ||
| 2642 | |||
| 2643 | uint32_t step_index = starting_step_index; | ||
| 2644 | for (;;) { | ||
| 2645 | QueryStep *step = &self->steps.contents[step_index]; | ||
| 2646 | query_step__add_capture(step, capture_id); | ||
| 2647 | if ( | ||
| 2648 | step->alternative_index != NONE && | ||
| 2649 | step->alternative_index > step_index && | ||
| 2650 | step->alternative_index < self->steps.size | ||
| 2651 | ) { | ||
| 2652 | step_index = step->alternative_index; | ||
| 2653 | step = &self->steps.contents[step_index]; | ||
| 2654 | } else { | ||
| 2655 | break; | ||
| 2656 | } | ||
| 2657 | } | ||
| 2658 | } | ||
| 2659 | |||
| 2660 | // No more suffix modifiers | ||
| 2661 | else { | ||
| 2662 | break; | ||
| 2663 | } | ||
| 2664 | } | ||
| 2665 | |||
| 2666 | capture_quantifiers_mul(capture_quantifiers, quantifier); | ||
| 2667 | |||
| 2668 | return 0; | ||
| 2669 | } | ||
| 2670 | |||
| 2671 | TSQuery *ts_query_new( | ||
| 2672 | const TSLanguage *language, | ||
| 2673 | const char *source, | ||
| 2674 | uint32_t source_len, | ||
| 2675 | uint32_t *error_offset, | ||
| 2676 | TSQueryError *error_type | ||
| 2677 | ) { | ||
| 2678 | if ( | ||
| 2679 | !language || | ||
| 2680 | language->version > TREE_SITTER_LANGUAGE_VERSION || | ||
| 2681 | language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION | ||
| 2682 | ) { | ||
| 2683 | *error_type = TSQueryErrorLanguage; | ||
| 2684 | return NULL; | ||
| 2685 | } | ||
| 2686 | |||
| 2687 | TSQuery *self = ts_malloc(sizeof(TSQuery)); | ||
| 2688 | *self = (TSQuery) { | ||
| 2689 | .steps = array_new(), | ||
| 2690 | .pattern_map = array_new(), | ||
| 2691 | .captures = symbol_table_new(), | ||
| 2692 | .capture_quantifiers = array_new(), | ||
| 2693 | .predicate_values = symbol_table_new(), | ||
| 2694 | .predicate_steps = array_new(), | ||
| 2695 | .patterns = array_new(), | ||
| 2696 | .step_offsets = array_new(), | ||
| 2697 | .string_buffer = array_new(), | ||
| 2698 | .negated_fields = array_new(), | ||
| 2699 | .repeat_symbols_with_rootless_patterns = array_new(), | ||
| 2700 | .wildcard_root_pattern_count = 0, | ||
| 2701 | .language = language, | ||
| 2702 | }; | ||
| 2703 | |||
| 2704 | array_push(&self->negated_fields, 0); | ||
| 2705 | |||
| 2706 | // Parse all of the S-expressions in the given string. | ||
| 2707 | Stream stream = stream_new(source, source_len); | ||
| 2708 | stream_skip_whitespace(&stream); | ||
| 2709 | while (stream.input < stream.end) { | ||
| 2710 | uint32_t pattern_index = self->patterns.size; | ||
| 2711 | uint32_t start_step_index = self->steps.size; | ||
| 2712 | uint32_t start_predicate_step_index = self->predicate_steps.size; | ||
| 2713 | array_push(&self->patterns, ((QueryPattern) { | ||
| 2714 | .step = (Slice) {.offset = start_step_index}, | ||
| 2715 | .predicate_step = (Slice) {.offset = start_predicate_step_index}, | ||
| 2716 | .start_byte = stream_offset(&stream), | ||
| 2717 | .is_non_local = false, | ||
| 2718 | })); | ||
| 2719 | CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); | ||
| 2720 | *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); | ||
| 2721 | array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); | ||
| 2722 | |||
| 2723 | QueryPattern *pattern = array_back(&self->patterns); | ||
| 2724 | pattern->step.length = self->steps.size - start_step_index; | ||
| 2725 | pattern->predicate_step.length = self->predicate_steps.size - start_predicate_step_index; | ||
| 2726 | |||
| 2727 | // If any pattern could not be parsed, then report the error information | ||
| 2728 | // and terminate. | ||
| 2729 | if (*error_type) { | ||
| 2730 | if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; | ||
| 2731 | *error_offset = stream_offset(&stream); | ||
| 2732 | capture_quantifiers_delete(&capture_quantifiers); | ||
| 2733 | ts_query_delete(self); | ||
| 2734 | return NULL; | ||
| 2735 | } | ||
| 2736 | |||
| 2737 | // Maintain a list of capture quantifiers for each pattern | ||
| 2738 | array_push(&self->capture_quantifiers, capture_quantifiers); | ||
| 2739 | |||
| 2740 | // Maintain a map that can look up patterns for a given root symbol. | ||
| 2741 | uint16_t wildcard_root_alternative_index = NONE; | ||
| 2742 | for (;;) { | ||
| 2743 | QueryStep *step = &self->steps.contents[start_step_index]; | ||
| 2744 | |||
| 2745 | // If a pattern has a wildcard at its root, but it has a non-wildcard child, | ||
| 2746 | // then optimize the matching process by skipping matching the wildcard. | ||
| 2747 | // Later, during the matching process, the query cursor will check that | ||
| 2748 | // there is a parent node, and capture it if necessary. | ||
| 2749 | if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { | ||
| 2750 | QueryStep *second_step = &self->steps.contents[start_step_index + 1]; | ||
| 2751 | if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { | ||
| 2752 | wildcard_root_alternative_index = step->alternative_index; | ||
| 2753 | start_step_index += 1; | ||
| 2754 | step = second_step; | ||
| 2755 | } | ||
| 2756 | } | ||
| 2757 | |||
| 2758 | // Determine whether the pattern has a single root node. This affects | ||
| 2759 | // decisions about whether or not to start matching the pattern when | ||
| 2760 | // a query cursor has a range restriction or when immediately within an | ||
| 2761 | // error node. | ||
| 2762 | uint32_t start_depth = step->depth; | ||
| 2763 | bool is_rooted = start_depth == 0; | ||
| 2764 | for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { | ||
| 2765 | QueryStep *child_step = &self->steps.contents[step_index]; | ||
| 2766 | if (child_step->is_dead_end) break; | ||
| 2767 | if (child_step->depth == start_depth) { | ||
| 2768 | is_rooted = false; | ||
| 2769 | break; | ||
| 2770 | } | ||
| 2771 | } | ||
| 2772 | |||
| 2773 | ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { | ||
| 2774 | .step_index = start_step_index, | ||
| 2775 | .pattern_index = pattern_index, | ||
| 2776 | .is_rooted = is_rooted | ||
| 2777 | }); | ||
| 2778 | if (step->symbol == WILDCARD_SYMBOL) { | ||
| 2779 | self->wildcard_root_pattern_count++; | ||
| 2780 | } | ||
| 2781 | |||
| 2782 | // If there are alternatives or options at the root of the pattern, | ||
| 2783 | // then add multiple entries to the pattern map. | ||
| 2784 | if (step->alternative_index != NONE) { | ||
| 2785 | start_step_index = step->alternative_index; | ||
| 2786 | } else if (wildcard_root_alternative_index != NONE) { | ||
| 2787 | start_step_index = wildcard_root_alternative_index; | ||
| 2788 | wildcard_root_alternative_index = NONE; | ||
| 2789 | } else { | ||
| 2790 | break; | ||
| 2791 | } | ||
| 2792 | } | ||
| 2793 | } | ||
| 2794 | |||
| 2795 | if (!ts_query__analyze_patterns(self, error_offset)) { | ||
| 2796 | *error_type = TSQueryErrorStructure; | ||
| 2797 | ts_query_delete(self); | ||
| 2798 | return NULL; | ||
| 2799 | } | ||
| 2800 | |||
| 2801 | array_delete(&self->string_buffer); | ||
| 2802 | return self; | ||
| 2803 | } | ||
| 2804 | |||
| 2805 | void ts_query_delete(TSQuery *self) { | ||
| 2806 | if (self) { | ||
| 2807 | array_delete(&self->steps); | ||
| 2808 | array_delete(&self->pattern_map); | ||
| 2809 | array_delete(&self->predicate_steps); | ||
| 2810 | array_delete(&self->patterns); | ||
| 2811 | array_delete(&self->step_offsets); | ||
| 2812 | array_delete(&self->string_buffer); | ||
| 2813 | array_delete(&self->negated_fields); | ||
| 2814 | array_delete(&self->repeat_symbols_with_rootless_patterns); | ||
| 2815 | symbol_table_delete(&self->captures); | ||
| 2816 | symbol_table_delete(&self->predicate_values); | ||
| 2817 | for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { | ||
| 2818 | CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); | ||
| 2819 | capture_quantifiers_delete(capture_quantifiers); | ||
| 2820 | } | ||
| 2821 | array_delete(&self->capture_quantifiers); | ||
| 2822 | ts_free(self); | ||
| 2823 | } | ||
| 2824 | } | ||
| 2825 | |||
| 2826 | uint32_t ts_query_pattern_count(const TSQuery *self) { | ||
| 2827 | return self->patterns.size; | ||
| 2828 | } | ||
| 2829 | |||
| 2830 | uint32_t ts_query_capture_count(const TSQuery *self) { | ||
| 2831 | return self->captures.slices.size; | ||
| 2832 | } | ||
| 2833 | |||
| 2834 | uint32_t ts_query_string_count(const TSQuery *self) { | ||
| 2835 | return self->predicate_values.slices.size; | ||
| 2836 | } | ||
| 2837 | |||
| 2838 | const char *ts_query_capture_name_for_id( | ||
| 2839 | const TSQuery *self, | ||
| 2840 | uint32_t index, | ||
| 2841 | uint32_t *length | ||
| 2842 | ) { | ||
| 2843 | return symbol_table_name_for_id(&self->captures, index, length); | ||
| 2844 | } | ||
| 2845 | |||
| 2846 | TSQuantifier ts_query_capture_quantifier_for_id( | ||
| 2847 | const TSQuery *self, | ||
| 2848 | uint32_t pattern_index, | ||
| 2849 | uint32_t capture_index | ||
| 2850 | ) { | ||
| 2851 | CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); | ||
| 2852 | return capture_quantifier_for_id(capture_quantifiers, capture_index); | ||
| 2853 | } | ||
| 2854 | |||
| 2855 | const char *ts_query_string_value_for_id( | ||
| 2856 | const TSQuery *self, | ||
| 2857 | uint32_t index, | ||
| 2858 | uint32_t *length | ||
| 2859 | ) { | ||
| 2860 | return symbol_table_name_for_id(&self->predicate_values, index, length); | ||
| 2861 | } | ||
| 2862 | |||
| 2863 | const TSQueryPredicateStep *ts_query_predicates_for_pattern( | ||
| 2864 | const TSQuery *self, | ||
| 2865 | uint32_t pattern_index, | ||
| 2866 | uint32_t *step_count | ||
| 2867 | ) { | ||
| 2868 | Slice slice = self->patterns.contents[pattern_index].predicate_step; | ||
| 2869 | *step_count = slice.length; | ||
| 2870 | if (self->predicate_steps.contents == NULL) { | ||
| 2871 | return NULL; | ||
| 2872 | } | ||
| 2873 | return &self->predicate_steps.contents[slice.offset]; | ||
| 2874 | } | ||
| 2875 | |||
| 2876 | uint32_t ts_query_start_byte_for_pattern( | ||
| 2877 | const TSQuery *self, | ||
| 2878 | uint32_t pattern_index | ||
| 2879 | ) { | ||
| 2880 | return self->patterns.contents[pattern_index].start_byte; | ||
| 2881 | } | ||
| 2882 | |||
| 2883 | bool ts_query_is_pattern_rooted( | ||
| 2884 | const TSQuery *self, | ||
| 2885 | uint32_t pattern_index | ||
| 2886 | ) { | ||
| 2887 | for (unsigned i = 0; i < self->pattern_map.size; i++) { | ||
| 2888 | PatternEntry *entry = &self->pattern_map.contents[i]; | ||
| 2889 | if (entry->pattern_index == pattern_index) { | ||
| 2890 | if (!entry->is_rooted) return false; | ||
| 2891 | } | ||
| 2892 | } | ||
| 2893 | return true; | ||
| 2894 | } | ||
| 2895 | |||
| 2896 | bool ts_query_is_pattern_non_local( | ||
| 2897 | const TSQuery *self, | ||
| 2898 | uint32_t pattern_index | ||
| 2899 | ) { | ||
| 2900 | if (pattern_index < self->patterns.size) { | ||
| 2901 | return self->patterns.contents[pattern_index].is_non_local; | ||
| 2902 | } else { | ||
| 2903 | return false; | ||
| 2904 | } | ||
| 2905 | } | ||
| 2906 | |||
| 2907 | bool ts_query_is_pattern_guaranteed_at_step( | ||
| 2908 | const TSQuery *self, | ||
| 2909 | uint32_t byte_offset | ||
| 2910 | ) { | ||
| 2911 | uint32_t step_index = UINT32_MAX; | ||
| 2912 | for (unsigned i = 0; i < self->step_offsets.size; i++) { | ||
| 2913 | StepOffset *step_offset = &self->step_offsets.contents[i]; | ||
| 2914 | if (step_offset->byte_offset > byte_offset) break; | ||
| 2915 | step_index = step_offset->step_index; | ||
| 2916 | } | ||
| 2917 | if (step_index < self->steps.size) { | ||
| 2918 | return self->steps.contents[step_index].root_pattern_guaranteed; | ||
| 2919 | } else { | ||
| 2920 | return false; | ||
| 2921 | } | ||
| 2922 | } | ||
| 2923 | |||
| 2924 | bool ts_query__step_is_fallible( | ||
| 2925 | const TSQuery *self, | ||
| 2926 | uint16_t step_index | ||
| 2927 | ) { | ||
| 2928 | assert((uint32_t)step_index + 1 < self->steps.size); | ||
| 2929 | QueryStep *step = &self->steps.contents[step_index]; | ||
| 2930 | QueryStep *next_step = &self->steps.contents[step_index + 1]; | ||
| 2931 | return ( | ||
| 2932 | next_step->depth != PATTERN_DONE_MARKER && | ||
| 2933 | next_step->depth > step->depth && | ||
| 2934 | !next_step->parent_pattern_guaranteed | ||
| 2935 | ); | ||
| 2936 | } | ||
| 2937 | |||
| 2938 | void ts_query_disable_capture( | ||
| 2939 | TSQuery *self, | ||
| 2940 | const char *name, | ||
| 2941 | uint32_t length | ||
| 2942 | ) { | ||
| 2943 | // Remove capture information for any pattern step that previously | ||
| 2944 | // captured with the given name. | ||
| 2945 | int id = symbol_table_id_for_name(&self->captures, name, length); | ||
| 2946 | if (id != -1) { | ||
| 2947 | for (unsigned i = 0; i < self->steps.size; i++) { | ||
| 2948 | QueryStep *step = &self->steps.contents[i]; | ||
| 2949 | query_step__remove_capture(step, id); | ||
| 2950 | } | ||
| 2951 | } | ||
| 2952 | } | ||
| 2953 | |||
| 2954 | void ts_query_disable_pattern( | ||
| 2955 | TSQuery *self, | ||
| 2956 | uint32_t pattern_index | ||
| 2957 | ) { | ||
| 2958 | // Remove the given pattern from the pattern map. Its steps will still | ||
| 2959 | // be in the `steps` array, but they will never be read. | ||
| 2960 | for (unsigned i = 0; i < self->pattern_map.size; i++) { | ||
| 2961 | PatternEntry *pattern = &self->pattern_map.contents[i]; | ||
| 2962 | if (pattern->pattern_index == pattern_index) { | ||
| 2963 | array_erase(&self->pattern_map, i); | ||
| 2964 | i--; | ||
| 2965 | } | ||
| 2966 | } | ||
| 2967 | } | ||
| 2968 | |||
| 2969 | /*************** | ||
| 2970 | * QueryCursor | ||
| 2971 | ***************/ | ||
| 2972 | |||
| 2973 | TSQueryCursor *ts_query_cursor_new(void) { | ||
| 2974 | TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); | ||
| 2975 | *self = (TSQueryCursor) { | ||
| 2976 | .did_exceed_match_limit = false, | ||
| 2977 | .ascending = false, | ||
| 2978 | .halted = false, | ||
| 2979 | .states = array_new(), | ||
| 2980 | .finished_states = array_new(), | ||
| 2981 | .capture_list_pool = capture_list_pool_new(), | ||
| 2982 | .start_byte = 0, | ||
| 2983 | .end_byte = UINT32_MAX, | ||
| 2984 | .start_point = {0, 0}, | ||
| 2985 | .end_point = POINT_MAX, | ||
| 2986 | .max_start_depth = UINT32_MAX, | ||
| 2987 | }; | ||
| 2988 | array_reserve(&self->states, 8); | ||
| 2989 | array_reserve(&self->finished_states, 8); | ||
| 2990 | return self; | ||
| 2991 | } | ||
| 2992 | |||
| 2993 | void ts_query_cursor_delete(TSQueryCursor *self) { | ||
| 2994 | array_delete(&self->states); | ||
| 2995 | array_delete(&self->finished_states); | ||
| 2996 | ts_tree_cursor_delete(&self->cursor); | ||
| 2997 | capture_list_pool_delete(&self->capture_list_pool); | ||
| 2998 | ts_free(self); | ||
| 2999 | } | ||
| 3000 | |||
| 3001 | bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { | ||
| 3002 | return self->did_exceed_match_limit; | ||
| 3003 | } | ||
| 3004 | |||
| 3005 | uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { | ||
| 3006 | return self->capture_list_pool.max_capture_list_count; | ||
| 3007 | } | ||
| 3008 | |||
| 3009 | void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { | ||
| 3010 | self->capture_list_pool.max_capture_list_count = limit; | ||
| 3011 | } | ||
| 3012 | |||
// Debug tracing for query execution: forwards to `fprintf(stderr, ...)` when
// DEBUG_EXECUTE_QUERY is defined, and expands to nothing otherwise.
#if defined(DEBUG_EXECUTE_QUERY)
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#else
#define LOG(...)
#endif
| 3018 | |||
| 3019 | void ts_query_cursor_exec( | ||
| 3020 | TSQueryCursor *self, | ||
| 3021 | const TSQuery *query, | ||
| 3022 | TSNode node | ||
| 3023 | ) { | ||
| 3024 | if (query) { | ||
| 3025 | LOG("query steps:\n"); | ||
| 3026 | for (unsigned i = 0; i < query->steps.size; i++) { | ||
| 3027 | QueryStep *step = &query->steps.contents[i]; | ||
| 3028 | LOG(" %u: {", i); | ||
| 3029 | if (step->depth == PATTERN_DONE_MARKER) { | ||
| 3030 | LOG("DONE"); | ||
| 3031 | } else if (step->is_dead_end) { | ||
| 3032 | LOG("dead_end"); | ||
| 3033 | } else if (step->is_pass_through) { | ||
| 3034 | LOG("pass_through"); | ||
| 3035 | } else if (step->symbol != WILDCARD_SYMBOL) { | ||
| 3036 | LOG("symbol: %s", query->language->symbol_names[step->symbol]); | ||
| 3037 | } else { | ||
| 3038 | LOG("symbol: *"); | ||
| 3039 | } | ||
| 3040 | if (step->field) { | ||
| 3041 | LOG(", field: %s", query->language->field_names[step->field]); | ||
| 3042 | } | ||
| 3043 | if (step->alternative_index != NONE) { | ||
| 3044 | LOG(", alternative: %u", step->alternative_index); | ||
| 3045 | } | ||
| 3046 | LOG("},\n"); | ||
| 3047 | } | ||
| 3048 | } | ||
| 3049 | |||
| 3050 | array_clear(&self->states); | ||
| 3051 | array_clear(&self->finished_states); | ||
| 3052 | ts_tree_cursor_reset(&self->cursor, node); | ||
| 3053 | capture_list_pool_reset(&self->capture_list_pool); | ||
| 3054 | self->on_visible_node = true; | ||
| 3055 | self->next_state_id = 0; | ||
| 3056 | self->depth = 0; | ||
| 3057 | self->ascending = false; | ||
| 3058 | self->halted = false; | ||
| 3059 | self->query = query; | ||
| 3060 | self->did_exceed_match_limit = false; | ||
| 3061 | } | ||
| 3062 | |||
| 3063 | void ts_query_cursor_set_byte_range( | ||
| 3064 | TSQueryCursor *self, | ||
| 3065 | uint32_t start_byte, | ||
| 3066 | uint32_t end_byte | ||
| 3067 | ) { | ||
| 3068 | if (end_byte == 0) { | ||
| 3069 | end_byte = UINT32_MAX; | ||
| 3070 | } | ||
| 3071 | self->start_byte = start_byte; | ||
| 3072 | self->end_byte = end_byte; | ||
| 3073 | } | ||
| 3074 | |||
| 3075 | void ts_query_cursor_set_point_range( | ||
| 3076 | TSQueryCursor *self, | ||
| 3077 | TSPoint start_point, | ||
| 3078 | TSPoint end_point | ||
| 3079 | ) { | ||
| 3080 | if (end_point.row == 0 && end_point.column == 0) { | ||
| 3081 | end_point = POINT_MAX; | ||
| 3082 | } | ||
| 3083 | self->start_point = start_point; | ||
| 3084 | self->end_point = end_point; | ||
| 3085 | } | ||
| 3086 | |||
| 3087 | // Search through all of the in-progress states, and find the captured | ||
| 3088 | // node that occurs earliest in the document. | ||
| 3089 | static bool ts_query_cursor__first_in_progress_capture( | ||
| 3090 | TSQueryCursor *self, | ||
| 3091 | uint32_t *state_index, | ||
| 3092 | uint32_t *byte_offset, | ||
| 3093 | uint32_t *pattern_index, | ||
| 3094 | bool *root_pattern_guaranteed | ||
| 3095 | ) { | ||
| 3096 | bool result = false; | ||
| 3097 | *state_index = UINT32_MAX; | ||
| 3098 | *byte_offset = UINT32_MAX; | ||
| 3099 | *pattern_index = UINT32_MAX; | ||
| 3100 | for (unsigned i = 0; i < self->states.size; i++) { | ||
| 3101 | QueryState *state = &self->states.contents[i]; | ||
| 3102 | if (state->dead) continue; | ||
| 3103 | |||
| 3104 | const CaptureList *captures = capture_list_pool_get( | ||
| 3105 | &self->capture_list_pool, | ||
| 3106 | state->capture_list_id | ||
| 3107 | ); | ||
| 3108 | if (state->consumed_capture_count >= captures->size) { | ||
| 3109 | continue; | ||
| 3110 | } | ||
| 3111 | |||
| 3112 | TSNode node = captures->contents[state->consumed_capture_count].node; | ||
| 3113 | if ( | ||
| 3114 | ts_node_end_byte(node) <= self->start_byte || | ||
| 3115 | point_lte(ts_node_end_point(node), self->start_point) | ||
| 3116 | ) { | ||
| 3117 | state->consumed_capture_count++; | ||
| 3118 | i--; | ||
| 3119 | continue; | ||
| 3120 | } | ||
| 3121 | |||
| 3122 | uint32_t node_start_byte = ts_node_start_byte(node); | ||
| 3123 | if ( | ||
| 3124 | !result || | ||
| 3125 | node_start_byte < *byte_offset || | ||
| 3126 | (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) | ||
| 3127 | ) { | ||
| 3128 | QueryStep *step = &self->query->steps.contents[state->step_index]; | ||
| 3129 | if (root_pattern_guaranteed) { | ||
| 3130 | *root_pattern_guaranteed = step->root_pattern_guaranteed; | ||
| 3131 | } else if (step->root_pattern_guaranteed) { | ||
| 3132 | continue; | ||
| 3133 | } | ||
| 3134 | |||
| 3135 | result = true; | ||
| 3136 | *state_index = i; | ||
| 3137 | *byte_offset = node_start_byte; | ||
| 3138 | *pattern_index = state->pattern_index; | ||
| 3139 | } | ||
| 3140 | } | ||
| 3141 | return result; | ||
| 3142 | } | ||
| 3143 | |||
| 3144 | // Determine which node is first in a depth-first traversal | ||
| 3145 | int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { | ||
| 3146 | if (left.id != right.id) { | ||
| 3147 | uint32_t left_start = ts_node_start_byte(left); | ||
| 3148 | uint32_t right_start = ts_node_start_byte(right); | ||
| 3149 | if (left_start < right_start) return -1; | ||
| 3150 | if (left_start > right_start) return 1; | ||
| 3151 | uint32_t left_node_count = ts_node_end_byte(left); | ||
| 3152 | uint32_t right_node_count = ts_node_end_byte(right); | ||
| 3153 | if (left_node_count > right_node_count) return -1; | ||
| 3154 | if (left_node_count < right_node_count) return 1; | ||
| 3155 | } | ||
| 3156 | return 0; | ||
| 3157 | } | ||
| 3158 | |||
| 3159 | // Determine if either state contains a superset of the other state's captures. | ||
| 3160 | void ts_query_cursor__compare_captures( | ||
| 3161 | TSQueryCursor *self, | ||
| 3162 | QueryState *left_state, | ||
| 3163 | QueryState *right_state, | ||
| 3164 | bool *left_contains_right, | ||
| 3165 | bool *right_contains_left | ||
| 3166 | ) { | ||
| 3167 | const CaptureList *left_captures = capture_list_pool_get( | ||
| 3168 | &self->capture_list_pool, | ||
| 3169 | left_state->capture_list_id | ||
| 3170 | ); | ||
| 3171 | const CaptureList *right_captures = capture_list_pool_get( | ||
| 3172 | &self->capture_list_pool, | ||
| 3173 | right_state->capture_list_id | ||
| 3174 | ); | ||
| 3175 | *left_contains_right = true; | ||
| 3176 | *right_contains_left = true; | ||
| 3177 | unsigned i = 0, j = 0; | ||
| 3178 | for (;;) { | ||
| 3179 | if (i < left_captures->size) { | ||
| 3180 | if (j < right_captures->size) { | ||
| 3181 | TSQueryCapture *left = &left_captures->contents[i]; | ||
| 3182 | TSQueryCapture *right = &right_captures->contents[j]; | ||
| 3183 | if (left->node.id == right->node.id && left->index == right->index) { | ||
| 3184 | i++; | ||
| 3185 | j++; | ||
| 3186 | } else { | ||
| 3187 | switch (ts_query_cursor__compare_nodes(left->node, right->node)) { | ||
| 3188 | case -1: | ||
| 3189 | *right_contains_left = false; | ||
| 3190 | i++; | ||
| 3191 | break; | ||
| 3192 | case 1: | ||
| 3193 | *left_contains_right = false; | ||
| 3194 | j++; | ||
| 3195 | break; | ||
| 3196 | default: | ||
| 3197 | *right_contains_left = false; | ||
| 3198 | *left_contains_right = false; | ||
| 3199 | i++; | ||
| 3200 | j++; | ||
| 3201 | break; | ||
| 3202 | } | ||
| 3203 | } | ||
| 3204 | } else { | ||
| 3205 | *right_contains_left = false; | ||
| 3206 | break; | ||
| 3207 | } | ||
| 3208 | } else { | ||
| 3209 | if (j < right_captures->size) { | ||
| 3210 | *left_contains_right = false; | ||
| 3211 | } | ||
| 3212 | break; | ||
| 3213 | } | ||
| 3214 | } | ||
| 3215 | } | ||
| 3216 | |||
| 3217 | static void ts_query_cursor__add_state( | ||
| 3218 | TSQueryCursor *self, | ||
| 3219 | const PatternEntry *pattern | ||
| 3220 | ) { | ||
| 3221 | QueryStep *step = &self->query->steps.contents[pattern->step_index]; | ||
| 3222 | uint32_t start_depth = self->depth - step->depth; | ||
| 3223 | |||
| 3224 | // Keep the states array in ascending order of start_depth and pattern_index, | ||
| 3225 | // so that it can be processed more efficiently elsewhere. Usually, there is | ||
| 3226 | // no work to do here because of two facts: | ||
| 3227 | // * States with lower start_depth are naturally added first due to the | ||
| 3228 | // order in which nodes are visited. | ||
| 3229 | // * Earlier patterns are naturally added first because of the ordering of the | ||
| 3230 | // pattern_map data structure that's used to initiate matches. | ||
| 3231 | // | ||
| 3232 | // This loop is only needed in cases where two conditions hold: | ||
| 3233 | // * A pattern consists of more than one sibling node, so that its states | ||
| 3234 | // remain in progress after exiting the node that started the match. | ||
| 3235 | // * The first node in the pattern matches against multiple nodes at the | ||
| 3236 | // same depth. | ||
| 3237 | // | ||
| 3238 | // An example of this is the pattern '((comment)* (function))'. If multiple | ||
| 3239 | // `comment` nodes appear in a row, then we may initiate a new state for this | ||
| 3240 | // pattern while another state for the same pattern is already in progress. | ||
| 3241 | // If there are multiple patterns like this in a query, then this loop will | ||
| 3242 | // need to execute in order to keep the states ordered by pattern_index. | ||
| 3243 | uint32_t index = self->states.size; | ||
| 3244 | while (index > 0) { | ||
| 3245 | QueryState *prev_state = &self->states.contents[index - 1]; | ||
| 3246 | if (prev_state->start_depth < start_depth) break; | ||
| 3247 | if (prev_state->start_depth == start_depth) { | ||
| 3248 | // Avoid inserting an unnecessary duplicate state, which would be | ||
| 3249 | // immediately pruned by the longest-match criteria. | ||
| 3250 | if ( | ||
| 3251 | prev_state->pattern_index == pattern->pattern_index && | ||
| 3252 | prev_state->step_index == pattern->step_index | ||
| 3253 | ) return; | ||
| 3254 | if (prev_state->pattern_index <= pattern->pattern_index) break; | ||
| 3255 | } | ||
| 3256 | index--; | ||
| 3257 | } | ||
| 3258 | |||
| 3259 | LOG( | ||
| 3260 | " start state. pattern:%u, step:%u\n", | ||
| 3261 | pattern->pattern_index, | ||
| 3262 | pattern->step_index | ||
| 3263 | ); | ||
| 3264 | array_insert(&self->states, index, ((QueryState) { | ||
| 3265 | .id = UINT32_MAX, | ||
| 3266 | .capture_list_id = NONE, | ||
| 3267 | .step_index = pattern->step_index, | ||
| 3268 | .pattern_index = pattern->pattern_index, | ||
| 3269 | .start_depth = start_depth, | ||
| 3270 | .consumed_capture_count = 0, | ||
| 3271 | .seeking_immediate_match = true, | ||
| 3272 | .has_in_progress_alternatives = false, | ||
| 3273 | .needs_parent = step->depth == 1, | ||
| 3274 | .dead = false, | ||
| 3275 | })); | ||
| 3276 | } | ||
| 3277 | |||
| 3278 | // Acquire a capture list for this state. If there are no capture lists left in the | ||
| 3279 | // pool, this will steal the capture list from another existing state, and mark that | ||
| 3280 | // other state as 'dead'. | ||
| 3281 | static CaptureList *ts_query_cursor__prepare_to_capture( | ||
| 3282 | TSQueryCursor *self, | ||
| 3283 | QueryState *state, | ||
| 3284 | unsigned state_index_to_preserve | ||
| 3285 | ) { | ||
| 3286 | if (state->capture_list_id == NONE) { | ||
| 3287 | state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); | ||
| 3288 | |||
| 3289 | // If there are no capture lists left in the pool, then terminate whichever | ||
| 3290 | // state has captured the earliest node in the document, and steal its | ||
| 3291 | // capture list. | ||
| 3292 | if (state->capture_list_id == NONE) { | ||
| 3293 | self->did_exceed_match_limit = true; | ||
| 3294 | uint32_t state_index, byte_offset, pattern_index; | ||
| 3295 | if ( | ||
| 3296 | ts_query_cursor__first_in_progress_capture( | ||
| 3297 | self, | ||
| 3298 | &state_index, | ||
| 3299 | &byte_offset, | ||
| 3300 | &pattern_index, | ||
| 3301 | NULL | ||
| 3302 | ) && | ||
| 3303 | state_index != state_index_to_preserve | ||
| 3304 | ) { | ||
| 3305 | LOG( | ||
| 3306 | " abandon state. index:%u, pattern:%u, offset:%u.\n", | ||
| 3307 | state_index, pattern_index, byte_offset | ||
| 3308 | ); | ||
| 3309 | QueryState *other_state = &self->states.contents[state_index]; | ||
| 3310 | state->capture_list_id = other_state->capture_list_id; | ||
| 3311 | other_state->capture_list_id = NONE; | ||
| 3312 | other_state->dead = true; | ||
| 3313 | CaptureList *list = capture_list_pool_get_mut( | ||
| 3314 | &self->capture_list_pool, | ||
| 3315 | state->capture_list_id | ||
| 3316 | ); | ||
| 3317 | array_clear(list); | ||
| 3318 | return list; | ||
| 3319 | } else { | ||
| 3320 | LOG(" ran out of capture lists"); | ||
| 3321 | return NULL; | ||
| 3322 | } | ||
| 3323 | } | ||
| 3324 | } | ||
| 3325 | return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); | ||
| 3326 | } | ||
| 3327 | |||
| 3328 | static void ts_query_cursor__capture( | ||
| 3329 | TSQueryCursor *self, | ||
| 3330 | QueryState *state, | ||
| 3331 | QueryStep *step, | ||
| 3332 | TSNode node | ||
| 3333 | ) { | ||
| 3334 | if (state->dead) return; | ||
| 3335 | CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); | ||
| 3336 | if (!capture_list) { | ||
| 3337 | state->dead = true; | ||
| 3338 | return; | ||
| 3339 | } | ||
| 3340 | |||
| 3341 | for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { | ||
| 3342 | uint16_t capture_id = step->capture_ids[j]; | ||
| 3343 | if (step->capture_ids[j] == NONE) break; | ||
| 3344 | array_push(capture_list, ((TSQueryCapture) { node, capture_id })); | ||
| 3345 | LOG( | ||
| 3346 | " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", | ||
| 3347 | ts_node_type(node), | ||
| 3348 | state->pattern_index, | ||
| 3349 | capture_id, | ||
| 3350 | capture_list->size | ||
| 3351 | ); | ||
| 3352 | } | ||
| 3353 | } | ||
| 3354 | |||
| 3355 | // Duplicate the given state and insert the newly-created state immediately after | ||
| 3356 | // the given state in the `states` array. Ensures that the given state reference is | ||
| 3357 | // still valid, even if the states array is reallocated. | ||
| 3358 | static QueryState *ts_query_cursor__copy_state( | ||
| 3359 | TSQueryCursor *self, | ||
| 3360 | QueryState **state_ref | ||
| 3361 | ) { | ||
| 3362 | const QueryState *state = *state_ref; | ||
| 3363 | uint32_t state_index = (uint32_t)(state - self->states.contents); | ||
| 3364 | QueryState copy = *state; | ||
| 3365 | copy.capture_list_id = NONE; | ||
| 3366 | |||
| 3367 | // If the state has captures, copy its capture list. | ||
| 3368 | if (state->capture_list_id != NONE) { | ||
| 3369 | CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index); | ||
| 3370 | if (!new_captures) return NULL; | ||
| 3371 | const CaptureList *old_captures = capture_list_pool_get( | ||
| 3372 | &self->capture_list_pool, | ||
| 3373 | state->capture_list_id | ||
| 3374 | ); | ||
| 3375 | array_push_all(new_captures, old_captures); | ||
| 3376 | } | ||
| 3377 | |||
| 3378 | array_insert(&self->states, state_index + 1, copy); | ||
| 3379 | *state_ref = &self->states.contents[state_index]; | ||
| 3380 | return &self->states.contents[state_index + 1]; | ||
| 3381 | } | ||
| 3382 | |||
| 3383 | static inline bool ts_query_cursor__should_descend( | ||
| 3384 | TSQueryCursor *self, | ||
| 3385 | bool node_intersects_range | ||
| 3386 | ) { | ||
| 3387 | |||
| 3388 | if (node_intersects_range && self->depth < self->max_start_depth) { | ||
| 3389 | return true; | ||
| 3390 | } | ||
| 3391 | |||
| 3392 | // If there are in-progress matches whose remaining steps occur | ||
| 3393 | // deeper in the tree, then descend. | ||
| 3394 | for (unsigned i = 0; i < self->states.size; i++) { | ||
| 3395 | QueryState *state = &self->states.contents[i];; | ||
| 3396 | QueryStep *next_step = &self->query->steps.contents[state->step_index]; | ||
| 3397 | if ( | ||
| 3398 | next_step->depth != PATTERN_DONE_MARKER && | ||
| 3399 | state->start_depth + next_step->depth > self->depth | ||
| 3400 | ) { | ||
| 3401 | return true; | ||
| 3402 | } | ||
| 3403 | } | ||
| 3404 | |||
| 3405 | if (self->depth >= self->max_start_depth) { | ||
| 3406 | return false; | ||
| 3407 | } | ||
| 3408 | |||
| 3409 | // If the current node is hidden, then a non-rooted pattern might match | ||
| 3410 | // one of its roots inside of this node, and match another of its roots | ||
| 3411 | // as part of a sibling node, so we may need to descend. | ||
| 3412 | if (!self->on_visible_node) { | ||
| 3413 | // Descending into a repetition node outside of the range can be | ||
| 3414 | // expensive, because these nodes can have many visible children. | ||
| 3415 | // Avoid descending into repetition nodes unless we have already | ||
| 3416 | // determined that this query can match rootless patterns inside | ||
| 3417 | // of this type of repetition node. | ||
| 3418 | Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); | ||
| 3419 | if (ts_subtree_is_repetition(subtree)) { | ||
| 3420 | bool exists; | ||
| 3421 | uint32_t index; | ||
| 3422 | array_search_sorted_by( | ||
| 3423 | &self->query->repeat_symbols_with_rootless_patterns,, | ||
| 3424 | ts_subtree_symbol(subtree), | ||
| 3425 | &index, | ||
| 3426 | &exists | ||
| 3427 | ); | ||
| 3428 | return exists; | ||
| 3429 | } | ||
| 3430 | |||
| 3431 | return true; | ||
| 3432 | } | ||
| 3433 | |||
| 3434 | return false; | ||
| 3435 | } | ||
| 3436 | |||
| 3437 | // Walk the tree, processing patterns until at least one pattern finishes. | ||
| 3438 | // If one or more patterns finish, return `true` and store their states in the | ||
| 3439 | // `finished_states` array. Multiple patterns can finish on the same node. If | ||
| 3440 | // there are no more matches, return `false`. | ||
| 3441 | static inline bool ts_query_cursor__advance( | ||
| 3442 | TSQueryCursor *self, | ||
| 3443 | bool stop_on_definite_step | ||
| 3444 | ) { | ||
| 3445 | bool did_match = false; | ||
| 3446 | for (;;) { | ||
| 3447 | if (self->halted) { | ||
| 3448 | while (self->states.size > 0) { | ||
| 3449 | QueryState state = array_pop(&self->states); | ||
| 3450 | capture_list_pool_release( | ||
| 3451 | &self->capture_list_pool, | ||
| 3452 | state.capture_list_id | ||
| 3453 | ); | ||
| 3454 | } | ||
| 3455 | } | ||
| 3456 | |||
| 3457 | if (did_match || self->halted) return did_match; | ||
| 3458 | |||
| 3459 | // Exit the current node. | ||
| 3460 | if (self->ascending) { | ||
| 3461 | if (self->on_visible_node) { | ||
| 3462 | LOG( | ||
| 3463 | "leave node. depth:%u, type:%s\n", | ||
| 3464 | self->depth, | ||
| 3465 | ts_node_type(ts_tree_cursor_current_node(&self->cursor)) | ||
| 3466 | ); | ||
| 3467 | |||
| 3468 | // After leaving a node, remove any states that cannot make further progress. | ||
| 3469 | uint32_t deleted_count = 0; | ||
| 3470 | for (unsigned i = 0, n = self->states.size; i < n; i++) { | ||
| 3471 | QueryState *state = &self->states.contents[i]; | ||
| 3472 | QueryStep *step = &self->query->steps.contents[state->step_index]; | ||
| 3473 | |||
| 3474 | // If a state completed its pattern inside of this node, but was deferred from finishing | ||
| 3475 | // in order to search for longer matches, mark it as finished. | ||
| 3476 | if ( | ||
| 3477 | step->depth == PATTERN_DONE_MARKER && | ||
| 3478 | (state->start_depth > self->depth || self->depth == 0) | ||
| 3479 | ) { | ||
| 3480 | LOG(" finish pattern %u\n", state->pattern_index); | ||
| 3481 | array_push(&self->finished_states, *state); | ||
| 3482 | did_match = true; | ||
| 3483 | deleted_count++; | ||
| 3484 | } | ||
| 3485 | |||
| 3486 | // If a state needed to match something within this node, then remove that state | ||
| 3487 | // as it has failed to match. | ||
| 3488 | else if ( | ||
| 3489 | step->depth != PATTERN_DONE_MARKER && | ||
| 3490 | (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth | ||
| 3491 | ) { | ||
| 3492 | LOG( | ||
| 3493 | " failed to match. pattern:%u, step:%u\n", | ||
| 3494 | state->pattern_index, | ||
| 3495 | state->step_index | ||
| 3496 | ); | ||
| 3497 | capture_list_pool_release( | ||
| 3498 | &self->capture_list_pool, | ||
| 3499 | state->capture_list_id | ||
| 3500 | ); | ||
| 3501 | deleted_count++; | ||
| 3502 | } | ||
| 3503 | |||
| 3504 | else if (deleted_count > 0) { | ||
| 3505 | self->states.contents[i - deleted_count] = *state; | ||
| 3506 | } | ||
| 3507 | } | ||
| 3508 | self->states.size -= deleted_count; | ||
| 3509 | } | ||
| 3510 | |||
| 3511 | // Leave this node by stepping to its next sibling or to its parent. | ||
| 3512 | switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { | ||
| 3513 | case TreeCursorStepVisible: | ||
| 3514 | if (!self->on_visible_node) { | ||
| 3515 | self->depth++; | ||
| 3516 | self->on_visible_node = true; | ||
| 3517 | } | ||
| 3518 | self->ascending = false; | ||
| 3519 | break; | ||
| 3520 | case TreeCursorStepHidden: | ||
| 3521 | if (self->on_visible_node) { | ||
| 3522 | self->depth--; | ||
| 3523 | self->on_visible_node = false; | ||
| 3524 | } | ||
| 3525 | self->ascending = false; | ||
| 3526 | break; | ||
| 3527 | default: | ||
| 3528 | if (ts_tree_cursor_goto_parent(&self->cursor)) { | ||
| 3529 | self->depth--; | ||
| 3530 | } else { | ||
| 3531 | LOG("halt at root\n"); | ||
| 3532 | self->halted = true; | ||
| 3533 | } | ||
| 3534 | } | ||
| 3535 | } | ||
| 3536 | |||
| 3537 | // Enter a new node. | ||
| 3538 | else { | ||
| 3539 | // Get the properties of the current node. | ||
| 3540 | TSNode node = ts_tree_cursor_current_node(&self->cursor); | ||
| 3541 | TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); | ||
| 3542 | bool parent_precedes_range = !ts_node_is_null(parent_node) && ( | ||
| 3543 | ts_node_end_byte(parent_node) <= self->start_byte || | ||
| 3544 | point_lte(ts_node_end_point(parent_node), self->start_point) | ||
| 3545 | ); | ||
| 3546 | bool parent_follows_range = !ts_node_is_null(parent_node) && ( | ||
| 3547 | ts_node_start_byte(parent_node) >= self->end_byte || | ||
| 3548 | point_gte(ts_node_start_point(parent_node), self->end_point) | ||
| 3549 | ); | ||
| 3550 | bool node_precedes_range = parent_precedes_range || ( | ||
| 3551 | ts_node_end_byte(node) <= self->start_byte || | ||
| 3552 | point_lte(ts_node_end_point(node), self->start_point) | ||
| 3553 | ); | ||
| 3554 | bool node_follows_range = parent_follows_range || ( | ||
| 3555 | ts_node_start_byte(node) >= self->end_byte || | ||
| 3556 | point_gte(ts_node_start_point(node), self->end_point) | ||
| 3557 | ); | ||
| 3558 | bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; | ||
| 3559 | bool node_intersects_range = !node_precedes_range && !node_follows_range; | ||
| 3560 | |||
| 3561 | if (self->on_visible_node) { | ||
| 3562 | TSSymbol symbol = ts_node_symbol(node); | ||
| 3563 | bool is_named = ts_node_is_named(node); | ||
| 3564 | bool has_later_siblings; | ||
| 3565 | bool has_later_named_siblings; | ||
| 3566 | bool can_have_later_siblings_with_this_field; | ||
| 3567 | TSFieldId field_id = 0; | ||
| 3568 | TSSymbol supertypes[8] = {0}; | ||
| 3569 | unsigned supertype_count = 8; | ||
| 3570 | ts_tree_cursor_current_status( | ||
| 3571 | &self->cursor, | ||
| 3572 | &field_id, | ||
| 3573 | &has_later_siblings, | ||
| 3574 | &has_later_named_siblings, | ||
| 3575 | &can_have_later_siblings_with_this_field, | ||
| 3576 | supertypes, | ||
| 3577 | &supertype_count | ||
| 3578 | ); | ||
| 3579 | LOG( | ||
| 3580 | "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", | ||
| 3581 | self->depth, | ||
| 3582 | ts_node_type(node), | ||
| 3583 | ts_language_field_name_for_id(self->query->language, field_id), | ||
| 3584 | ts_node_start_point(node).row, | ||
| 3585 | self->states.size, | ||
| 3586 | self->finished_states.size | ||
| 3587 | ); | ||
| 3588 | |||
| 3589 | bool node_is_error = symbol == ts_builtin_sym_error; | ||
| 3590 | bool parent_is_error = | ||
| 3591 | !ts_node_is_null(parent_node) && | ||
| 3592 | ts_node_symbol(parent_node) == ts_builtin_sym_error; | ||
| 3593 | |||
| 3594 | // Add new states for any patterns whose root node is a wildcard. | ||
| 3595 | if (!node_is_error) { | ||
| 3596 | for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { | ||
| 3597 | PatternEntry *pattern = &self->query->pattern_map.contents[i]; | ||
| 3598 | |||
| 3599 | // If this node matches the first step of the pattern, then add a new | ||
| 3600 | // state at the start of this pattern. | ||
| 3601 | QueryStep *step = &self->query->steps.contents[pattern->step_index]; | ||
| 3602 | uint32_t start_depth = self->depth - step->depth; | ||
| 3603 | if ( | ||
| 3604 | (pattern->is_rooted ? | ||
| 3605 | node_intersects_range : | ||
| 3606 | (parent_intersects_range && !parent_is_error)) && | ||
| 3607 | (!step->field || field_id == step->field) && | ||
| 3608 | (!step->supertype_symbol || supertype_count > 0) && | ||
| 3609 | (start_depth <= self->max_start_depth) | ||
| 3610 | ) { | ||
| 3611 | ts_query_cursor__add_state(self, pattern); | ||
| 3612 | } | ||
| 3613 | } | ||
| 3614 | } | ||
| 3615 | |||
| 3616 | // Add new states for any patterns whose root node matches this node. | ||
| 3617 | unsigned i; | ||
| 3618 | if (ts_query__pattern_map_search(self->query, symbol, &i)) { | ||
| 3619 | PatternEntry *pattern = &self->query->pattern_map.contents[i]; | ||
| 3620 | |||
| 3621 | QueryStep *step = &self->query->steps.contents[pattern->step_index]; | ||
| 3622 | uint32_t start_depth = self->depth - step->depth; | ||
| 3623 | do { | ||
| 3624 | // If this node matches the first step of the pattern, then add a new | ||
| 3625 | // state at the start of this pattern. | ||
| 3626 | if ( | ||
| 3627 | (pattern->is_rooted ? | ||
| 3628 | node_intersects_range : | ||
| 3629 | (parent_intersects_range && !parent_is_error)) && | ||
| 3630 | (!step->field || field_id == step->field) && | ||
| 3631 | (start_depth <= self->max_start_depth) | ||
| 3632 | ) { | ||
| 3633 | ts_query_cursor__add_state(self, pattern); | ||
| 3634 | } | ||
| 3635 | |||
| 3636 | // Advance to the next pattern whose root node matches this node. | ||
| 3637 | i++; | ||
| 3638 | if (i == self->query->pattern_map.size) break; | ||
| 3639 | pattern = &self->query->pattern_map.contents[i]; | ||
| 3640 | step = &self->query->steps.contents[pattern->step_index]; | ||
| 3641 | } while (step->symbol == symbol); | ||
| 3642 | } | ||
| 3643 | |||
| 3644 | // Update all of the in-progress states with current node. | ||
| 3645 | for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { | ||
| 3646 | QueryState *state = &self->states.contents[j]; | ||
| 3647 | QueryStep *step = &self->query->steps.contents[state->step_index]; | ||
| 3648 | state->has_in_progress_alternatives = false; | ||
| 3649 | copy_count = 0; | ||
| 3650 | |||
| 3651 | // Check that the node matches all of the criteria for the next | ||
| 3652 | // step of the pattern. | ||
| 3653 | if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; | ||
| 3654 | |||
| 3655 | // Determine if this node matches this step of the pattern, and also | ||
| 3656 | // if this node can have later siblings that match this step of the | ||
| 3657 | // pattern. | ||
| 3658 | bool node_does_match = false; | ||
| 3659 | if (step->symbol == WILDCARD_SYMBOL) { | ||
| 3660 | node_does_match = !node_is_error && (is_named || !step->is_named); | ||
| 3661 | } else { | ||
| 3662 | node_does_match = symbol == step->symbol; | ||
| 3663 | } | ||
| 3664 | bool later_sibling_can_match = has_later_siblings; | ||
| 3665 | if ((step->is_immediate && is_named) || state->seeking_immediate_match) { | ||
| 3666 | later_sibling_can_match = false; | ||
| 3667 | } | ||
| 3668 | if (step->is_last_child && has_later_named_siblings) { | ||
| 3669 | node_does_match = false; | ||
| 3670 | } | ||
| 3671 | if (step->supertype_symbol) { | ||
| 3672 | bool has_supertype = false; | ||
| 3673 | for (unsigned k = 0; k < supertype_count; k++) { | ||
| 3674 | if (supertypes[k] == step->supertype_symbol) { | ||
| 3675 | has_supertype = true; | ||
| 3676 | break; | ||
| 3677 | } | ||
| 3678 | } | ||
| 3679 | if (!has_supertype) node_does_match = false; | ||
| 3680 | } | ||
| 3681 | if (step->field) { | ||
| 3682 | if (step->field == field_id) { | ||
| 3683 | if (!can_have_later_siblings_with_this_field) { | ||
| 3684 | later_sibling_can_match = false; | ||
| 3685 | } | ||
| 3686 | } else { | ||
| 3687 | node_does_match = false; | ||
| 3688 | } | ||
| 3689 | } | ||
| 3690 | |||
| 3691 | if (step->negated_field_list_id) { | ||
| 3692 | TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; | ||
| 3693 | for (;;) { | ||
| 3694 | TSFieldId negated_field_id = *negated_field_ids; | ||
| 3695 | if (negated_field_id) { | ||
| 3696 | negated_field_ids++; | ||
| 3697 | if (ts_node_child_by_field_id(node, negated_field_id).id) { | ||
| 3698 | node_does_match = false; | ||
| 3699 | break; | ||
| 3700 | } | ||
| 3701 | } else { | ||
| 3702 | break; | ||
| 3703 | } | ||
| 3704 | } | ||
| 3705 | } | ||
| 3706 | |||
| 3707 | // Remove states immediately if it is ever clear that they cannot match. | ||
| 3708 | if (!node_does_match) { | ||
| 3709 | if (!later_sibling_can_match) { | ||
| 3710 | LOG( | ||
| 3711 | " discard state. pattern:%u, step:%u\n", | ||
| 3712 | state->pattern_index, | ||
| 3713 | state->step_index | ||
| 3714 | ); | ||
| 3715 | capture_list_pool_release( | ||
| 3716 | &self->capture_list_pool, | ||
| 3717 | state->capture_list_id | ||
| 3718 | ); | ||
| 3719 | array_erase(&self->states, j); | ||
| 3720 | j--; | ||
| 3721 | } | ||
| 3722 | continue; | ||
| 3723 | } | ||
| 3724 | |||
| 3725 | // Some patterns can match their root node in multiple ways, capturing different | ||
| 3726 | // children. If this pattern step could match later children within the same | ||
| 3727 | // parent, then this query state cannot simply be updated in place. It must be | ||
| 3728 | // split into two states: one that matches this node, and one which skips over | ||
| 3729 | // this node, to preserve the possibility of matching later siblings. | ||
| 3730 | if (later_sibling_can_match && ( | ||
| 3731 | step->contains_captures || | ||
| 3732 | ts_query__step_is_fallible(self->query, state->step_index) | ||
| 3733 | )) { | ||
| 3734 | if (ts_query_cursor__copy_state(self, &state)) { | ||
| 3735 | LOG( | ||
| 3736 | " split state for capture. pattern:%u, step:%u\n", | ||
| 3737 | state->pattern_index, | ||
| 3738 | state->step_index | ||
| 3739 | ); | ||
| 3740 | copy_count++; | ||
| 3741 | } | ||
| 3742 | } | ||
| 3743 | |||
| 3744 | // If this pattern started with a wildcard, such that the pattern map | ||
| 3745 | // actually points to the *second* step of the pattern, then check | ||
| 3746 | // that the node has a parent, and capture the parent node if necessary. | ||
| 3747 | if (state->needs_parent) { | ||
| 3748 | TSNode parent = ts_tree_cursor_parent_node(&self->cursor); | ||
| 3749 | if (ts_node_is_null(parent)) { | ||
| 3750 | LOG(" missing parent node\n"); | ||
| 3751 | state->dead = true; | ||
| 3752 | } else { | ||
| 3753 | state->needs_parent = false; | ||
| 3754 | QueryStep *skipped_wildcard_step = step; | ||
| 3755 | do { | ||
| 3756 | skipped_wildcard_step--; | ||
| 3757 | } while ( | ||
| 3758 | skipped_wildcard_step->is_dead_end || | ||
| 3759 | skipped_wildcard_step->is_pass_through || | ||
| 3760 | skipped_wildcard_step->depth > 0 | ||
| 3761 | ); | ||
| 3762 | if (skipped_wildcard_step->capture_ids[0] != NONE) { | ||
| 3763 | LOG(" capture wildcard parent\n"); | ||
| 3764 | ts_query_cursor__capture( | ||
| 3765 | self, | ||
| 3766 | state, | ||
| 3767 | skipped_wildcard_step, | ||
| 3768 | parent | ||
| 3769 | ); | ||
| 3770 | } | ||
| 3771 | } | ||
| 3772 | } | ||
| 3773 | |||
| 3774 | // If the current node is captured in this pattern, add it to the capture list. | ||
| 3775 | if (step->capture_ids[0] != NONE) { | ||
| 3776 | ts_query_cursor__capture(self, state, step, node); | ||
| 3777 | } | ||
| 3778 | |||
| 3779 | if (state->dead) { | ||
| 3780 | array_erase(&self->states, j); | ||
| 3781 | j--; | ||
| 3782 | continue; | ||
| 3783 | } | ||
| 3784 | |||
| 3785 | // Advance this state to the next step of its pattern. | ||
| 3786 | state->step_index++; | ||
| 3787 | state->seeking_immediate_match = false; | ||
| 3788 | LOG( | ||
| 3789 | " advance state. pattern:%u, step:%u\n", | ||
| 3790 | state->pattern_index, | ||
| 3791 | state->step_index | ||
| 3792 | ); | ||
| 3793 | |||
| 3794 | QueryStep *next_step = &self->query->steps.contents[state->step_index]; | ||
| 3795 | if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; | ||
| 3796 | |||
| 3797 | // If this state's next step has an alternative step, then copy the state in order | ||
| 3798 | // to pursue both alternatives. The alternative step itself may have an alternative, | ||
| 3799 | // so this is an iterative process. | ||
| 3800 | unsigned end_index = j + 1; | ||
| 3801 | for (unsigned k = j; k < end_index; k++) { | ||
| 3802 | QueryState *child_state = &self->states.contents[k]; | ||
| 3803 | QueryStep *child_step = &self->query->steps.contents[child_state->step_index]; | ||
| 3804 | if (child_step->alternative_index != NONE) { | ||
| 3805 | // A "dead-end" step exists only to add a non-sequential jump into the step sequence, | ||
| 3806 | // via its alternative index. When a state reaches a dead-end step, it jumps straight | ||
| 3807 | // to the step's alternative. | ||
| 3808 | if (child_step->is_dead_end) { | ||
| 3809 | child_state->step_index = child_step->alternative_index; | ||
| 3810 | k--; | ||
| 3811 | continue; | ||
| 3812 | } | ||
| 3813 | |||
| 3814 | // A "pass-through" step exists only to add a branch into the step sequence, | ||
| 3815 | // via its alternative_index. When a state reaches a pass-through step, it splits | ||
| 3816 | // in order to process the alternative step, and then it advances to the next step. | ||
| 3817 | if (child_step->is_pass_through) { | ||
| 3818 | child_state->step_index++; | ||
| 3819 | k--; | ||
| 3820 | } | ||
| 3821 | |||
| 3822 | QueryState *copy = ts_query_cursor__copy_state(self, &child_state); | ||
| 3823 | if (copy) { | ||
| 3824 | LOG( | ||
| 3825 | " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", | ||
| 3826 | copy->pattern_index, | ||
| 3827 | copy->step_index, | ||
| 3828 | next_step->alternative_index, | ||
| 3829 | next_step->alternative_is_immediate, | ||
| 3830 | capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size | ||
| 3831 | ); | ||
| 3832 | end_index++; | ||
| 3833 | copy_count++; | ||
| 3834 | copy->step_index = child_step->alternative_index; | ||
| 3835 | if (child_step->alternative_is_immediate) { | ||
| 3836 | copy->seeking_immediate_match = true; | ||
| 3837 | } | ||
| 3838 | } | ||
| 3839 | } | ||
| 3840 | } | ||
| 3841 | } | ||
| 3842 | |||
| 3843 | for (unsigned j = 0; j < self->states.size; j++) { | ||
| 3844 | QueryState *state = &self->states.contents[j]; | ||
| 3845 | if (state->dead) { | ||
| 3846 | array_erase(&self->states, j); | ||
| 3847 | j--; | ||
| 3848 | continue; | ||
| 3849 | } | ||
| 3850 | |||
| 3851 | // Enforce the longest-match criteria. When a query pattern contains optional or | ||
| 3852 | // repeated nodes, this is necessary to avoid multiple redundant states, where | ||
| 3853 | // one state has a strict subset of another state's captures. | ||
| 3854 | bool did_remove = false; | ||
| 3855 | for (unsigned k = j + 1; k < self->states.size; k++) { | ||
| 3856 | QueryState *other_state = &self->states.contents[k]; | ||
| 3857 | |||
| 3858 | // Query states are kept in ascending order of start_depth and pattern_index. | ||
| 3859 | // Since the longest-match criteria is only used for deduping matches of the same | ||
| 3860 | // pattern and root node, we only need to perform pairwise comparisons within a | ||
| 3861 | // small slice of the states array. | ||
| 3862 | if ( | ||
| 3863 | other_state->start_depth != state->start_depth || | ||
| 3864 | other_state->pattern_index != state->pattern_index | ||
| 3865 | ) break; | ||
| 3866 | |||
| 3867 | bool left_contains_right, right_contains_left; | ||
| 3868 | ts_query_cursor__compare_captures( | ||
| 3869 | self, | ||
| 3870 | state, | ||
| 3871 | other_state, | ||
| 3872 | &left_contains_right, | ||
| 3873 | &right_contains_left | ||
| 3874 | ); | ||
| 3875 | if (left_contains_right) { | ||
| 3876 | if (state->step_index == other_state->step_index) { | ||
| 3877 | LOG( | ||
| 3878 | " drop shorter state. pattern: %u, step_index: %u\n", | ||
| 3879 | state->pattern_index, | ||
| 3880 | state->step_index | ||
| 3881 | ); | ||
| 3882 | capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); | ||
| 3883 | array_erase(&self->states, k); | ||
| 3884 | k--; | ||
| 3885 | continue; | ||
| 3886 | } | ||
| 3887 | other_state->has_in_progress_alternatives = true; | ||
| 3888 | } | ||
| 3889 | if (right_contains_left) { | ||
| 3890 | if (state->step_index == other_state->step_index) { | ||
| 3891 | LOG( | ||
| 3892 | " drop shorter state. pattern: %u, step_index: %u\n", | ||
| 3893 | state->pattern_index, | ||
| 3894 | state->step_index | ||
| 3895 | ); | ||
| 3896 | capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); | ||
| 3897 | array_erase(&self->states, j); | ||
| 3898 | j--; | ||
| 3899 | did_remove = true; | ||
| 3900 | break; | ||
| 3901 | } | ||
| 3902 | state->has_in_progress_alternatives = true; | ||
| 3903 | } | ||
| 3904 | } | ||
| 3905 | |||
| 3906 | // If the state is at the end of its pattern, remove it from the list | ||
| 3907 | // of in-progress states and add it to the list of finished states. | ||
| 3908 | if (!did_remove) { | ||
| 3909 | LOG( | ||
| 3910 | " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", | ||
| 3911 | state->pattern_index, | ||
| 3912 | state->start_depth, | ||
| 3913 | state->step_index, | ||
| 3914 | capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size | ||
| 3915 | ); | ||
| 3916 | QueryStep *next_step = &self->query->steps.contents[state->step_index]; | ||
| 3917 | if (next_step->depth == PATTERN_DONE_MARKER) { | ||
| 3918 | if (state->has_in_progress_alternatives) { | ||
| 3919 | LOG(" defer finishing pattern %u\n", state->pattern_index); | ||
| 3920 | } else { | ||
| 3921 | LOG(" finish pattern %u\n", state->pattern_index); | ||
| 3922 | array_push(&self->finished_states, *state); | ||
| 3923 | array_erase(&self->states, (uint32_t)(state - self->states.contents)); | ||
| 3924 | did_match = true; | ||
| 3925 | j--; | ||
| 3926 | } | ||
| 3927 | } | ||
| 3928 | } | ||
| 3929 | } | ||
| 3930 | } | ||
| 3931 | |||
| 3932 | if (ts_query_cursor__should_descend(self, node_intersects_range)) { | ||
| 3933 | switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { | ||
| 3934 | case TreeCursorStepVisible: | ||
| 3935 | self->depth++; | ||
| 3936 | self->on_visible_node = true; | ||
| 3937 | continue; | ||
| 3938 | case TreeCursorStepHidden: | ||
| 3939 | self->on_visible_node = false; | ||
| 3940 | continue; | ||
| 3941 | default: | ||
| 3942 | break; | ||
| 3943 | } | ||
| 3944 | } | ||
| 3945 | |||
| 3946 | self->ascending = true; | ||
| 3947 | } | ||
| 3948 | } | ||
| 3949 | } | ||
| 3950 | |||
| 3951 | bool ts_query_cursor_next_match( | ||
| 3952 | TSQueryCursor *self, | ||
| 3953 | TSQueryMatch *match | ||
| 3954 | ) { | ||
| 3955 | if (self->finished_states.size == 0) { | ||
| 3956 | if (!ts_query_cursor__advance(self, false)) { | ||
| 3957 | return false; | ||
| 3958 | } | ||
| 3959 | } | ||
| 3960 | |||
| 3961 | QueryState *state = &self->finished_states.contents[0]; | ||
| 3962 | if (state->id == UINT32_MAX) state->id = self->next_state_id++; | ||
| 3963 | match->id = state->id; | ||
| 3964 | match->pattern_index = state->pattern_index; | ||
| 3965 | const CaptureList *captures = capture_list_pool_get( | ||
| 3966 | &self->capture_list_pool, | ||
| 3967 | state->capture_list_id | ||
| 3968 | ); | ||
| 3969 | match->captures = captures->contents; | ||
| 3970 | match->capture_count = captures->size; | ||
| 3971 | capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); | ||
| 3972 | array_erase(&self->finished_states, 0); | ||
| 3973 | return true; | ||
| 3974 | } | ||
| 3975 | |||
| 3976 | void ts_query_cursor_remove_match( | ||
| 3977 | TSQueryCursor *self, | ||
| 3978 | uint32_t match_id | ||
| 3979 | ) { | ||
| 3980 | for (unsigned i = 0; i < self->finished_states.size; i++) { | ||
| 3981 | const QueryState *state = &self->finished_states.contents[i]; | ||
| 3982 | if (state->id == match_id) { | ||
| 3983 | capture_list_pool_release( | ||
| 3984 | &self->capture_list_pool, | ||
| 3985 | state->capture_list_id | ||
| 3986 | ); | ||
| 3987 | array_erase(&self->finished_states, i); | ||
| 3988 | return; | ||
| 3989 | } | ||
| 3990 | } | ||
| 3991 | |||
| 3992 | // Remove unfinished query states as well to prevent future | ||
| 3993 | // captures for a match being removed. | ||
| 3994 | for (unsigned i = 0; i < self->states.size; i++) { | ||
| 3995 | const QueryState *state = &self->states.contents[i]; | ||
| 3996 | if (state->id == match_id) { | ||
| 3997 | capture_list_pool_release( | ||
| 3998 | &self->capture_list_pool, | ||
| 3999 | state->capture_list_id | ||
| 4000 | ); | ||
| 4001 | array_erase(&self->states, i); | ||
| 4002 | return; | ||
| 4003 | } | ||
| 4004 | } | ||
| 4005 | } | ||
| 4006 | |||
| 4007 | bool ts_query_cursor_next_capture( | ||
| 4008 | TSQueryCursor *self, | ||
| 4009 | TSQueryMatch *match, | ||
| 4010 | uint32_t *capture_index | ||
| 4011 | ) { | ||
| 4012 | // The goal here is to return captures in order, even though they may not | ||
| 4013 | // be discovered in order, because patterns can overlap. Search for matches | ||
| 4014 | // until there is a finished capture that is before any unfinished capture. | ||
| 4015 | for (;;) { | ||
| 4016 | // First, find the earliest capture in an unfinished match. | ||
| 4017 | uint32_t first_unfinished_capture_byte; | ||
| 4018 | uint32_t first_unfinished_pattern_index; | ||
| 4019 | uint32_t first_unfinished_state_index; | ||
| 4020 | bool first_unfinished_state_is_definite = false; | ||
| 4021 | ts_query_cursor__first_in_progress_capture( | ||
| 4022 | self, | ||
| 4023 | &first_unfinished_state_index, | ||
| 4024 | &first_unfinished_capture_byte, | ||
| 4025 | &first_unfinished_pattern_index, | ||
| 4026 | &first_unfinished_state_is_definite | ||
| 4027 | ); | ||
| 4028 | |||
| 4029 | // Then find the earliest capture in a finished match. It must occur | ||
| 4030 | // before the first capture in an *unfinished* match. | ||
| 4031 | QueryState *first_finished_state = NULL; | ||
| 4032 | uint32_t first_finished_capture_byte = first_unfinished_capture_byte; | ||
| 4033 | uint32_t first_finished_pattern_index = first_unfinished_pattern_index; | ||
| 4034 | for (unsigned i = 0; i < self->finished_states.size;) { | ||
| 4035 | QueryState *state = &self->finished_states.contents[i]; | ||
| 4036 | const CaptureList *captures = capture_list_pool_get( | ||
| 4037 | &self->capture_list_pool, | ||
| 4038 | state->capture_list_id | ||
| 4039 | ); | ||
| 4040 | |||
| 4041 | // Remove states whose captures are all consumed. | ||
| 4042 | if (state->consumed_capture_count >= captures->size) { | ||
| 4043 | capture_list_pool_release( | ||
| 4044 | &self->capture_list_pool, | ||
| 4045 | state->capture_list_id | ||
| 4046 | ); | ||
| 4047 | array_erase(&self->finished_states, i); | ||
| 4048 | continue; | ||
| 4049 | } | ||
| 4050 | |||
| 4051 | // Skip captures that precede the cursor's start byte. | ||
| 4052 | TSNode node = captures->contents[state->consumed_capture_count].node; | ||
| 4053 | if (ts_node_end_byte(node) <= self->start_byte) { | ||
| 4054 | state->consumed_capture_count++; | ||
| 4055 | continue; | ||
| 4056 | } | ||
| 4057 | |||
| 4058 | uint32_t node_start_byte = ts_node_start_byte(node); | ||
| 4059 | if ( | ||
| 4060 | node_start_byte < first_finished_capture_byte || | ||
| 4061 | ( | ||
| 4062 | node_start_byte == first_finished_capture_byte && | ||
| 4063 | state->pattern_index < first_finished_pattern_index | ||
| 4064 | ) | ||
| 4065 | ) { | ||
| 4066 | first_finished_state = state; | ||
| 4067 | first_finished_capture_byte = node_start_byte; | ||
| 4068 | first_finished_pattern_index = state->pattern_index; | ||
| 4069 | } | ||
| 4070 | i++; | ||
| 4071 | } | ||
| 4072 | |||
| 4073 | // If there is a finished capture that is clearly before any unfinished | ||
| 4074 | // capture, then return its match, and its capture index. Internally | ||
| 4075 | // record the fact that the capture has been 'consumed'. | ||
| 4076 | QueryState *state; | ||
| 4077 | if (first_finished_state) { | ||
| 4078 | state = first_finished_state; | ||
| 4079 | } else if (first_unfinished_state_is_definite) { | ||
| 4080 | state = &self->states.contents[first_unfinished_state_index]; | ||
| 4081 | } else { | ||
| 4082 | state = NULL; | ||
| 4083 | } | ||
| 4084 | |||
| 4085 | if (state) { | ||
| 4086 | if (state->id == UINT32_MAX) state->id = self->next_state_id++; | ||
| 4087 | match->id = state->id; | ||
| 4088 | match->pattern_index = state->pattern_index; | ||
| 4089 | const CaptureList *captures = capture_list_pool_get( | ||
| 4090 | &self->capture_list_pool, | ||
| 4091 | state->capture_list_id | ||
| 4092 | ); | ||
| 4093 | match->captures = captures->contents; | ||
| 4094 | match->capture_count = captures->size; | ||
| 4095 | *capture_index = state->consumed_capture_count; | ||
| 4096 | state->consumed_capture_count++; | ||
| 4097 | return true; | ||
| 4098 | } | ||
| 4099 | |||
| 4100 | if (capture_list_pool_is_empty(&self->capture_list_pool)) { | ||
| 4101 | LOG( | ||
| 4102 | " abandon state. index:%u, pattern:%u, offset:%u.\n", | ||
| 4103 | first_unfinished_state_index, | ||
| 4104 | first_unfinished_pattern_index, | ||
| 4105 | first_unfinished_capture_byte | ||
| 4106 | ); | ||
| 4107 | capture_list_pool_release( | ||
| 4108 | &self->capture_list_pool, | ||
| 4109 | self->states.contents[first_unfinished_state_index].capture_list_id | ||
| 4110 | ); | ||
| 4111 | array_erase(&self->states, first_unfinished_state_index); | ||
| 4112 | } | ||
| 4113 | |||
| 4114 | // If there are no finished matches that are ready to be returned, then | ||
| 4115 | // continue finding more matches. | ||
| 4116 | if ( | ||
| 4117 | !ts_query_cursor__advance(self, true) && | ||
| 4118 | self->finished_states.size == 0 | ||
| 4119 | ) return false; | ||
| 4120 | } | ||
| 4121 | } | ||
| 4122 | |||
| 4123 | void ts_query_cursor_set_max_start_depth( | ||
| 4124 | TSQueryCursor *self, | ||
| 4125 | uint32_t max_start_depth | ||
| 4126 | ) { | ||
| 4127 | self->max_start_depth = max_start_depth; | ||
| 4128 | } | ||
| 4129 | |||
| 4130 | #undef LOG | ||
diff --git a/vendor/tree-sitter/lib/src/reduce_action.h b/vendor/tree-sitter/lib/src/reduce_action.h new file mode 100644 index 0000000..72aff08 --- /dev/null +++ b/vendor/tree-sitter/lib/src/reduce_action.h | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | #ifndef TREE_SITTER_REDUCE_ACTION_H_ | ||
| 2 | #define TREE_SITTER_REDUCE_ACTION_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include "./array.h" | ||
| 9 | #include "tree_sitter/api.h" | ||
| 10 | |||
| 11 | typedef struct { | ||
| 12 | uint32_t count; | ||
| 13 | TSSymbol symbol; | ||
| 14 | int dynamic_precedence; | ||
| 15 | unsigned short production_id; | ||
| 16 | } ReduceAction; | ||
| 17 | |||
| 18 | typedef Array(ReduceAction) ReduceActionSet; | ||
| 19 | |||
| 20 | static inline void ts_reduce_action_set_add(ReduceActionSet *self, | ||
| 21 | ReduceAction new_action) { | ||
| 22 | for (uint32_t i = 0; i < self->size; i++) { | ||
| 23 | ReduceAction action = self->contents[i]; | ||
| 24 | if (action.symbol == new_action.symbol && action.count == new_action.count) | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | array_push(self, new_action); | ||
| 28 | } | ||
| 29 | |||
| 30 | #ifdef __cplusplus | ||
| 31 | } | ||
| 32 | #endif | ||
| 33 | |||
| 34 | #endif // TREE_SITTER_REDUCE_ACTION_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/reusable_node.h b/vendor/tree-sitter/lib/src/reusable_node.h new file mode 100644 index 0000000..63fe3c1 --- /dev/null +++ b/vendor/tree-sitter/lib/src/reusable_node.h | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | #include "./subtree.h" | ||
| 2 | |||
| 3 | typedef struct { | ||
| 4 | Subtree tree; | ||
| 5 | uint32_t child_index; | ||
| 6 | uint32_t byte_offset; | ||
| 7 | } StackEntry; | ||
| 8 | |||
| 9 | typedef struct { | ||
| 10 | Array(StackEntry) stack; | ||
| 11 | Subtree last_external_token; | ||
| 12 | } ReusableNode; | ||
| 13 | |||
| 14 | static inline ReusableNode reusable_node_new(void) { | ||
| 15 | return (ReusableNode) {array_new(), NULL_SUBTREE}; | ||
| 16 | } | ||
| 17 | |||
| 18 | static inline void reusable_node_clear(ReusableNode *self) { | ||
| 19 | array_clear(&self->stack); | ||
| 20 | self->last_external_token = NULL_SUBTREE; | ||
| 21 | } | ||
| 22 | |||
| 23 | static inline Subtree reusable_node_tree(ReusableNode *self) { | ||
| 24 | return self->stack.size > 0 | ||
| 25 | ? self->stack.contents[self->stack.size - 1].tree | ||
| 26 | : NULL_SUBTREE; | ||
| 27 | } | ||
| 28 | |||
| 29 | static inline uint32_t reusable_node_byte_offset(ReusableNode *self) { | ||
| 30 | return self->stack.size > 0 | ||
| 31 | ? self->stack.contents[self->stack.size - 1].byte_offset | ||
| 32 | : UINT32_MAX; | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline void reusable_node_delete(ReusableNode *self) { | ||
| 36 | array_delete(&self->stack); | ||
| 37 | } | ||
| 38 | |||
| 39 | static inline void reusable_node_advance(ReusableNode *self) { | ||
| 40 | StackEntry last_entry = *array_back(&self->stack); | ||
| 41 | uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); | ||
| 42 | if (ts_subtree_has_external_tokens(last_entry.tree)) { | ||
| 43 | self->last_external_token = ts_subtree_last_external_token(last_entry.tree); | ||
| 44 | } | ||
| 45 | |||
| 46 | Subtree tree; | ||
| 47 | uint32_t next_index; | ||
| 48 | do { | ||
| 49 | StackEntry popped_entry = array_pop(&self->stack); | ||
| 50 | next_index = popped_entry.child_index + 1; | ||
| 51 | if (self->stack.size == 0) return; | ||
| 52 | tree = array_back(&self->stack)->tree; | ||
| 53 | } while (ts_subtree_child_count(tree) <= next_index); | ||
| 54 | |||
| 55 | array_push(&self->stack, ((StackEntry) { | ||
| 56 | .tree = ts_subtree_children(tree)[next_index], | ||
| 57 | .child_index = next_index, | ||
| 58 | .byte_offset = byte_offset, | ||
| 59 | })); | ||
| 60 | } | ||
| 61 | |||
| 62 | static inline bool reusable_node_descend(ReusableNode *self) { | ||
| 63 | StackEntry last_entry = *array_back(&self->stack); | ||
| 64 | if (ts_subtree_child_count(last_entry.tree) > 0) { | ||
| 65 | array_push(&self->stack, ((StackEntry) { | ||
| 66 | .tree = ts_subtree_children(last_entry.tree)[0], | ||
| 67 | .child_index = 0, | ||
| 68 | .byte_offset = last_entry.byte_offset, | ||
| 69 | })); | ||
| 70 | return true; | ||
| 71 | } else { | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | static inline void reusable_node_advance_past_leaf(ReusableNode *self) { | ||
| 77 | while (reusable_node_descend(self)) {} | ||
| 78 | reusable_node_advance(self); | ||
| 79 | } | ||
| 80 | |||
| 81 | static inline void reusable_node_reset(ReusableNode *self, Subtree tree) { | ||
| 82 | reusable_node_clear(self); | ||
| 83 | array_push(&self->stack, ((StackEntry) { | ||
| 84 | .tree = tree, | ||
| 85 | .child_index = 0, | ||
| 86 | .byte_offset = 0, | ||
| 87 | })); | ||
| 88 | |||
| 89 | // Never reuse the root node, because it has a non-standard internal structure | ||
| 90 | // due to transformations that are applied when it is accepted: adding the EOF | ||
| 91 | // child and any extra children. | ||
| 92 | if (!reusable_node_descend(self)) { | ||
| 93 | reusable_node_clear(self); | ||
| 94 | } | ||
| 95 | } | ||
diff --git a/vendor/tree-sitter/lib/src/stack.c b/vendor/tree-sitter/lib/src/stack.c new file mode 100644 index 0000000..3484635 --- /dev/null +++ b/vendor/tree-sitter/lib/src/stack.c | |||
| @@ -0,0 +1,897 @@ | |||
| 1 | #include "./alloc.h" | ||
| 2 | #include "./language.h" | ||
| 3 | #include "./subtree.h" | ||
| 4 | #include "./array.h" | ||
| 5 | #include "./stack.h" | ||
| 6 | #include "./length.h" | ||
| 7 | #include <assert.h> | ||
| 8 | #include <stdio.h> | ||
| 9 | |||
// Capacity of StackNode.links; stack_node_add_link drops links beyond this.
#define MAX_LINK_COUNT 8
// stack_node_release recycles nodes into the pool only while it holds fewer
// than this many; beyond that, nodes are freed.
#define MAX_NODE_POOL_SIZE 50
// stack__iter stops forking new iterators once this many are live.
#define MAX_ITERATOR_COUNT 64

// Within this file `inline` is redefined so that functions declared with it
// are force-inlined (and, outside MSVC, also made static).
#if defined _WIN32 && !defined __GNUC__
#define inline __forceinline
#else
#define inline static inline __attribute__((always_inline))
#endif
| 19 | |||
| 20 | typedef struct StackNode StackNode; | ||
| 21 | |||
| 22 | typedef struct { | ||
| 23 | StackNode *node; | ||
| 24 | Subtree subtree; | ||
| 25 | bool is_pending; | ||
| 26 | } StackLink; | ||
| 27 | |||
| 28 | struct StackNode { | ||
| 29 | TSStateId state; | ||
| 30 | Length position; | ||
| 31 | StackLink links[MAX_LINK_COUNT]; | ||
| 32 | short unsigned int link_count; | ||
| 33 | uint32_t ref_count; | ||
| 34 | unsigned error_cost; | ||
| 35 | unsigned node_count; | ||
| 36 | int dynamic_precedence; | ||
| 37 | }; | ||
| 38 | |||
| 39 | typedef struct { | ||
| 40 | StackNode *node; | ||
| 41 | SubtreeArray subtrees; | ||
| 42 | uint32_t subtree_count; | ||
| 43 | bool is_pending; | ||
| 44 | } StackIterator; | ||
| 45 | |||
| 46 | typedef Array(StackNode *) StackNodeArray; | ||
| 47 | |||
| 48 | typedef enum { | ||
| 49 | StackStatusActive, | ||
| 50 | StackStatusPaused, | ||
| 51 | StackStatusHalted, | ||
| 52 | } StackStatus; | ||
| 53 | |||
| 54 | typedef struct { | ||
| 55 | StackNode *node; | ||
| 56 | StackSummary *summary; | ||
| 57 | unsigned node_count_at_last_error; | ||
| 58 | Subtree last_external_token; | ||
| 59 | Subtree lookahead_when_paused; | ||
| 60 | StackStatus status; | ||
| 61 | } StackHead; | ||
| 62 | |||
| 63 | struct Stack { | ||
| 64 | Array(StackHead) heads; | ||
| 65 | StackSliceArray slices; | ||
| 66 | Array(StackIterator) iterators; | ||
| 67 | StackNodeArray node_pool; | ||
| 68 | StackNode *base_node; | ||
| 69 | SubtreePool *subtree_pool; | ||
| 70 | }; | ||
| 71 | |||
| 72 | typedef unsigned StackAction; | ||
| 73 | enum { | ||
| 74 | StackActionNone, | ||
| 75 | StackActionStop = 1, | ||
| 76 | StackActionPop = 2, | ||
| 77 | }; | ||
| 78 | |||
| 79 | typedef StackAction (*StackCallback)(void *, const StackIterator *); | ||
| 80 | |||
| 81 | static void stack_node_retain(StackNode *self) { | ||
| 82 | if (!self) | ||
| 83 | return; | ||
| 84 | assert(self->ref_count > 0); | ||
| 85 | self->ref_count++; | ||
| 86 | assert(self->ref_count != 0); | ||
| 87 | } | ||
| 88 | |||
| 89 | static void stack_node_release( | ||
| 90 | StackNode *self, | ||
| 91 | StackNodeArray *pool, | ||
| 92 | SubtreePool *subtree_pool | ||
| 93 | ) { | ||
| 94 | recur: | ||
| 95 | assert(self->ref_count != 0); | ||
| 96 | self->ref_count--; | ||
| 97 | if (self->ref_count > 0) return; | ||
| 98 | |||
| 99 | StackNode *first_predecessor = NULL; | ||
| 100 | if (self->link_count > 0) { | ||
| 101 | for (unsigned i = self->link_count - 1; i > 0; i--) { | ||
| 102 | StackLink link = self->links[i]; | ||
| 103 | if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); | ||
| 104 | stack_node_release(link.node, pool, subtree_pool); | ||
| 105 | } | ||
| 106 | StackLink link = self->links[0]; | ||
| 107 | if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); | ||
| 108 | first_predecessor = self->links[0].node; | ||
| 109 | } | ||
| 110 | |||
| 111 | if (pool->size < MAX_NODE_POOL_SIZE) { | ||
| 112 | array_push(pool, self); | ||
| 113 | } else { | ||
| 114 | ts_free(self); | ||
| 115 | } | ||
| 116 | |||
| 117 | if (first_predecessor) { | ||
| 118 | self = first_predecessor; | ||
| 119 | goto recur; | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Get the number of nodes in the subtree, for the purpose of measuring | ||
| 124 | /// how much progress has been made by a given version of the stack. | ||
| 125 | static uint32_t stack__subtree_node_count(Subtree subtree) { | ||
| 126 | uint32_t count = ts_subtree_visible_descendant_count(subtree); | ||
| 127 | if (ts_subtree_visible(subtree)) count++; | ||
| 128 | |||
| 129 | // Count intermediate error nodes even though they are not visible, | ||
| 130 | // because a stack version's node count is used to check whether it | ||
| 131 | // has made any progress since the last time it encountered an error. | ||
| 132 | if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++; | ||
| 133 | |||
| 134 | return count; | ||
| 135 | } | ||
| 136 | |||
| 137 | static StackNode *stack_node_new( | ||
| 138 | StackNode *previous_node, | ||
| 139 | Subtree subtree, | ||
| 140 | bool is_pending, | ||
| 141 | TSStateId state, | ||
| 142 | StackNodeArray *pool | ||
| 143 | ) { | ||
| 144 | StackNode *node = pool->size > 0 | ||
| 145 | ? array_pop(pool) | ||
| 146 | : ts_malloc(sizeof(StackNode)); | ||
| 147 | *node = (StackNode) { | ||
| 148 | .ref_count = 1, | ||
| 149 | .link_count = 0, | ||
| 150 | .state = state | ||
| 151 | }; | ||
| 152 | |||
| 153 | if (previous_node) { | ||
| 154 | node->link_count = 1; | ||
| 155 | node->links[0] = (StackLink) { | ||
| 156 | .node = previous_node, | ||
| 157 | .subtree = subtree, | ||
| 158 | .is_pending = is_pending, | ||
| 159 | }; | ||
| 160 | |||
| 161 | node->position = previous_node->position; | ||
| 162 | node->error_cost = previous_node->error_cost; | ||
| 163 | node->dynamic_precedence = previous_node->dynamic_precedence; | ||
| 164 | node->node_count = previous_node->node_count; | ||
| 165 | |||
| 166 | if (subtree.ptr) { | ||
| 167 | node->error_cost += ts_subtree_error_cost(subtree); | ||
| 168 | node->position = length_add(node->position, ts_subtree_total_size(subtree)); | ||
| 169 | node->node_count += stack__subtree_node_count(subtree); | ||
| 170 | node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); | ||
| 171 | } | ||
| 172 | } else { | ||
| 173 | node->position = length_zero(); | ||
| 174 | node->error_cost = 0; | ||
| 175 | } | ||
| 176 | |||
| 177 | return node; | ||
| 178 | } | ||
| 179 | |||
| 180 | static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { | ||
| 181 | if (left.ptr == right.ptr) return true; | ||
| 182 | if (!left.ptr || !right.ptr) return false; | ||
| 183 | |||
| 184 | // Symbols must match | ||
| 185 | if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false; | ||
| 186 | |||
| 187 | // If both have errors, don't bother keeping both. | ||
| 188 | if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true; | ||
| 189 | |||
| 190 | return ( | ||
| 191 | ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && | ||
| 192 | ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && | ||
| 193 | ts_subtree_child_count(left) == ts_subtree_child_count(right) && | ||
| 194 | ts_subtree_extra(left) == ts_subtree_extra(right) && | ||
| 195 | ts_subtree_external_scanner_state_eq(left, right) | ||
| 196 | ); | ||
| 197 | } | ||
| 198 | |||
| 199 | static void stack_node_add_link( | ||
| 200 | StackNode *self, | ||
| 201 | StackLink link, | ||
| 202 | SubtreePool *subtree_pool | ||
| 203 | ) { | ||
| 204 | if (link.node == self) return; | ||
| 205 | |||
| 206 | for (int i = 0; i < self->link_count; i++) { | ||
| 207 | StackLink *existing_link = &self->links[i]; | ||
| 208 | if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { | ||
| 209 | // In general, we preserve ambiguities until they are removed from the stack | ||
| 210 | // during a pop operation where multiple paths lead to the same node. But in | ||
| 211 | // the special case where two links directly connect the same pair of nodes, | ||
| 212 | // we can safely remove the ambiguity ahead of time without changing behavior. | ||
| 213 | if (existing_link->node == link.node) { | ||
| 214 | if ( | ||
| 215 | ts_subtree_dynamic_precedence(link.subtree) > | ||
| 216 | ts_subtree_dynamic_precedence(existing_link->subtree) | ||
| 217 | ) { | ||
| 218 | ts_subtree_retain(link.subtree); | ||
| 219 | ts_subtree_release(subtree_pool, existing_link->subtree); | ||
| 220 | existing_link->subtree = link.subtree; | ||
| 221 | self->dynamic_precedence = | ||
| 222 | link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); | ||
| 223 | } | ||
| 224 | return; | ||
| 225 | } | ||
| 226 | |||
| 227 | // If the previous nodes are mergeable, merge them recursively. | ||
| 228 | if ( | ||
| 229 | existing_link->node->state == link.node->state && | ||
| 230 | existing_link->node->position.bytes == link.node->position.bytes | ||
| 231 | ) { | ||
| 232 | for (int j = 0; j < link.node->link_count; j++) { | ||
| 233 | stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); | ||
| 234 | } | ||
| 235 | int32_t dynamic_precedence = link.node->dynamic_precedence; | ||
| 236 | if (link.subtree.ptr) { | ||
| 237 | dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); | ||
| 238 | } | ||
| 239 | if (dynamic_precedence > self->dynamic_precedence) { | ||
| 240 | self->dynamic_precedence = dynamic_precedence; | ||
| 241 | } | ||
| 242 | return; | ||
| 243 | } | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | if (self->link_count == MAX_LINK_COUNT) return; | ||
| 248 | |||
| 249 | stack_node_retain(link.node); | ||
| 250 | unsigned node_count = link.node->node_count; | ||
| 251 | int dynamic_precedence = link.node->dynamic_precedence; | ||
| 252 | self->links[self->link_count++] = link; | ||
| 253 | |||
| 254 | if (link.subtree.ptr) { | ||
| 255 | ts_subtree_retain(link.subtree); | ||
| 256 | node_count += stack__subtree_node_count(link.subtree); | ||
| 257 | dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); | ||
| 258 | } | ||
| 259 | |||
| 260 | if (node_count > self->node_count) self->node_count = node_count; | ||
| 261 | if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; | ||
| 262 | } | ||
| 263 | |||
| 264 | static void stack_head_delete( | ||
| 265 | StackHead *self, | ||
| 266 | StackNodeArray *pool, | ||
| 267 | SubtreePool *subtree_pool | ||
| 268 | ) { | ||
| 269 | if (self->node) { | ||
| 270 | if (self->last_external_token.ptr) { | ||
| 271 | ts_subtree_release(subtree_pool, self->last_external_token); | ||
| 272 | } | ||
| 273 | if (self->lookahead_when_paused.ptr) { | ||
| 274 | ts_subtree_release(subtree_pool, self->lookahead_when_paused); | ||
| 275 | } | ||
| 276 | if (self->summary) { | ||
| 277 | array_delete(self->summary); | ||
| 278 | ts_free(self->summary); | ||
| 279 | } | ||
| 280 | stack_node_release(self->node, pool, subtree_pool); | ||
| 281 | } | ||
| 282 | } | ||
| 283 | |||
| 284 | static StackVersion ts_stack__add_version( | ||
| 285 | Stack *self, | ||
| 286 | StackVersion original_version, | ||
| 287 | StackNode *node | ||
| 288 | ) { | ||
| 289 | StackHead head = { | ||
| 290 | .node = node, | ||
| 291 | .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, | ||
| 292 | .last_external_token = self->heads.contents[original_version].last_external_token, | ||
| 293 | .status = StackStatusActive, | ||
| 294 | .lookahead_when_paused = NULL_SUBTREE, | ||
| 295 | }; | ||
| 296 | array_push(&self->heads, head); | ||
| 297 | stack_node_retain(node); | ||
| 298 | if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token); | ||
| 299 | return (StackVersion)(self->heads.size - 1); | ||
| 300 | } | ||
| 301 | |||
| 302 | static void ts_stack__add_slice( | ||
| 303 | Stack *self, | ||
| 304 | StackVersion original_version, | ||
| 305 | StackNode *node, | ||
| 306 | SubtreeArray *subtrees | ||
| 307 | ) { | ||
| 308 | for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { | ||
| 309 | StackVersion version = self->slices.contents[i].version; | ||
| 310 | if (self->heads.contents[version].node == node) { | ||
| 311 | StackSlice slice = {*subtrees, version}; | ||
| 312 | array_insert(&self->slices, i + 1, slice); | ||
| 313 | return; | ||
| 314 | } | ||
| 315 | } | ||
| 316 | |||
| 317 | StackVersion version = ts_stack__add_version(self, original_version, node); | ||
| 318 | StackSlice slice = { *subtrees, version }; | ||
| 319 | array_push(&self->slices, slice); | ||
| 320 | } | ||
| 321 | |||
| 322 | static StackSliceArray stack__iter( | ||
| 323 | Stack *self, | ||
| 324 | StackVersion version, | ||
| 325 | StackCallback callback, | ||
| 326 | void *payload, | ||
| 327 | int goal_subtree_count | ||
| 328 | ) { | ||
| 329 | array_clear(&self->slices); | ||
| 330 | array_clear(&self->iterators); | ||
| 331 | |||
| 332 | StackHead *head = array_get(&self->heads, version); | ||
| 333 | StackIterator new_iterator = { | ||
| 334 | .node = head->node, | ||
| 335 | .subtrees = array_new(), | ||
| 336 | .subtree_count = 0, | ||
| 337 | .is_pending = true, | ||
| 338 | }; | ||
| 339 | |||
| 340 | bool include_subtrees = false; | ||
| 341 | if (goal_subtree_count >= 0) { | ||
| 342 | include_subtrees = true; | ||
| 343 | array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); | ||
| 344 | } | ||
| 345 | |||
| 346 | array_push(&self->iterators, new_iterator); | ||
| 347 | |||
| 348 | while (self->iterators.size > 0) { | ||
| 349 | for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { | ||
| 350 | StackIterator *iterator = &self->iterators.contents[i]; | ||
| 351 | StackNode *node = iterator->node; | ||
| 352 | |||
| 353 | StackAction action = callback(payload, iterator); | ||
| 354 | bool should_pop = action & StackActionPop; | ||
| 355 | bool should_stop = action & StackActionStop || node->link_count == 0; | ||
| 356 | |||
| 357 | if (should_pop) { | ||
| 358 | SubtreeArray subtrees = iterator->subtrees; | ||
| 359 | if (!should_stop) { | ||
| 360 | ts_subtree_array_copy(subtrees, &subtrees); | ||
| 361 | } | ||
| 362 | ts_subtree_array_reverse(&subtrees); | ||
| 363 | ts_stack__add_slice( | ||
| 364 | self, | ||
| 365 | version, | ||
| 366 | node, | ||
| 367 | &subtrees | ||
| 368 | ); | ||
| 369 | } | ||
| 370 | |||
| 371 | if (should_stop) { | ||
| 372 | if (!should_pop) { | ||
| 373 | ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); | ||
| 374 | } | ||
| 375 | array_erase(&self->iterators, i); | ||
| 376 | i--, size--; | ||
| 377 | continue; | ||
| 378 | } | ||
| 379 | |||
| 380 | for (uint32_t j = 1; j <= node->link_count; j++) { | ||
| 381 | StackIterator *next_iterator; | ||
| 382 | StackLink link; | ||
| 383 | if (j == node->link_count) { | ||
| 384 | link = node->links[0]; | ||
| 385 | next_iterator = &self->iterators.contents[i]; | ||
| 386 | } else { | ||
| 387 | if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; | ||
| 388 | link = node->links[j]; | ||
| 389 | StackIterator current_iterator = self->iterators.contents[i]; | ||
| 390 | array_push(&self->iterators, current_iterator); | ||
| 391 | next_iterator = array_back(&self->iterators); | ||
| 392 | ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); | ||
| 393 | } | ||
| 394 | |||
| 395 | next_iterator->node = link.node; | ||
| 396 | if (link.subtree.ptr) { | ||
| 397 | if (include_subtrees) { | ||
| 398 | array_push(&next_iterator->subtrees, link.subtree); | ||
| 399 | ts_subtree_retain(link.subtree); | ||
| 400 | } | ||
| 401 | |||
| 402 | if (!ts_subtree_extra(link.subtree)) { | ||
| 403 | next_iterator->subtree_count++; | ||
| 404 | if (!link.is_pending) { | ||
| 405 | next_iterator->is_pending = false; | ||
| 406 | } | ||
| 407 | } | ||
| 408 | } else { | ||
| 409 | next_iterator->subtree_count++; | ||
| 410 | next_iterator->is_pending = false; | ||
| 411 | } | ||
| 412 | } | ||
| 413 | } | ||
| 414 | } | ||
| 415 | |||
| 416 | return self->slices; | ||
| 417 | } | ||
| 418 | |||
| 419 | Stack *ts_stack_new(SubtreePool *subtree_pool) { | ||
| 420 | Stack *self = ts_calloc(1, sizeof(Stack)); | ||
| 421 | |||
| 422 | array_init(&self->heads); | ||
| 423 | array_init(&self->slices); | ||
| 424 | array_init(&self->iterators); | ||
| 425 | array_init(&self->node_pool); | ||
| 426 | array_reserve(&self->heads, 4); | ||
| 427 | array_reserve(&self->slices, 4); | ||
| 428 | array_reserve(&self->iterators, 4); | ||
| 429 | array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); | ||
| 430 | |||
| 431 | self->subtree_pool = subtree_pool; | ||
| 432 | self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); | ||
| 433 | ts_stack_clear(self); | ||
| 434 | |||
| 435 | return self; | ||
| 436 | } | ||
| 437 | |||
| 438 | void ts_stack_delete(Stack *self) { | ||
| 439 | if (self->slices.contents) | ||
| 440 | array_delete(&self->slices); | ||
| 441 | if (self->iterators.contents) | ||
| 442 | array_delete(&self->iterators); | ||
| 443 | stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); | ||
| 444 | for (uint32_t i = 0; i < self->heads.size; i++) { | ||
| 445 | stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); | ||
| 446 | } | ||
| 447 | array_clear(&self->heads); | ||
| 448 | if (self->node_pool.contents) { | ||
| 449 | for (uint32_t i = 0; i < self->node_pool.size; i++) | ||
| 450 | ts_free(self->node_pool.contents[i]); | ||
| 451 | array_delete(&self->node_pool); | ||
| 452 | } | ||
| 453 | array_delete(&self->heads); | ||
| 454 | ts_free(self); | ||
| 455 | } | ||
| 456 | |||
| 457 | uint32_t ts_stack_version_count(const Stack *self) { | ||
| 458 | return self->heads.size; | ||
| 459 | } | ||
| 460 | |||
| 461 | TSStateId ts_stack_state(const Stack *self, StackVersion version) { | ||
| 462 | return array_get(&self->heads, version)->node->state; | ||
| 463 | } | ||
| 464 | |||
| 465 | Length ts_stack_position(const Stack *self, StackVersion version) { | ||
| 466 | return array_get(&self->heads, version)->node->position; | ||
| 467 | } | ||
| 468 | |||
| 469 | Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) { | ||
| 470 | return array_get(&self->heads, version)->last_external_token; | ||
| 471 | } | ||
| 472 | |||
| 473 | void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) { | ||
| 474 | StackHead *head = array_get(&self->heads, version); | ||
| 475 | if (token.ptr) ts_subtree_retain(token); | ||
| 476 | if (head->last_external_token.ptr) ts_subtree_release(self->subtree_pool, head->last_external_token); | ||
| 477 | head->last_external_token = token; | ||
| 478 | } | ||
| 479 | |||
| 480 | unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { | ||
| 481 | StackHead *head = array_get(&self->heads, version); | ||
| 482 | unsigned result = head->node->error_cost; | ||
| 483 | if ( | ||
| 484 | head->status == StackStatusPaused || | ||
| 485 | (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { | ||
| 486 | result += ERROR_COST_PER_RECOVERY; | ||
| 487 | } | ||
| 488 | return result; | ||
| 489 | } | ||
| 490 | |||
| 491 | unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { | ||
| 492 | StackHead *head = array_get(&self->heads, version); | ||
| 493 | if (head->node->node_count < head->node_count_at_last_error) { | ||
| 494 | head->node_count_at_last_error = head->node->node_count; | ||
| 495 | } | ||
| 496 | return head->node->node_count - head->node_count_at_last_error; | ||
| 497 | } | ||
| 498 | |||
| 499 | void ts_stack_push( | ||
| 500 | Stack *self, | ||
| 501 | StackVersion version, | ||
| 502 | Subtree subtree, | ||
| 503 | bool pending, | ||
| 504 | TSStateId state | ||
| 505 | ) { | ||
| 506 | StackHead *head = array_get(&self->heads, version); | ||
| 507 | StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); | ||
| 508 | if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; | ||
| 509 | head->node = new_node; | ||
| 510 | } | ||
| 511 | |||
| 512 | inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { | ||
| 513 | unsigned *goal_subtree_count = payload; | ||
| 514 | if (iterator->subtree_count == *goal_subtree_count) { | ||
| 515 | return StackActionPop | StackActionStop; | ||
| 516 | } else { | ||
| 517 | return StackActionNone; | ||
| 518 | } | ||
| 519 | } | ||
| 520 | |||
| 521 | StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { | ||
| 522 | return stack__iter(self, version, pop_count_callback, &count, (int)count); | ||
| 523 | } | ||
| 524 | |||
| 525 | inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { | ||
| 526 | (void)payload; | ||
| 527 | if (iterator->subtree_count >= 1) { | ||
| 528 | if (iterator->is_pending) { | ||
| 529 | return StackActionPop | StackActionStop; | ||
| 530 | } else { | ||
| 531 | return StackActionStop; | ||
| 532 | } | ||
| 533 | } else { | ||
| 534 | return StackActionNone; | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { | ||
| 539 | StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0); | ||
| 540 | if (pop.size > 0) { | ||
| 541 | ts_stack_renumber_version(self, pop.contents[0].version, version); | ||
| 542 | pop.contents[0].version = version; | ||
| 543 | } | ||
| 544 | return pop; | ||
| 545 | } | ||
| 546 | |||
| 547 | inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { | ||
| 548 | if (iterator->subtrees.size > 0) { | ||
| 549 | bool *found_error = payload; | ||
| 550 | if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { | ||
| 551 | *found_error = true; | ||
| 552 | return StackActionPop | StackActionStop; | ||
| 553 | } else { | ||
| 554 | return StackActionStop; | ||
| 555 | } | ||
| 556 | } else { | ||
| 557 | return StackActionNone; | ||
| 558 | } | ||
| 559 | } | ||
| 560 | |||
| 561 | SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) { | ||
| 562 | StackNode *node = array_get(&self->heads, version)->node; | ||
| 563 | for (unsigned i = 0; i < node->link_count; i++) { | ||
| 564 | if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) { | ||
| 565 | bool found_error = false; | ||
| 566 | StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1); | ||
| 567 | if (pop.size > 0) { | ||
| 568 | assert(pop.size == 1); | ||
| 569 | ts_stack_renumber_version(self, pop.contents[0].version, version); | ||
| 570 | return pop.contents[0].subtrees; | ||
| 571 | } | ||
| 572 | break; | ||
| 573 | } | ||
| 574 | } | ||
| 575 | return (SubtreeArray) {.size = 0}; | ||
| 576 | } | ||
| 577 | |||
| 578 | inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { | ||
| 579 | (void)payload; | ||
| 580 | return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; | ||
| 581 | } | ||
| 582 | |||
| 583 | StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) { | ||
| 584 | return stack__iter(self, version, pop_all_callback, NULL, 0); | ||
| 585 | } | ||
| 586 | |||
| 587 | typedef struct { | ||
| 588 | StackSummary *summary; | ||
| 589 | unsigned max_depth; | ||
| 590 | } SummarizeStackSession; | ||
| 591 | |||
| 592 | inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { | ||
| 593 | SummarizeStackSession *session = payload; | ||
| 594 | TSStateId state = iterator->node->state; | ||
| 595 | unsigned depth = iterator->subtree_count; | ||
| 596 | if (depth > session->max_depth) return StackActionStop; | ||
| 597 | for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { | ||
| 598 | StackSummaryEntry entry = session->summary->contents[i]; | ||
| 599 | if (entry.depth < depth) break; | ||
| 600 | if (entry.depth == depth && entry.state == state) return StackActionNone; | ||
| 601 | } | ||
| 602 | array_push(session->summary, ((StackSummaryEntry) { | ||
| 603 | .position = iterator->node->position, | ||
| 604 | .depth = depth, | ||
| 605 | .state = state, | ||
| 606 | })); | ||
| 607 | return StackActionNone; | ||
| 608 | } | ||
| 609 | |||
| 610 | void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { | ||
| 611 | SummarizeStackSession session = { | ||
| 612 | .summary = ts_malloc(sizeof(StackSummary)), | ||
| 613 | .max_depth = max_depth | ||
| 614 | }; | ||
| 615 | array_init(session.summary); | ||
| 616 | stack__iter(self, version, summarize_stack_callback, &session, -1); | ||
| 617 | StackHead *head = &self->heads.contents[version]; | ||
| 618 | if (head->summary) { | ||
| 619 | array_delete(head->summary); | ||
| 620 | ts_free(head->summary); | ||
| 621 | } | ||
| 622 | head->summary = session.summary; | ||
| 623 | } | ||
| 624 | |||
| 625 | StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { | ||
| 626 | return array_get(&self->heads, version)->summary; | ||
| 627 | } | ||
| 628 | |||
| 629 | int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { | ||
| 630 | return array_get(&self->heads, version)->node->dynamic_precedence; | ||
| 631 | } | ||
| 632 | |||
| 633 | bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) { | ||
| 634 | const StackHead *head = array_get(&self->heads, version); | ||
| 635 | const StackNode *node = head->node; | ||
| 636 | if (node->error_cost == 0) return true; | ||
| 637 | while (node) { | ||
| 638 | if (node->link_count > 0) { | ||
| 639 | Subtree subtree = node->links[0].subtree; | ||
| 640 | if (subtree.ptr) { | ||
| 641 | if (ts_subtree_total_bytes(subtree) > 0) { | ||
| 642 | return true; | ||
| 643 | } else if ( | ||
| 644 | node->node_count > head->node_count_at_last_error && | ||
| 645 | ts_subtree_error_cost(subtree) == 0 | ||
| 646 | ) { | ||
| 647 | node = node->links[0].node; | ||
| 648 | continue; | ||
| 649 | } | ||
| 650 | } | ||
| 651 | } | ||
| 652 | break; | ||
| 653 | } | ||
| 654 | return false; | ||
| 655 | } | ||
| 656 | |||
| 657 | void ts_stack_remove_version(Stack *self, StackVersion version) { | ||
| 658 | stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); | ||
| 659 | array_erase(&self->heads, version); | ||
| 660 | } | ||
| 661 | |||
| 662 | void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { | ||
| 663 | if (v1 == v2) return; | ||
| 664 | assert(v2 < v1); | ||
| 665 | assert((uint32_t)v1 < self->heads.size); | ||
| 666 | StackHead *source_head = &self->heads.contents[v1]; | ||
| 667 | StackHead *target_head = &self->heads.contents[v2]; | ||
| 668 | if (target_head->summary && !source_head->summary) { | ||
| 669 | source_head->summary = target_head->summary; | ||
| 670 | target_head->summary = NULL; | ||
| 671 | } | ||
| 672 | stack_head_delete(target_head, &self->node_pool, self->subtree_pool); | ||
| 673 | *target_head = *source_head; | ||
| 674 | array_erase(&self->heads, v1); | ||
| 675 | } | ||
| 676 | |||
| 677 | void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) { | ||
| 678 | StackHead temporary_head = self->heads.contents[v1]; | ||
| 679 | self->heads.contents[v1] = self->heads.contents[v2]; | ||
| 680 | self->heads.contents[v2] = temporary_head; | ||
| 681 | } | ||
| 682 | |||
| 683 | StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { | ||
| 684 | assert(version < self->heads.size); | ||
| 685 | array_push(&self->heads, self->heads.contents[version]); | ||
| 686 | StackHead *head = array_back(&self->heads); | ||
| 687 | stack_node_retain(head->node); | ||
| 688 | if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token); | ||
| 689 | head->summary = NULL; | ||
| 690 | return self->heads.size - 1; | ||
| 691 | } | ||
| 692 | |||
| 693 | bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { | ||
| 694 | if (!ts_stack_can_merge(self, version1, version2)) return false; | ||
| 695 | StackHead *head1 = &self->heads.contents[version1]; | ||
| 696 | StackHead *head2 = &self->heads.contents[version2]; | ||
| 697 | for (uint32_t i = 0; i < head2->node->link_count; i++) { | ||
| 698 | stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); | ||
| 699 | } | ||
| 700 | if (head1->node->state == ERROR_STATE) { | ||
| 701 | head1->node_count_at_last_error = head1->node->node_count; | ||
| 702 | } | ||
| 703 | ts_stack_remove_version(self, version2); | ||
| 704 | return true; | ||
| 705 | } | ||
| 706 | |||
| 707 | bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) { | ||
| 708 | StackHead *head1 = &self->heads.contents[version1]; | ||
| 709 | StackHead *head2 = &self->heads.contents[version2]; | ||
| 710 | return | ||
| 711 | head1->status == StackStatusActive && | ||
| 712 | head2->status == StackStatusActive && | ||
| 713 | head1->node->state == head2->node->state && | ||
| 714 | head1->node->position.bytes == head2->node->position.bytes && | ||
| 715 | head1->node->error_cost == head2->node->error_cost && | ||
| 716 | ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); | ||
| 717 | } | ||
| 718 | |||
| 719 | void ts_stack_halt(Stack *self, StackVersion version) { | ||
| 720 | array_get(&self->heads, version)->status = StackStatusHalted; | ||
| 721 | } | ||
| 722 | |||
| 723 | void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) { | ||
| 724 | StackHead *head = array_get(&self->heads, version); | ||
| 725 | head->status = StackStatusPaused; | ||
| 726 | head->lookahead_when_paused = lookahead; | ||
| 727 | head->node_count_at_last_error = head->node->node_count; | ||
| 728 | } | ||
| 729 | |||
| 730 | bool ts_stack_is_active(const Stack *self, StackVersion version) { | ||
| 731 | return array_get(&self->heads, version)->status == StackStatusActive; | ||
| 732 | } | ||
| 733 | |||
| 734 | bool ts_stack_is_halted(const Stack *self, StackVersion version) { | ||
| 735 | return array_get(&self->heads, version)->status == StackStatusHalted; | ||
| 736 | } | ||
| 737 | |||
| 738 | bool ts_stack_is_paused(const Stack *self, StackVersion version) { | ||
| 739 | return array_get(&self->heads, version)->status == StackStatusPaused; | ||
| 740 | } | ||
| 741 | |||
| 742 | Subtree ts_stack_resume(Stack *self, StackVersion version) { | ||
| 743 | StackHead *head = array_get(&self->heads, version); | ||
| 744 | assert(head->status == StackStatusPaused); | ||
| 745 | Subtree result = head->lookahead_when_paused; | ||
| 746 | head->status = StackStatusActive; | ||
| 747 | head->lookahead_when_paused = NULL_SUBTREE; | ||
| 748 | return result; | ||
| 749 | } | ||
| 750 | |||
| 751 | void ts_stack_clear(Stack *self) { | ||
| 752 | stack_node_retain(self->base_node); | ||
| 753 | for (uint32_t i = 0; i < self->heads.size; i++) { | ||
| 754 | stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); | ||
| 755 | } | ||
| 756 | array_clear(&self->heads); | ||
| 757 | array_push(&self->heads, ((StackHead) { | ||
| 758 | .node = self->base_node, | ||
| 759 | .status = StackStatusActive, | ||
| 760 | .last_external_token = NULL_SUBTREE, | ||
| 761 | .lookahead_when_paused = NULL_SUBTREE, | ||
| 762 | })); | ||
| 763 | } | ||
| 764 | |||
| 765 | bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { | ||
| 766 | array_reserve(&self->iterators, 32); | ||
| 767 | if (!f) f = stderr; | ||
| 768 | |||
| 769 | fprintf(f, "digraph stack {\n"); | ||
| 770 | fprintf(f, "rankdir=\"RL\";\n"); | ||
| 771 | fprintf(f, "edge [arrowhead=none]\n"); | ||
| 772 | |||
| 773 | Array(StackNode *) visited_nodes = array_new(); | ||
| 774 | |||
| 775 | array_clear(&self->iterators); | ||
| 776 | for (uint32_t i = 0; i < self->heads.size; i++) { | ||
| 777 | StackHead *head = &self->heads.contents[i]; | ||
| 778 | if (head->status == StackStatusHalted) continue; | ||
| 779 | |||
| 780 | fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); | ||
| 781 | fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); | ||
| 782 | |||
| 783 | if (head->status == StackStatusPaused) { | ||
| 784 | fprintf(f, "color=red "); | ||
| 785 | } | ||
| 786 | fprintf(f, | ||
| 787 | "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", | ||
| 788 | i, | ||
| 789 | ts_stack_node_count_since_error(self, i), | ||
| 790 | ts_stack_error_cost(self, i) | ||
| 791 | ); | ||
| 792 | |||
| 793 | if (head->summary) { | ||
| 794 | fprintf(f, "\nsummary:"); | ||
| 795 | for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); | ||
| 796 | } | ||
| 797 | |||
| 798 | if (head->last_external_token.ptr) { | ||
| 799 | const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; | ||
| 800 | const char *data = ts_external_scanner_state_data(state); | ||
| 801 | fprintf(f, "\nexternal_scanner_state:"); | ||
| 802 | for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); | ||
| 803 | } | ||
| 804 | |||
| 805 | fprintf(f, "\"]\n"); | ||
| 806 | array_push(&self->iterators, ((StackIterator) { | ||
| 807 | .node = head->node | ||
| 808 | })); | ||
| 809 | } | ||
| 810 | |||
| 811 | bool all_iterators_done = false; | ||
| 812 | while (!all_iterators_done) { | ||
| 813 | all_iterators_done = true; | ||
| 814 | |||
| 815 | for (uint32_t i = 0; i < self->iterators.size; i++) { | ||
| 816 | StackIterator iterator = self->iterators.contents[i]; | ||
| 817 | StackNode *node = iterator.node; | ||
| 818 | |||
| 819 | for (uint32_t j = 0; j < visited_nodes.size; j++) { | ||
| 820 | if (visited_nodes.contents[j] == node) { | ||
| 821 | node = NULL; | ||
| 822 | break; | ||
| 823 | } | ||
| 824 | } | ||
| 825 | |||
| 826 | if (!node) continue; | ||
| 827 | all_iterators_done = false; | ||
| 828 | |||
| 829 | fprintf(f, "node_%p [", (void *)node); | ||
| 830 | if (node->state == ERROR_STATE) { | ||
| 831 | fprintf(f, "label=\"?\""); | ||
| 832 | } else if ( | ||
| 833 | node->link_count == 1 && | ||
| 834 | node->links[0].subtree.ptr && | ||
| 835 | ts_subtree_extra(node->links[0].subtree) | ||
| 836 | ) { | ||
| 837 | fprintf(f, "shape=point margin=0 label=\"\""); | ||
| 838 | } else { | ||
| 839 | fprintf(f, "label=\"%d\"", node->state); | ||
| 840 | } | ||
| 841 | |||
| 842 | fprintf( | ||
| 843 | f, | ||
| 844 | " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", | ||
| 845 | node->position.extent.row + 1, | ||
| 846 | node->position.extent.column, | ||
| 847 | node->node_count, | ||
| 848 | node->error_cost, | ||
| 849 | node->dynamic_precedence | ||
| 850 | ); | ||
| 851 | |||
| 852 | for (int j = 0; j < node->link_count; j++) { | ||
| 853 | StackLink link = node->links[j]; | ||
| 854 | fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); | ||
| 855 | if (link.is_pending) fprintf(f, "style=dashed "); | ||
| 856 | if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); | ||
| 857 | |||
| 858 | if (!link.subtree.ptr) { | ||
| 859 | fprintf(f, "color=red"); | ||
| 860 | } else { | ||
| 861 | fprintf(f, "label=\""); | ||
| 862 | bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree); | ||
| 863 | if (quoted) fprintf(f, "'"); | ||
| 864 | ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree)); | ||
| 865 | if (quoted) fprintf(f, "'"); | ||
| 866 | fprintf(f, "\""); | ||
| 867 | fprintf( | ||
| 868 | f, | ||
| 869 | "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"", | ||
| 870 | ts_subtree_error_cost(link.subtree), | ||
| 871 | ts_subtree_dynamic_precedence(link.subtree) | ||
| 872 | ); | ||
| 873 | } | ||
| 874 | |||
| 875 | fprintf(f, "];\n"); | ||
| 876 | |||
| 877 | StackIterator *next_iterator; | ||
| 878 | if (j == 0) { | ||
| 879 | next_iterator = &self->iterators.contents[i]; | ||
| 880 | } else { | ||
| 881 | array_push(&self->iterators, iterator); | ||
| 882 | next_iterator = array_back(&self->iterators); | ||
| 883 | } | ||
| 884 | next_iterator->node = link.node; | ||
| 885 | } | ||
| 886 | |||
| 887 | array_push(&visited_nodes, node); | ||
| 888 | } | ||
| 889 | } | ||
| 890 | |||
| 891 | fprintf(f, "}\n"); | ||
| 892 | |||
| 893 | array_delete(&visited_nodes); | ||
| 894 | return true; | ||
| 895 | } | ||
| 896 | |||
| 897 | #undef inline | ||
diff --git a/vendor/tree-sitter/lib/src/stack.h b/vendor/tree-sitter/lib/src/stack.h new file mode 100644 index 0000000..86abbc9 --- /dev/null +++ b/vendor/tree-sitter/lib/src/stack.h | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | #ifndef TREE_SITTER_PARSE_STACK_H_ | ||
| 2 | #define TREE_SITTER_PARSE_STACK_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include "./array.h" | ||
| 9 | #include "./subtree.h" | ||
| 10 | #include "./error_costs.h" | ||
| 11 | #include <stdio.h> | ||
| 12 | |||
| 13 | typedef struct Stack Stack; | ||
| 14 | |||
| 15 | typedef unsigned StackVersion; | ||
| 16 | #define STACK_VERSION_NONE ((StackVersion)-1) | ||
| 17 | |||
| 18 | typedef struct { | ||
| 19 | SubtreeArray subtrees; | ||
| 20 | StackVersion version; | ||
| 21 | } StackSlice; | ||
| 22 | typedef Array(StackSlice) StackSliceArray; | ||
| 23 | |||
| 24 | typedef struct { | ||
| 25 | Length position; | ||
| 26 | unsigned depth; | ||
| 27 | TSStateId state; | ||
| 28 | } StackSummaryEntry; | ||
| 29 | typedef Array(StackSummaryEntry) StackSummary; | ||
| 30 | |||
| 31 | // Create a stack. | ||
| 32 | Stack *ts_stack_new(SubtreePool *); | ||
| 33 | |||
| 34 | // Release the memory reserved for a given stack. | ||
| 35 | void ts_stack_delete(Stack *); | ||
| 36 | |||
| 37 | // Get the stack's current number of versions. | ||
| 38 | uint32_t ts_stack_version_count(const Stack *); | ||
| 39 | |||
| 40 | // Get the state at the top of the given version of the stack. If the stack is | ||
| 41 | // empty, this returns the initial state, 0. | ||
| 42 | TSStateId ts_stack_state(const Stack *, StackVersion); | ||
| 43 | |||
| 44 | // Get the last external token associated with a given version of the stack. | ||
| 45 | Subtree ts_stack_last_external_token(const Stack *, StackVersion); | ||
| 46 | |||
| 47 | // Set the last external token associated with a given version of the stack. | ||
| 48 | void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); | ||
| 49 | |||
| 50 | // Get the position of the given version of the stack within the document. | ||
| 51 | Length ts_stack_position(const Stack *, StackVersion); | ||
| 52 | |||
| 53 | // Push a tree and state onto the given version of the stack. | ||
| 54 | // | ||
| 55 | // This transfers ownership of the tree to the Stack. Callers that | ||
| 56 | // need to retain ownership of the tree for their own purposes should | ||
| 57 | // first retain the tree. | ||
| 58 | void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId); | ||
| 59 | |||
| 60 | // Pop the given number of entries from the given version of the stack. This | ||
| 61 | // operation can increase the number of stack versions by revealing multiple | ||
| 62 | // versions which had previously been merged. It returns an array that | ||
| 63 | // specifies the index of each revealed version and the trees that were | ||
| 64 | // removed from that version. | ||
| 65 | StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count); | ||
| 66 | |||
| 67 | // Remove an error at the top of the given version of the stack. | ||
| 68 | SubtreeArray ts_stack_pop_error(Stack *, StackVersion); | ||
| 69 | |||
| 70 | // Remove any pending trees from the top of the given version of the stack. | ||
| 71 | StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); | ||
| 72 | |||
| 73 | // Remove all trees from the given version of the stack. | ||
| 74 | StackSliceArray ts_stack_pop_all(Stack *, StackVersion); | ||
| 75 | |||
| 76 | // Get the maximum number of tree nodes reachable from this version of the stack | ||
| 77 | // since the last error was detected. | ||
| 78 | unsigned ts_stack_node_count_since_error(const Stack *, StackVersion); | ||
| 79 | |||
| 80 | int ts_stack_dynamic_precedence(Stack *, StackVersion); | ||
| 81 | |||
| 82 | bool ts_stack_has_advanced_since_error(const Stack *, StackVersion); | ||
| 83 | |||
| 84 | // Compute a summary of all the parse states near the top of the given | ||
| 85 | // version of the stack and store the summary for later retrieval. | ||
| 86 | void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth); | ||
| 87 | |||
| 88 | // Retrieve a summary of all the parse states near the top of the | ||
| 89 | // given version of the stack. | ||
| 90 | StackSummary *ts_stack_get_summary(Stack *, StackVersion); | ||
| 91 | |||
| 92 | // Get the total cost of all errors on the given version of the stack. | ||
| 93 | unsigned ts_stack_error_cost(const Stack *, StackVersion version); | ||
| 94 | |||
| 95 | // Merge the given two stack versions if possible, returning true | ||
| 96 | // if they were successfully merged and false otherwise. | ||
| 97 | bool ts_stack_merge(Stack *, StackVersion, StackVersion); | ||
| 98 | |||
| 99 | // Determine whether the given two stack versions can be merged. | ||
| 100 | bool ts_stack_can_merge(Stack *, StackVersion, StackVersion); | ||
| 101 | |||
| 102 | Subtree ts_stack_resume(Stack *, StackVersion); | ||
| 103 | |||
| 104 | void ts_stack_pause(Stack *, StackVersion, Subtree); | ||
| 105 | |||
| 106 | void ts_stack_halt(Stack *, StackVersion); | ||
| 107 | |||
| 108 | bool ts_stack_is_active(const Stack *, StackVersion); | ||
| 109 | |||
| 110 | bool ts_stack_is_paused(const Stack *, StackVersion); | ||
| 111 | |||
| 112 | bool ts_stack_is_halted(const Stack *, StackVersion); | ||
| 113 | |||
| 114 | void ts_stack_renumber_version(Stack *, StackVersion, StackVersion); | ||
| 115 | |||
| 116 | void ts_stack_swap_versions(Stack *, StackVersion, StackVersion); | ||
| 117 | |||
| 118 | StackVersion ts_stack_copy_version(Stack *, StackVersion); | ||
| 119 | |||
| 120 | // Remove the given version from the stack. | ||
| 121 | void ts_stack_remove_version(Stack *, StackVersion); | ||
| 122 | |||
| 123 | void ts_stack_clear(Stack *); | ||
| 124 | |||
| 125 | bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *); | ||
| 126 | |||
| 127 | typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); | ||
| 128 | |||
| 129 | #ifdef __cplusplus | ||
| 130 | } | ||
| 131 | #endif | ||
| 132 | |||
| 133 | #endif // TREE_SITTER_PARSE_STACK_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/subtree.c b/vendor/tree-sitter/lib/src/subtree.c new file mode 100644 index 0000000..51bc2ef --- /dev/null +++ b/vendor/tree-sitter/lib/src/subtree.c | |||
| @@ -0,0 +1,1039 @@ | |||
| 1 | #include <assert.h> | ||
| 2 | #include <ctype.h> | ||
| 3 | #include <limits.h> | ||
| 4 | #include <stdbool.h> | ||
| 5 | #include <string.h> | ||
| 6 | #include <stdio.h> | ||
| 7 | #include "./alloc.h" | ||
| 8 | #include "./atomic.h" | ||
| 9 | #include "./subtree.h" | ||
| 10 | #include "./length.h" | ||
| 11 | #include "./language.h" | ||
| 12 | #include "./error_costs.h" | ||
| 13 | #include <stddef.h> | ||
| 14 | |||
| 15 | typedef struct { | ||
| 16 | Length start; | ||
| 17 | Length old_end; | ||
| 18 | Length new_end; | ||
| 19 | } Edit; | ||
| 20 | |||
| 21 | #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX | ||
| 22 | #define TS_MAX_TREE_POOL_SIZE 32 | ||
| 23 | |||
| 24 | // ExternalScannerState | ||
| 25 | |||
| 26 | void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { | ||
| 27 | self->length = length; | ||
| 28 | if (length > sizeof(self->short_data)) { | ||
| 29 | self->long_data = ts_malloc(length); | ||
| 30 | memcpy(self->long_data, data, length); | ||
| 31 | } else { | ||
| 32 | memcpy(self->short_data, data, length); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) { | ||
| 37 | ExternalScannerState result = *self; | ||
| 38 | if (self->length > sizeof(self->short_data)) { | ||
| 39 | result.long_data = ts_malloc(self->length); | ||
| 40 | memcpy(result.long_data, self->long_data, self->length); | ||
| 41 | } | ||
| 42 | return result; | ||
| 43 | } | ||
| 44 | |||
| 45 | void ts_external_scanner_state_delete(ExternalScannerState *self) { | ||
| 46 | if (self->length > sizeof(self->short_data)) { | ||
| 47 | ts_free(self->long_data); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | const char *ts_external_scanner_state_data(const ExternalScannerState *self) { | ||
| 52 | if (self->length > sizeof(self->short_data)) { | ||
| 53 | return self->long_data; | ||
| 54 | } else { | ||
| 55 | return self->short_data; | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) { | ||
| 60 | return | ||
| 61 | self->length == length && | ||
| 62 | memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | // SubtreeArray | ||
| 66 | |||
| 67 | void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { | ||
| 68 | dest->size = self.size; | ||
| 69 | dest->capacity = self.capacity; | ||
| 70 | dest->contents = self.contents; | ||
| 71 | if (self.capacity > 0) { | ||
| 72 | dest->contents = ts_calloc(self.capacity, sizeof(Subtree)); | ||
| 73 | memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); | ||
| 74 | for (uint32_t i = 0; i < self.size; i++) { | ||
| 75 | ts_subtree_retain(dest->contents[i]); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 80 | void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { | ||
| 81 | for (uint32_t i = 0; i < self->size; i++) { | ||
| 82 | ts_subtree_release(pool, self->contents[i]); | ||
| 83 | } | ||
| 84 | array_clear(self); | ||
| 85 | } | ||
| 86 | |||
| 87 | void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { | ||
| 88 | ts_subtree_array_clear(pool, self); | ||
| 89 | array_delete(self); | ||
| 90 | } | ||
| 91 | |||
| 92 | void ts_subtree_array_remove_trailing_extras( | ||
| 93 | SubtreeArray *self, | ||
| 94 | SubtreeArray *destination | ||
| 95 | ) { | ||
| 96 | array_clear(destination); | ||
| 97 | while (self->size > 0) { | ||
| 98 | Subtree last = self->contents[self->size - 1]; | ||
| 99 | if (ts_subtree_extra(last)) { | ||
| 100 | self->size--; | ||
| 101 | array_push(destination, last); | ||
| 102 | } else { | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | ts_subtree_array_reverse(destination); | ||
| 107 | } | ||
| 108 | |||
| 109 | void ts_subtree_array_reverse(SubtreeArray *self) { | ||
| 110 | for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { | ||
| 111 | size_t reverse_index = self->size - 1 - i; | ||
| 112 | Subtree swap = self->contents[i]; | ||
| 113 | self->contents[i] = self->contents[reverse_index]; | ||
| 114 | self->contents[reverse_index] = swap; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | // SubtreePool | ||
| 119 | |||
| 120 | SubtreePool ts_subtree_pool_new(uint32_t capacity) { | ||
| 121 | SubtreePool self = {array_new(), array_new()}; | ||
| 122 | array_reserve(&self.free_trees, capacity); | ||
| 123 | return self; | ||
| 124 | } | ||
| 125 | |||
| 126 | void ts_subtree_pool_delete(SubtreePool *self) { | ||
| 127 | if (self->free_trees.contents) { | ||
| 128 | for (unsigned i = 0; i < self->free_trees.size; i++) { | ||
| 129 | ts_free(self->free_trees.contents[i].ptr); | ||
| 130 | } | ||
| 131 | array_delete(&self->free_trees); | ||
| 132 | } | ||
| 133 | if (self->tree_stack.contents) array_delete(&self->tree_stack); | ||
| 134 | } | ||
| 135 | |||
| 136 | static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) { | ||
| 137 | if (self->free_trees.size > 0) { | ||
| 138 | return array_pop(&self->free_trees).ptr; | ||
| 139 | } else { | ||
| 140 | return ts_malloc(sizeof(SubtreeHeapData)); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) { | ||
| 145 | if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) { | ||
| 146 | array_push(&self->free_trees, (MutableSubtree) {.ptr = tree}); | ||
| 147 | } else { | ||
| 148 | ts_free(tree); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | // Subtree | ||
| 153 | |||
| 154 | static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) { | ||
| 155 | return | ||
| 156 | padding.bytes < TS_MAX_INLINE_TREE_LENGTH && | ||
| 157 | padding.extent.row < 16 && | ||
| 158 | padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && | ||
| 159 | size.extent.row == 0 && | ||
| 160 | size.extent.column < TS_MAX_INLINE_TREE_LENGTH && | ||
| 161 | lookahead_bytes < 16; | ||
| 162 | } | ||
| 163 | |||
| 164 | Subtree ts_subtree_new_leaf( | ||
| 165 | SubtreePool *pool, TSSymbol symbol, Length padding, Length size, | ||
| 166 | uint32_t lookahead_bytes, TSStateId parse_state, | ||
| 167 | bool has_external_tokens, bool depends_on_column, | ||
| 168 | bool is_keyword, const TSLanguage *language | ||
| 169 | ) { | ||
| 170 | TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); | ||
| 171 | bool extra = symbol == ts_builtin_sym_end; | ||
| 172 | |||
| 173 | bool is_inline = ( | ||
| 174 | symbol <= UINT8_MAX && | ||
| 175 | !has_external_tokens && | ||
| 176 | ts_subtree_can_inline(padding, size, lookahead_bytes) | ||
| 177 | ); | ||
| 178 | |||
| 179 | if (is_inline) { | ||
| 180 | return (Subtree) {{ | ||
| 181 | .parse_state = parse_state, | ||
| 182 | .symbol = symbol, | ||
| 183 | .padding_bytes = padding.bytes, | ||
| 184 | .padding_rows = padding.extent.row, | ||
| 185 | .padding_columns = padding.extent.column, | ||
| 186 | .size_bytes = size.bytes, | ||
| 187 | .lookahead_bytes = lookahead_bytes, | ||
| 188 | .visible = metadata.visible, | ||
| 189 | .named = metadata.named, | ||
| 190 | .extra = extra, | ||
| 191 | .has_changes = false, | ||
| 192 | .is_missing = false, | ||
| 193 | .is_keyword = is_keyword, | ||
| 194 | .is_inline = true, | ||
| 195 | }}; | ||
| 196 | } else { | ||
| 197 | SubtreeHeapData *data = ts_subtree_pool_allocate(pool); | ||
| 198 | *data = (SubtreeHeapData) { | ||
| 199 | .ref_count = 1, | ||
| 200 | .padding = padding, | ||
| 201 | .size = size, | ||
| 202 | .lookahead_bytes = lookahead_bytes, | ||
| 203 | .error_cost = 0, | ||
| 204 | .child_count = 0, | ||
| 205 | .symbol = symbol, | ||
| 206 | .parse_state = parse_state, | ||
| 207 | .visible = metadata.visible, | ||
| 208 | .named = metadata.named, | ||
| 209 | .extra = extra, | ||
| 210 | .fragile_left = false, | ||
| 211 | .fragile_right = false, | ||
| 212 | .has_changes = false, | ||
| 213 | .has_external_tokens = has_external_tokens, | ||
| 214 | .has_external_scanner_state_change = false, | ||
| 215 | .depends_on_column = depends_on_column, | ||
| 216 | .is_missing = false, | ||
| 217 | .is_keyword = is_keyword, | ||
| 218 | {{.first_leaf = {.symbol = 0, .parse_state = 0}}} | ||
| 219 | }; | ||
| 220 | return (Subtree) {.ptr = data}; | ||
| 221 | } | ||
| 222 | } | ||
| 223 | |||
| 224 | void ts_subtree_set_symbol( | ||
| 225 | MutableSubtree *self, | ||
| 226 | TSSymbol symbol, | ||
| 227 | const TSLanguage *language | ||
| 228 | ) { | ||
| 229 | TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); | ||
| 230 | if (self->data.is_inline) { | ||
| 231 | assert(symbol < UINT8_MAX); | ||
| 232 | self->data.symbol = symbol; | ||
| 233 | self->data.named = metadata.named; | ||
| 234 | self->data.visible = metadata.visible; | ||
| 235 | } else { | ||
| 236 | self->ptr->symbol = symbol; | ||
| 237 | self->ptr->named = metadata.named; | ||
| 238 | self->ptr->visible = metadata.visible; | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | Subtree ts_subtree_new_error( | ||
| 243 | SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, | ||
| 244 | uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language | ||
| 245 | ) { | ||
| 246 | Subtree result = ts_subtree_new_leaf( | ||
| 247 | pool, ts_builtin_sym_error, padding, size, bytes_scanned, | ||
| 248 | parse_state, false, false, false, language | ||
| 249 | ); | ||
| 250 | SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; | ||
| 251 | data->fragile_left = true; | ||
| 252 | data->fragile_right = true; | ||
| 253 | data->lookahead_char = lookahead_char; | ||
| 254 | return result; | ||
| 255 | } | ||
| 256 | |||
| 257 | // Clone a subtree. | ||
| 258 | MutableSubtree ts_subtree_clone(Subtree self) { | ||
| 259 | size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); | ||
| 260 | Subtree *new_children = ts_malloc(alloc_size); | ||
| 261 | Subtree *old_children = ts_subtree_children(self); | ||
| 262 | memcpy(new_children, old_children, alloc_size); | ||
| 263 | SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; | ||
| 264 | if (self.ptr->child_count > 0) { | ||
| 265 | for (uint32_t i = 0; i < self.ptr->child_count; i++) { | ||
| 266 | ts_subtree_retain(new_children[i]); | ||
| 267 | } | ||
| 268 | } else if (self.ptr->has_external_tokens) { | ||
| 269 | result->external_scanner_state = ts_external_scanner_state_copy( | ||
| 270 | &self.ptr->external_scanner_state | ||
| 271 | ); | ||
| 272 | } | ||
| 273 | result->ref_count = 1; | ||
| 274 | return (MutableSubtree) {.ptr = result}; | ||
| 275 | } | ||
| 276 | |||
| 277 | // Get mutable version of a subtree. | ||
| 278 | // | ||
| 279 | // This takes ownership of the subtree. If the subtree has only one owner, | ||
| 280 | // this will directly convert it into a mutable version. Otherwise, it will | ||
| 281 | // perform a copy. | ||
| 282 | MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { | ||
| 283 | if (self.data.is_inline) return (MutableSubtree) {self.data}; | ||
| 284 | if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); | ||
| 285 | MutableSubtree result = ts_subtree_clone(self); | ||
| 286 | ts_subtree_release(pool, self); | ||
| 287 | return result; | ||
| 288 | } | ||
| 289 | |||
| 290 | static void ts_subtree__compress( | ||
| 291 | MutableSubtree self, | ||
| 292 | unsigned count, | ||
| 293 | const TSLanguage *language, | ||
| 294 | MutableSubtreeArray *stack | ||
| 295 | ) { | ||
| 296 | unsigned initial_stack_size = stack->size; | ||
| 297 | |||
| 298 | MutableSubtree tree = self; | ||
| 299 | TSSymbol symbol = tree.ptr->symbol; | ||
| 300 | for (unsigned i = 0; i < count; i++) { | ||
| 301 | if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; | ||
| 302 | |||
| 303 | MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); | ||
| 304 | if ( | ||
| 305 | child.data.is_inline || | ||
| 306 | child.ptr->child_count < 2 || | ||
| 307 | child.ptr->ref_count > 1 || | ||
| 308 | child.ptr->symbol != symbol | ||
| 309 | ) break; | ||
| 310 | |||
| 311 | MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); | ||
| 312 | if ( | ||
| 313 | grandchild.data.is_inline || | ||
| 314 | grandchild.ptr->child_count < 2 || | ||
| 315 | grandchild.ptr->ref_count > 1 || | ||
| 316 | grandchild.ptr->symbol != symbol | ||
| 317 | ) break; | ||
| 318 | |||
| 319 | ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); | ||
| 320 | ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; | ||
| 321 | ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); | ||
| 322 | array_push(stack, tree); | ||
| 323 | tree = grandchild; | ||
| 324 | } | ||
| 325 | |||
| 326 | while (stack->size > initial_stack_size) { | ||
| 327 | tree = array_pop(stack); | ||
| 328 | MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); | ||
| 329 | MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); | ||
| 330 | ts_subtree_summarize_children(grandchild, language); | ||
| 331 | ts_subtree_summarize_children(child, language); | ||
| 332 | ts_subtree_summarize_children(tree, language); | ||
| 333 | } | ||
| 334 | } | ||
| 335 | |||
| 336 | void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) { | ||
| 337 | array_clear(&pool->tree_stack); | ||
| 338 | |||
| 339 | if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) { | ||
| 340 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); | ||
| 341 | } | ||
| 342 | |||
| 343 | while (pool->tree_stack.size > 0) { | ||
| 344 | MutableSubtree tree = array_pop(&pool->tree_stack); | ||
| 345 | |||
| 346 | if (tree.ptr->repeat_depth > 0) { | ||
| 347 | Subtree child1 = ts_subtree_children(tree)[0]; | ||
| 348 | Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; | ||
| 349 | long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); | ||
| 350 | if (repeat_delta > 0) { | ||
| 351 | unsigned n = (unsigned)repeat_delta; | ||
| 352 | for (unsigned i = n / 2; i > 0; i /= 2) { | ||
| 353 | ts_subtree__compress(tree, i, language, &pool->tree_stack); | ||
| 354 | n -= i; | ||
| 355 | } | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | for (uint32_t i = 0; i < tree.ptr->child_count; i++) { | ||
| 360 | Subtree child = ts_subtree_children(tree)[i]; | ||
| 361 | if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { | ||
| 362 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); | ||
| 363 | } | ||
| 364 | } | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | // Recompute every property of the node that is derived from its children: padding and size, lookahead_bytes, error_cost, the visible/named child counts, visible_descendant_count, dynamic_precedence, repeat_depth, first_leaf, and the external-token, scanner-state-change, column-dependence and fragility flags. | ||
| 369 | void ts_subtree_summarize_children( | ||
| 370 | MutableSubtree self, | ||
| 371 | const TSLanguage *language | ||
| 372 | ) { | ||
| 373 | assert(!self.data.is_inline); /* everything below goes through self.ptr */ | ||
| 374 | |||
| 375 | self.ptr->named_child_count = 0; | ||
| 376 | self.ptr->visible_child_count = 0; | ||
| 377 | self.ptr->error_cost = 0; | ||
| 378 | self.ptr->repeat_depth = 0; | ||
| 379 | self.ptr->visible_descendant_count = 0; | ||
| 380 | self.ptr->has_external_tokens = false; | ||
| 381 | self.ptr->depends_on_column = false; | ||
| 382 | self.ptr->has_external_scanner_state_change = false; | ||
| 383 | self.ptr->dynamic_precedence = 0; | ||
| 384 | |||
| 385 | uint32_t structural_index = 0; /* counts the non-extra children seen so far; used to index alias_sequence */ | ||
| 386 | const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); | ||
| 387 | uint32_t lookahead_end_byte = 0; /* furthest byte reached by any child's lookahead, measured from the start of this node's padding */ | ||
| 388 | |||
| 389 | const Subtree *children = ts_subtree_children(self); | ||
| 390 | for (uint32_t i = 0; i < self.ptr->child_count; i++) { | ||
| 391 | Subtree child = children[i]; | ||
| 392 | |||
| 393 | if ( | ||
| 394 | self.ptr->size.extent.row == 0 && /* size has not been updated for this child yet */ | ||
| 395 | ts_subtree_depends_on_column(child) | ||
| 396 | ) { | ||
| 397 | self.ptr->depends_on_column = true; | ||
| 398 | } | ||
| 399 | |||
| 400 | if (ts_subtree_has_external_scanner_state_change(child)) { | ||
| 401 | self.ptr->has_external_scanner_state_change = true; | ||
| 402 | } | ||
| 403 | |||
| 404 | if (i == 0) { /* the first child supplies the node's padding and its initial size */ | ||
| 405 | self.ptr->padding = ts_subtree_padding(child); | ||
| 406 | self.ptr->size = ts_subtree_size(child); | ||
| 407 | } else { | ||
| 408 | self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child)); | ||
| 409 | } | ||
| 410 | |||
| 411 | uint32_t child_lookahead_end_byte = | ||
| 412 | self.ptr->padding.bytes + | ||
| 413 | self.ptr->size.bytes + /* already includes this child */ | ||
| 414 | ts_subtree_lookahead_bytes(child); | ||
| 415 | if (child_lookahead_end_byte > lookahead_end_byte) { | ||
| 416 | lookahead_end_byte = child_lookahead_end_byte; | ||
| 417 | } | ||
| 418 | |||
| 419 | if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) { /* the cost of an error_repeat child is not added */ | ||
| 420 | self.ptr->error_cost += ts_subtree_error_cost(child); | ||
| 421 | } | ||
| 422 | |||
| 423 | uint32_t grandchild_count = ts_subtree_child_count(child); | ||
| 424 | if ( | ||
| 425 | self.ptr->symbol == ts_builtin_sym_error || | ||
| 426 | self.ptr->symbol == ts_builtin_sym_error_repeat | ||
| 427 | ) { | ||
| 428 | if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { /* extras and childless error nodes add no skipped-tree cost */ | ||
| 429 | if (ts_subtree_visible(child)) { | ||
| 430 | self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; | ||
| 431 | } else if (grandchild_count > 0) { /* hidden child: one skipped tree per visible child it contains */ | ||
| 432 | self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; | ||
| 433 | } | ||
| 434 | } | ||
| 435 | } | ||
| 436 | |||
| 437 | self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); | ||
| 438 | self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child); | ||
| 439 | |||
| 440 | if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) { /* an alias at this structural position makes the child count as visible */ | ||
| 441 | self.ptr->visible_descendant_count++; | ||
| 442 | self.ptr->visible_child_count++; | ||
| 443 | if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) { | ||
| 444 | self.ptr->named_child_count++; | ||
| 445 | } | ||
| 446 | } else if (ts_subtree_visible(child)) { | ||
| 447 | self.ptr->visible_descendant_count++; | ||
| 448 | self.ptr->visible_child_count++; | ||
| 449 | if (ts_subtree_named(child)) self.ptr->named_child_count++; | ||
| 450 | } else if (grandchild_count > 0) { /* hidden child with children: its visible and named children are counted as this node's */ | ||
| 451 | self.ptr->visible_child_count += child.ptr->visible_child_count; | ||
| 452 | self.ptr->named_child_count += child.ptr->named_child_count; | ||
| 453 | } | ||
| 454 | |||
| 455 | if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true; | ||
| 456 | |||
| 457 | if (ts_subtree_is_error(child)) { /* an error child makes the node fragile on both sides and clears its parse state */ | ||
| 458 | self.ptr->fragile_left = self.ptr->fragile_right = true; | ||
| 459 | self.ptr->parse_state = TS_TREE_STATE_NONE; | ||
| 460 | } | ||
| 461 | |||
| 462 | if (!ts_subtree_extra(child)) structural_index++; | ||
| 463 | } | ||
| 464 | |||
| 465 | self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; /* stored relative to the end of this node's content */ | ||
| 466 | |||
| 467 | if ( | ||
| 468 | self.ptr->symbol == ts_builtin_sym_error || | ||
| 469 | self.ptr->symbol == ts_builtin_sym_error_repeat | ||
| 470 | ) { | ||
| 471 | self.ptr->error_cost += /* error nodes also pay a fixed recovery cost plus a cost per skipped byte and row */ | ||
| 472 | ERROR_COST_PER_RECOVERY + | ||
| 473 | ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + | ||
| 474 | ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; | ||
| 475 | } | ||
| 476 | |||
| 477 | if (self.ptr->child_count > 0) { | ||
| 478 | Subtree first_child = children[0]; | ||
| 479 | Subtree last_child = children[self.ptr->child_count - 1]; | ||
| 480 | |||
| 481 | self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); | ||
| 482 | self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); | ||
| 483 | |||
| 484 | if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true; | ||
| 485 | if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true; | ||
| 486 | |||
| 487 | if ( /* repeat_depth is only set for hidden, unnamed nodes whose first child has the node's own symbol */ | ||
| 488 | self.ptr->child_count >= 2 && | ||
| 489 | !self.ptr->visible && | ||
| 490 | !self.ptr->named && | ||
| 491 | ts_subtree_symbol(first_child) == self.ptr->symbol | ||
| 492 | ) { | ||
| 493 | if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) { /* one more than the deeper of the first and last child */ | ||
| 494 | self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1; | ||
| 495 | } else { | ||
| 496 | self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1; | ||
| 497 | } | ||
| 498 | } | ||
| 499 | } | ||
| 500 | } | ||
| 501 | |||
| 502 | // Build a parent node for `symbol` over the given children. | ||
| 503 | // | ||
| 504 | // Ownership of the children array passes to the new node. | ||
| 505 | MutableSubtree ts_subtree_new_node( | ||
| 506 | TSSymbol symbol, | ||
| 507 | SubtreeArray *children, | ||
| 508 | unsigned production_id, | ||
| 509 | const TSLanguage *language | ||
| 510 | ) { | ||
| 511 | bool is_error_symbol = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; | ||
| 512 | TSSymbolMetadata symbol_info = ts_language_symbol_metadata(language, symbol); | ||
| 513 | |||
| 514 | // The node's header is stored right after its last child, so grow the array when it lacks room. | ||
| 515 | size_t required_bytes = ts_subtree_alloc_size(children->size); | ||
| 516 | if (required_bytes > children->capacity * sizeof(Subtree)) { | ||
| 517 | children->contents = ts_realloc(children->contents, required_bytes); | ||
| 518 | children->capacity = (uint32_t)(required_bytes / sizeof(Subtree)); | ||
| 519 | } | ||
| 520 | SubtreeHeapData *header = (SubtreeHeapData *)(children->contents + children->size); | ||
| 521 | |||
| 522 | *header = (SubtreeHeapData) { | ||
| 523 | .ref_count = 1, | ||
| 524 | .symbol = symbol, | ||
| 525 | .child_count = children->size, | ||
| 526 | .visible = symbol_info.visible, | ||
| 527 | .named = symbol_info.named, | ||
| 528 | .has_changes = false, | ||
| 529 | .has_external_scanner_state_change = false, | ||
| 530 | .fragile_left = is_error_symbol, | ||
| 531 | .fragile_right = is_error_symbol, | ||
| 532 | .is_keyword = false, | ||
| 533 | {{ | ||
| 534 | .visible_descendant_count = 0, | ||
| 535 | .production_id = production_id, | ||
| 536 | .first_leaf = {.symbol = 0, .parse_state = 0}, | ||
| 537 | }} | ||
| 538 | }; | ||
| 539 | MutableSubtree node = {.ptr = header}; | ||
| 540 | ts_subtree_summarize_children(node, language); // fills in everything derived from the children | ||
| 541 | return node; | ||
| 542 | } | ||
| 543 | |||
| 544 | // Create a new error node containing the given children. | ||
| 545 | // | ||
| 546 | // This node is treated as 'extra'. Its children are prevented from having | ||
| 547 | // having any effect on the parse state. | ||
| 548 | Subtree ts_subtree_new_error_node( | ||
| 549 | SubtreeArray *children, | ||
| 550 | bool extra, | ||
| 551 | const TSLanguage *language | ||
| 552 | ) { | ||
| 553 | MutableSubtree result = ts_subtree_new_node( | ||
| 554 | ts_builtin_sym_error, children, 0, language | ||
| 555 | ); | ||
| 556 | result.ptr->extra = extra; | ||
| 557 | return ts_subtree_from_mut(result); | ||
| 558 | } | ||
| 559 | |||
| 560 | // Create a new 'missing leaf' node: a zero-size leaf for `symbol`, with the given | ||
| 561 | // padding and lookahead, that is flagged `is_missing`. | ||
| 562 | // | ||
| 563 | // The flag is written to whichever representation (inline or heap) the new leaf uses. | ||
| 564 | Subtree ts_subtree_new_missing_leaf( | ||
| 565 | SubtreePool *pool, | ||
| 566 | TSSymbol symbol, | ||
| 567 | Length padding, | ||
| 568 | uint32_t lookahead_bytes, | ||
| 569 | const TSLanguage *language | ||
| 570 | ) { | ||
| 571 | Length empty_size = length_zero(); | ||
| 572 | Subtree leaf = ts_subtree_new_leaf( | ||
| 573 | pool, symbol, padding, empty_size, lookahead_bytes, 0, false, false, false, language | ||
| 574 | ); | ||
| 575 | if (!leaf.data.is_inline) { | ||
| 576 | ((SubtreeHeapData *)leaf.ptr)->is_missing = true; | ||
| 577 | } else { | ||
| 578 | leaf.data.is_missing = true; | ||
| 579 | } | ||
| 580 | return leaf; | ||
| 581 | } | ||
| 582 | |||
| 583 | void ts_subtree_retain(Subtree self) { | ||
| | // Take one more reference. Only heap subtrees carry a ref_count; inline ones are left alone. | ||
| 584 | if (!self.data.is_inline) { | ||
| 585 | assert(self.ptr->ref_count > 0); | ||
| 586 | atomic_inc((volatile uint32_t *)&self.ptr->ref_count); | ||
| 587 | assert(self.ptr->ref_count != 0); | ||
| | } | ||
| 588 | } | ||
| 589 | |||
| 590 | void ts_subtree_release(SubtreePool *pool, Subtree self) { /* Drop one reference to self. Each node whose count reaches zero is freed and its children are released in turn, using pool->tree_stack as a worklist instead of recursion. */ | ||
| 591 | if (self.data.is_inline) return; /* inline subtrees are not reference counted */ | ||
| 592 | array_clear(&pool->tree_stack); /* start from an empty worklist */ | ||
| 593 | |||
| 594 | assert(self.ptr->ref_count > 0); | ||
| 595 | if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) { /* last reference dropped: queue the node for freeing */ | ||
| 596 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); | ||
| 597 | } | ||
| 598 | |||
| 599 | while (pool->tree_stack.size > 0) { | ||
| 600 | MutableSubtree tree = array_pop(&pool->tree_stack); | ||
| 601 | if (tree.ptr->child_count > 0) { | ||
| 602 | Subtree *children = ts_subtree_children(tree); | ||
| 603 | for (uint32_t i = 0; i < tree.ptr->child_count; i++) { | ||
| 604 | Subtree child = children[i]; | ||
| 605 | if (child.data.is_inline) continue; | ||
| 606 | assert(child.ptr->ref_count > 0); | ||
| 607 | if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { /* this parent held the child's last reference */ | ||
| 608 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); | ||
| 609 | } | ||
| 610 | } | ||
| 611 | ts_free(children); /* NOTE(review): ts_subtree_new_node places the node's data at the end of its children array, so this presumably frees the node itself as well — confirm against ts_subtree_children */ | ||
| 612 | } else { | ||
| 613 | if (tree.ptr->has_external_tokens) { /* a leaf with external tokens has a scanner state that is deleted first */ | ||
| 614 | ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); | ||
| 615 | } | ||
| 616 | ts_subtree_pool_free(pool, tree.ptr); /* leaf nodes are handed back to the pool */ | ||
| 617 | } | ||
| 618 | } | ||
| 619 | } | ||
| 620 | |||
| 621 | int ts_subtree_compare(Subtree left, Subtree right) { | ||
| 622 | // Order by symbol first, then by child count; the smaller value sorts first. | ||
| 623 | TSSymbol left_symbol = ts_subtree_symbol(left), right_symbol = ts_subtree_symbol(right); | ||
| 624 | if (left_symbol != right_symbol) return left_symbol < right_symbol ? -1 : 1; | ||
| 625 | uint32_t left_count = ts_subtree_child_count(left), right_count = ts_subtree_child_count(right); | ||
| 626 | if (left_count != right_count) return left_count < right_count ? -1 : 1; | ||
| 627 | |||
| 628 | // Same symbol and arity: compare children pairwise, and let the first difference decide. | ||
| 629 | for (uint32_t i = 0; i < left_count; i++) { | ||
| 630 | int order = ts_subtree_compare( | ||
| 631 | ts_subtree_children(left)[i], ts_subtree_children(right)[i] | ||
| 632 | ); | ||
| 633 | if (order == -1 || order == 1) return order; | ||
| 634 | } | ||
| 635 | return 0; | ||
| 636 | } | ||
| 637 | |||
| 638 | static inline void ts_subtree_set_has_changes(MutableSubtree *self) { | ||
| 639 | if (!self->data.is_inline) { | ||
| 640 | self->ptr->has_changes = true; // heap subtree: the flag is stored in the heap data | ||
| 641 | } else { | ||
| 642 | self->data.has_changes = true; // inline subtree: the flag is stored in the value itself | ||
| 643 | } | ||
| 644 | } | ||
| 645 | |||
| | // Apply an edit to a subtree: adjust the padding and size of every node the edit | ||
| | // touches, mark those nodes as changed, and return the (possibly re-allocated) root. | ||
| | // | ||
| | // Nodes are visited with an explicit stack instead of recursion. Each stack entry pairs | ||
| | // a pointer to a subtree slot with the edit expressed relative to the start of that subtree. | ||
| 646 | Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) { | ||
| 647 | typedef struct { | ||
| 648 | Subtree *tree; | ||
| 649 | Edit edit; | ||
| 650 | } EditEntry; | ||
| 651 | |||
| 652 | Array(EditEntry) stack = array_new(); | ||
| 653 | array_push(&stack, ((EditEntry) { | ||
| 654 | .tree = &self, | ||
| 655 | .edit = (Edit) { | ||
| 656 | .start = {input_edit->start_byte, input_edit->start_point}, | ||
| 657 | .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, | ||
| 658 | .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, | ||
| 659 | }, | ||
| 660 | })); | ||
| 661 | |||
| 662 | while (stack.size) { | ||
| 663 | EditEntry entry = array_pop(&stack); | ||
| 664 | Edit edit = entry.edit; | ||
| 665 | bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; | ||
| 666 | bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; | ||
| 667 | bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); | ||
| 668 | |||
| 669 | Length size = ts_subtree_size(*entry.tree); | ||
| 670 | Length padding = ts_subtree_padding(*entry.tree); | ||
| 671 | Length total_size = length_add(padding, size); | ||
| 672 | uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); | ||
| 673 | uint32_t end_byte = total_size.bytes + lookahead_bytes; | ||
| | // Skip nodes that end (lookahead included) before the edit starts; a no-op edit | ||
| | // exactly at that end is skipped as well. | ||
| 674 | if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; | ||
| 675 | |||
| 676 | // If the edit is entirely within the space before this subtree, then shift this | ||
| 677 | // subtree over according to the edit without changing its size. | ||
| 678 | if (edit.old_end.bytes <= padding.bytes) { | ||
| 679 | padding = length_add(edit.new_end, length_sub(padding, edit.old_end)); | ||
| 680 | } | ||
| 681 | |||
| 682 | // If the edit starts in the space before this subtree and extends into this subtree, | ||
| 683 | // shrink the subtree's content to compensate for the change in the space before it. | ||
| 684 | else if (edit.start.bytes < padding.bytes) { | ||
| 685 | size = length_saturating_sub(size, length_sub(edit.old_end, padding)); | ||
| 686 | padding = edit.new_end; | ||
| 687 | } | ||
| 688 | |||
| 689 | // If the edit is a pure insertion right at the start of the subtree, | ||
| 690 | // shift the subtree over according to the insertion. | ||
| 691 | else if (edit.start.bytes == padding.bytes && is_pure_insertion) { | ||
| 692 | padding = edit.new_end; | ||
| 693 | } | ||
| 694 | |||
| 695 | // If the edit is within this subtree, resize the subtree to reflect the edit. | ||
| 696 | else if ( | ||
| 697 | edit.start.bytes < total_size.bytes || | ||
| 698 | (edit.start.bytes == total_size.bytes && is_pure_insertion) | ||
| 699 | ) { | ||
| 700 | size = length_add( | ||
| 701 | length_sub(edit.new_end, padding), | ||
| 702 | length_saturating_sub(total_size, edit.old_end) | ||
| 703 | ); | ||
| 704 | } | ||
| 705 | |||
| 706 | MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); | ||
| 707 | |||
| 708 | if (result.data.is_inline) { | ||
| 709 | if (ts_subtree_can_inline(padding, size, lookahead_bytes)) { | ||
| 710 | result.data.padding_bytes = padding.bytes; | ||
| 711 | result.data.padding_rows = padding.extent.row; | ||
| 712 | result.data.padding_columns = padding.extent.column; | ||
| 713 | result.data.size_bytes = size.bytes; | ||
| 714 | } else { | ||
| | // The adjusted lengths no longer fit the inline layout, so the leaf is moved into | ||
| | // a heap node. Every flag is assigned explicitly so that none of them depends on | ||
| | // whatever the allocation happened to contain. | ||
| 715 | SubtreeHeapData *data = ts_subtree_pool_allocate(pool); | ||
| 716 | data->ref_count = 1; | ||
| 717 | data->padding = padding; | ||
| 718 | data->size = size; | ||
| 719 | data->lookahead_bytes = lookahead_bytes; | ||
| 720 | data->error_cost = 0; | ||
| 721 | data->child_count = 0; | ||
| 722 | data->symbol = result.data.symbol; | ||
| 723 | data->parse_state = result.data.parse_state; | ||
| 724 | data->visible = result.data.visible; | ||
| 725 | data->named = result.data.named; | ||
| 726 | data->extra = result.data.extra; | ||
| 727 | data->fragile_left = data->fragile_right = false; | ||
| 728 | data->has_changes = false; | ||
| 729 | data->has_external_tokens = false; | ||
| 730 | data->has_external_scanner_state_change = false; // was the one flag left unassigned here | ||
| 731 | data->depends_on_column = false; | ||
| 732 | data->is_missing = result.data.is_missing; | ||
| 733 | data->is_keyword = result.data.is_keyword; | ||
| 734 | result.ptr = data; | ||
| 735 | } | ||
| 736 | } else { | ||
| 737 | result.ptr->padding = padding; | ||
| 738 | result.ptr->size = size; | ||
| 739 | } | ||
| 740 | |||
| 741 | ts_subtree_set_has_changes(&result); | ||
| 742 | *entry.tree = ts_subtree_from_mut(result); | ||
| 743 | |||
| | // child_left is assigned at the top of every iteration before it is read. | ||
| 744 | Length child_left, child_right = length_zero(); | ||
| 745 | for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { | ||
| 746 | Subtree *child = &ts_subtree_children(*entry.tree)[i]; | ||
| 747 | Length child_size = ts_subtree_total_size(*child); | ||
| 748 | child_left = child_right; | ||
| 749 | child_right = length_add(child_left, child_size); | ||
| 750 | |||
| 751 | // If this child ends before the edit, it is not affected. | ||
| 752 | if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; | ||
| 753 | |||
| 754 | // Keep editing child nodes until a node is reached that starts after the edit. | ||
| 755 | // Also, if this node's validity depends on its column position, then continue | ||
| 756 | // invalidating child nodes until reaching a line break. | ||
| 757 | if (( | ||
| 758 | (child_left.bytes > edit.old_end.bytes) || | ||
| 759 | (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) | ||
| 760 | ) && ( | ||
| 761 | !invalidate_first_row || | ||
| 762 | child_left.extent.row > entry.tree->ptr->padding.extent.row | ||
| 763 | )) { | ||
| 764 | break; | ||
| 765 | } | ||
| 766 | |||
| 767 | // Transform edit into the child's coordinate space. | ||
| 768 | Edit child_edit = { | ||
| 769 | .start = length_saturating_sub(edit.start, child_left), | ||
| 770 | .old_end = length_saturating_sub(edit.old_end, child_left), | ||
| 771 | .new_end = length_saturating_sub(edit.new_end, child_left), | ||
| 772 | }; | ||
| 773 | |||
| 774 | // Interpret all inserted text as applying to the *first* child that touches the edit. | ||
| 775 | // Subsequent children never have any text inserted into them; they are only | ||
| 776 | // shrunk to compensate for the edit. | ||
| 777 | if ( | ||
| 778 | child_right.bytes > edit.start.bytes || | ||
| 779 | (child_right.bytes == edit.start.bytes && is_pure_insertion) | ||
| 780 | ) { | ||
| 781 | edit.new_end = edit.start; | ||
| 782 | } | ||
| 783 | |||
| 784 | // Children that occur before the edit are not reshaped by the edit. | ||
| 785 | else { | ||
| 786 | child_edit.old_end = child_edit.start; | ||
| 787 | child_edit.new_end = child_edit.start; | ||
| 788 | } | ||
| 789 | |||
| 790 | // Queue processing of this child's subtree. | ||
| 791 | array_push(&stack, ((EditEntry) { | ||
| 792 | .tree = child, | ||
| 793 | .edit = child_edit, | ||
| 794 | })); | ||
| 795 | } | ||
| 796 | } | ||
| 797 | |||
| 798 | array_delete(&stack); | ||
| 799 | return self; | ||
| 800 | } | ||
| 801 | |||
| 802 | Subtree ts_subtree_last_external_token(Subtree tree) { | ||
| 803 | if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; | ||
| 804 | while (tree.ptr->child_count > 0) { // descend until a leaf is reached | ||
| 805 | const Subtree *children = ts_subtree_children(tree); | ||
| 806 | for (uint32_t i = tree.ptr->child_count; i-- > 0;) { // right to left: the last matching child wins | ||
| 807 | if (ts_subtree_has_external_tokens(children[i])) { | ||
| 808 | tree = children[i]; | ||
| 809 | break; | ||
| 810 | } | ||
| 811 | } | ||
| 812 | } | ||
| 813 | return tree; | ||
| 814 | } | ||
| 815 | |||
| 816 | static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) { | ||
| 817 | // The -1 value and the characters with a named escape are handled first. | ||
| 818 | switch (chr) { | ||
| 819 | case -1: return snprintf(str, n, "INVALID"); | ||
| 820 | case '\0': return snprintf(str, n, "'\\0'"); | ||
| 821 | case '\n': return snprintf(str, n, "'\\n'"); | ||
| 822 | case '\t': return snprintf(str, n, "'\\t'"); | ||
| 823 | case '\r': return snprintf(str, n, "'\\r'"); | ||
| 824 | default: break; | ||
| 825 | } | ||
| 826 | // Printable ASCII is written in quotes; anything else as its decimal value. | ||
| 827 | if (0 < chr && chr < 128 && isprint(chr)) { | ||
| 828 | return snprintf(str, n, "'%c'", chr); | ||
| 829 | } | ||
| 830 | return snprintf(str, n, "%d", chr); | ||
| 831 | } | ||
| 832 | |||
| 833 | static const char *const ROOT_FIELD = "__ROOT__"; /* sentinel field name for the root call; recognised by pointer comparison (field_name == ROOT_FIELD), not by content */ | ||
| 834 | |||
| 835 | static size_t ts_subtree__write_to_string( /* Write an S-expression for self and, recursively, its children into string. Returns the sum of the lengths reported by snprintf, i.e. the length of the complete text. */ | ||
| 836 | Subtree self, char *string, size_t limit, | ||
| 837 | const TSLanguage *language, bool include_all, | ||
| 838 | TSSymbol alias_symbol, bool alias_is_named, const char *field_name | ||
| 839 | ) { | ||
| 840 | if (!self.ptr) return snprintf(string, limit, "(NULL)"); | ||
| 841 | |||
| 842 | char *cursor = string; | ||
| 843 | char **writer = (limit > 1) ? &cursor : &string; /* with limit <= 1 every write targets the start of string, so the call only measures; cursor still advances by the reported lengths */ | ||
| 844 | bool is_root = field_name == ROOT_FIELD; | ||
| 845 | bool is_visible = | ||
| 846 | include_all || | ||
| 847 | ts_subtree_missing(self) || | ||
| 848 | ( | ||
| 849 | alias_symbol | ||
| 850 | ? alias_is_named | ||
| 851 | : ts_subtree_visible(self) && ts_subtree_named(self) | ||
| 852 | ); | ||
| 853 | |||
| 854 | if (is_visible) { | ||
| 855 | if (!is_root) { /* non-root nodes are preceded by a space and, if present, their field name */ | ||
| 856 | cursor += snprintf(*writer, limit, " "); | ||
| 857 | if (field_name) { | ||
| 858 | cursor += snprintf(*writer, limit, "%s: ", field_name); | ||
| 859 | } | ||
| 860 | } | ||
| 861 | |||
| 862 | if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { /* non-empty error leaf: print the character stored in lookahead_char */ | ||
| 863 | cursor += snprintf(*writer, limit, "(UNEXPECTED "); | ||
| 864 | cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); | ||
| 865 | } else { | ||
| 866 | TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); | ||
| 867 | const char *symbol_name = ts_language_symbol_name(language, symbol); | ||
| 868 | if (ts_subtree_missing(self)) { | ||
| 869 | cursor += snprintf(*writer, limit, "(MISSING "); | ||
| 870 | if (alias_is_named || ts_subtree_named(self)) { /* named symbols are printed bare, anonymous ones in double quotes */ | ||
| 871 | cursor += snprintf(*writer, limit, "%s", symbol_name); | ||
| 872 | } else { | ||
| 873 | cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); | ||
| 874 | } | ||
| 875 | } else { | ||
| 876 | cursor += snprintf(*writer, limit, "(%s", symbol_name); | ||
| 877 | } | ||
| 878 | } | ||
| 879 | } else if (is_root) { /* a root that is not visible is still printed, as a quoted symbol name */ | ||
| 880 | TSSymbol symbol = ts_subtree_symbol(self); | ||
| 881 | const char *symbol_name = ts_language_symbol_name(language, symbol); | ||
| 882 | cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); | ||
| 883 | } | ||
| 884 | |||
| 885 | if (ts_subtree_child_count(self)) { | ||
| 886 | const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); | ||
| 887 | const TSFieldMapEntry *field_map, *field_map_end; | ||
| 888 | ts_language_field_map( | ||
| 889 | language, | ||
| 890 | self.ptr->production_id, | ||
| 891 | &field_map, | ||
| 892 | &field_map_end | ||
| 893 | ); | ||
| 894 | |||
| 895 | uint32_t structural_child_index = 0; /* index among non-extra children; aliases and field entries are looked up by it */ | ||
| 896 | for (uint32_t i = 0; i < self.ptr->child_count; i++) { | ||
| 897 | Subtree child = ts_subtree_children(self)[i]; | ||
| 898 | if (ts_subtree_extra(child)) { /* extras are written without an alias or a field name */ | ||
| 899 | cursor += ts_subtree__write_to_string( | ||
| 900 | child, *writer, limit, | ||
| 901 | language, include_all, | ||
| 902 | 0, false, NULL | ||
| 903 | ); | ||
| 904 | } else { | ||
| 905 | TSSymbol subtree_alias_symbol = alias_sequence | ||
| 906 | ? alias_sequence[structural_child_index] | ||
| 907 | : 0; | ||
| 908 | bool subtree_alias_is_named = subtree_alias_symbol | ||
| 909 | ? ts_language_symbol_metadata(language, subtree_alias_symbol).named | ||
| 910 | : false; | ||
| 911 | |||
| 912 | const char *child_field_name = is_visible ? NULL : field_name; /* a node that is not printed passes its own field name down to its children */ | ||
| 913 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { | ||
| 914 | if (!map->inherited && map->child_index == structural_child_index) { /* the first non-inherited entry for this child wins */ | ||
| 915 | child_field_name = language->field_names[map->field_id]; | ||
| 916 | break; | ||
| 917 | } | ||
| 918 | } | ||
| 919 | |||
| 920 | cursor += ts_subtree__write_to_string( | ||
| 921 | child, *writer, limit, | ||
| 922 | language, include_all, | ||
| 923 | subtree_alias_symbol, subtree_alias_is_named, child_field_name | ||
| 924 | ); | ||
| 925 | structural_child_index++; | ||
| 926 | } | ||
| 927 | } | ||
| 928 | } | ||
| 929 | |||
| 930 | if (is_visible) cursor += snprintf(*writer, limit, ")"); | ||
| 931 | |||
| 932 | return cursor - string; | ||
| 933 | } | ||
| 934 | |||
| 935 | char *ts_subtree_string( | ||
| 936 | Subtree self, | ||
| 937 | const TSLanguage *language, | ||
| 938 | bool include_all | ||
| 939 | ) { | ||
| 940 | // Render the subtree as an S-expression in a buffer obtained from ts_malloc. | ||
| 941 | // Pass 1: write into a one-byte buffer purely to learn the required length. | ||
| 942 | char probe[1]; | ||
| 943 | size_t text_length = ts_subtree__write_to_string( | ||
| 944 | self, probe, 1, language, include_all, 0, false, ROOT_FIELD | ||
| 945 | ); | ||
| 946 | // Pass 2: allocate room for the text plus its terminator and write it for real. | ||
| 947 | size_t buffer_size = text_length + 1; | ||
| 948 | char *buffer = ts_malloc(buffer_size * sizeof(char)); | ||
| 949 | ts_subtree__write_to_string( | ||
| 950 | self, buffer, buffer_size, language, include_all, 0, false, ROOT_FIELD | ||
| 951 | ); | ||
| 952 | return buffer; | ||
| 953 | } | ||
| 954 | |||
| | // Write one DOT node statement for `self`, then recurse into each child and write | ||
| | // the edge from `self` to it. Nodes are named after the address of their Subtree slot. | ||
| | // | ||
| | // `start_offset` is the byte offset at which this subtree begins. `alias_symbol`, | ||
| | // when non-zero, replaces the subtree's own symbol in the label. | ||
| 955 | void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, | ||
| 956 | const TSLanguage *language, TSSymbol alias_symbol, | ||
| 957 | FILE *f) { | ||
| 958 | TSSymbol subtree_symbol = ts_subtree_symbol(*self); | ||
| 959 | TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol; | ||
| 960 | uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); | ||
| 961 | fprintf(f, "tree_%p [label=\"", (void *)self); | ||
| 962 | ts_language_write_symbol_as_dot_string(language, f, symbol); | ||
| 963 | fprintf(f, "\""); | ||
| 964 | |||
| 965 | if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); | ||
| 966 | if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray"); | ||
| 967 | |||
| 968 | fprintf(f, ", tooltip=\"" | ||
| 969 | "range: %u - %u\n" | ||
| 970 | "state: %d\n" | ||
| 971 | "error-cost: %u\n" | ||
| 972 | "has-changes: %u\n" | ||
| 973 | "depends-on-column: %u\n" | ||
| 974 | "descendant-count: %u\n" | ||
| 975 | "repeat-depth: %u\n" | ||
| 976 | "lookahead-bytes: %u", | ||
| 977 | start_offset, end_offset, | ||
| 978 | ts_subtree_parse_state(*self), | ||
| 979 | ts_subtree_error_cost(*self), | ||
| 980 | ts_subtree_has_changes(*self), | ||
| 981 | ts_subtree_depends_on_column(*self), | ||
| 982 | ts_subtree_visible_descendant_count(*self), | ||
| 983 | ts_subtree_repeat_depth(*self), | ||
| 984 | ts_subtree_lookahead_bytes(*self) | ||
| 985 | ); | ||
| 986 | |||
| | // An error leaf records the offending character. A zero value is skipped, because | ||
| | // "%c" would otherwise write a raw NUL byte into the middle of the DOT text. | ||
| | // NOTE(review): other characters are still written unescaped (e.g. a double quote). | ||
| 987 | if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { | ||
| 988 | fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); | ||
| 989 | } | ||
| 990 | |||
| 991 | fprintf(f, "\"]\n"); | ||
| 992 | |||
| 993 | uint32_t child_start_offset = start_offset; | ||
| | // Index into language->alias_sequences for this node's production. When it is zero, | ||
| | // no alias is looked up for any child. | ||
| 994 | uint32_t child_info_offset = | ||
| 995 | language->max_alias_sequence_length * | ||
| 996 | ts_subtree_production_id(*self); | ||
| 997 | for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { | ||
| 998 | const Subtree *child = &ts_subtree_children(*self)[i]; | ||
| 999 | TSSymbol subtree_alias_symbol = 0; | ||
| 1000 | if (!ts_subtree_extra(*child) && child_info_offset) { | ||
| 1001 | subtree_alias_symbol = language->alias_sequences[child_info_offset]; | ||
| 1002 | child_info_offset++; | ||
| 1003 | } | ||
| 1004 | ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); | ||
| 1005 | fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); | ||
| 1006 | child_start_offset += ts_subtree_total_bytes(*child); | ||
| 1007 | } | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) { | ||
| 1011 | fputs("digraph tree {\n", f); // open the graph | ||
| 1012 | fputs("edge [arrowhead=none]\n", f); // edges are drawn without arrowheads | ||
| 1013 | ts_subtree__print_dot_graph(&self, 0, language, 0, f); // root: offset 0, no alias | ||
| 1014 | fputs("}\n", f); // close the graph | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { | ||
| 1018 | static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; | ||
| 1019 | // Only a non-null, heap-allocated leaf that has external tokens stores a state of its own. | ||
| 1020 | bool stores_state = | ||
| 1021 | self.ptr && | ||
| 1022 | !self.data.is_inline && | ||
| 1023 | self.ptr->has_external_tokens && | ||
| 1024 | self.ptr->child_count == 0; | ||
| 1025 | |||
| 1026 | // Every other subtree reports the shared, zero-length state. | ||
| 1027 | if (!stores_state) return &empty_state; | ||
| 1028 | return &self.ptr->external_scanner_state; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) { | ||
| 1032 | // Compare self's scanner state against the data and length of other's state. | ||
| 1033 | // A subtree that stores no state of its own contributes the shared empty state. | ||
| 1034 | const ExternalScannerState *own = ts_subtree_external_scanner_state(self); | ||
| 1035 | const ExternalScannerState *theirs = ts_subtree_external_scanner_state(other); | ||
| 1036 | return ts_external_scanner_state_eq( | ||
| 1037 | own, ts_external_scanner_state_data(theirs), theirs->length | ||
| 1038 | ); | ||
| 1039 | } | ||
diff --git a/vendor/tree-sitter/lib/src/subtree.h b/vendor/tree-sitter/lib/src/subtree.h new file mode 100644 index 0000000..cac657f --- /dev/null +++ b/vendor/tree-sitter/lib/src/subtree.h | |||
| @@ -0,0 +1,382 @@ | |||
| 1 | #ifndef TREE_SITTER_SUBTREE_H_ | ||
| 2 | #define TREE_SITTER_SUBTREE_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include <limits.h> | ||
| 9 | #include <stdbool.h> | ||
| 10 | #include <stdio.h> | ||
| 11 | #include "./length.h" | ||
| 12 | #include "./array.h" | ||
| 13 | #include "./error_costs.h" | ||
| 14 | #include "./host.h" | ||
| 15 | #include "tree_sitter/api.h" | ||
| 16 | #include "tree_sitter/parser.h" | ||
| 17 | |||
| 18 | #define TS_TREE_STATE_NONE USHRT_MAX /* value stored in parse_state by ts_subtree_summarize_children when a node has an error child */ | ||
| 19 | #define NULL_SUBTREE ((Subtree) {.ptr = NULL}) /* a Subtree whose pointer is NULL; returned by ts_subtree_last_external_token when there are no external tokens */ | ||
| 20 | |||
| 21 | // The serialized state of an external scanner. | ||
| 22 | // | ||
| 23 | // Every time an external token subtree is created after a call to an | ||
| 24 | // external scanner, the scanner's `serialize` function is called to | ||
| 25 | // retrieve a serialized copy of its state. The bytes are then copied | ||
| 26 | // onto the subtree itself so that the scanner's state can later be | ||
| 27 | // restored using its `deserialize` function. | ||
| 28 | // | ||
| 29 | // Small byte arrays are stored inline, and long ones are allocated | ||
| 30 | // separately on the heap. | ||
| 31 | typedef struct { | ||
| 32 | union { | ||
| 33 | char *long_data; /* used when the bytes live in a separate heap allocation */ | ||
| 34 | char short_data[24]; /* used when the bytes are stored inline */ | ||
| 35 | }; | ||
| 36 | uint32_t length; /* number of serialized bytes; presumably also what selects between the two union members — confirm in subtree.c */ | ||
| 37 | } ExternalScannerState; | ||
| 38 | |||
| 39 | // A compact representation of a subtree. | ||
| 40 | // | ||
| 41 | // This representation is used for small leaf nodes that are not | ||
| 42 | // errors, and were not created by an external scanner. | ||
| 43 | // | ||
| 44 | // The idea behind the layout of this struct is that the `is_inline` | ||
| 45 | // bit will fall exactly into the same location as the least significant | ||
| 46 | // bit of the pointer in `Subtree` or `MutableSubtree`, respectively. | ||
| 47 | // Because of alignment, for any valid pointer this will be 0, giving | ||
| 48 | // us the opportunity to make use of this bit to signify whether to use | ||
| 49 | // the pointer or the inline struct. | ||
| 50 | typedef struct SubtreeInlineData SubtreeInlineData; | ||
| 51 | |||
| 52 | #define SUBTREE_BITS /* the one-bit flags shared by every layout below */ \ | ||
| 53 | bool visible : 1; \ | ||
| 54 | bool named : 1; \ | ||
| 55 | bool extra : 1; \ | ||
| 56 | bool has_changes : 1; \ | ||
| 57 | bool is_missing : 1; \ | ||
| 58 | bool is_keyword : 1; | ||
| 59 | |||
| 60 | #define SUBTREE_SIZE /* the packed padding, lookahead and size fields shared by every layout below */ \ | ||
| 61 | uint8_t padding_columns; \ | ||
| 62 | uint8_t padding_rows : 4; \ | ||
| 63 | uint8_t lookahead_bytes : 4; \ | ||
| 64 | uint8_t padding_bytes; \ | ||
| 65 | uint8_t size_bytes; | ||
| 66 | |||
| 67 | #if TS_BIG_ENDIAN /* the same fields are declared in a different order per endianness and pointer size, so that is_inline lands on the pointer's least significant bit as described above */ | ||
| 68 | #if TS_PTR_SIZE == 32 | ||
| 69 | |||
| 70 | struct SubtreeInlineData { | ||
| 71 | uint16_t parse_state; | ||
| 72 | uint8_t symbol; | ||
| 73 | SUBTREE_BITS | ||
| 74 | bool unused : 1; | ||
| 75 | bool is_inline : 1; | ||
| 76 | SUBTREE_SIZE | ||
| 77 | }; | ||
| 78 | |||
| 79 | #else | ||
| 80 | |||
| 81 | struct SubtreeInlineData { | ||
| 82 | SUBTREE_SIZE | ||
| 83 | uint16_t parse_state; | ||
| 84 | uint8_t symbol; | ||
| 85 | SUBTREE_BITS | ||
| 86 | bool unused : 1; | ||
| 87 | bool is_inline : 1; | ||
| 88 | }; | ||
| 89 | |||
| 90 | #endif | ||
| 91 | #else | ||
| 92 | |||
| 93 | struct SubtreeInlineData { | ||
| 94 | bool is_inline : 1; | ||
| 95 | SUBTREE_BITS | ||
| 96 | uint8_t symbol; /* only 8 bits wide here, unlike the TSSymbol field of SubtreeHeapData */ | ||
| 97 | uint16_t parse_state; | ||
| 98 | SUBTREE_SIZE | ||
| 99 | }; | ||
| 100 | |||
| 101 | #endif | ||
| 102 | |||
| 103 | #undef SUBTREE_BITS | ||
| 104 | #undef SUBTREE_SIZE | ||
| 105 | |||
| 106 | // A heap-allocated representation of a subtree. | ||
| 107 | // | ||
| 108 | // This representation is used for parent nodes, external tokens, | ||
| 109 | // errors, and other leaf nodes whose data is too large to fit into | ||
| 110 | // the inline representation. | ||
| 111 | typedef struct { | ||
| 112 | volatile uint32_t ref_count; /* changed with atomic_inc / atomic_dec by ts_subtree_retain and ts_subtree_release */ | ||
| 113 | Length padding; /* extent of the space preceding the node's content */ | ||
| 114 | Length size; /* extent of the node's content, not counting padding */ | ||
| 115 | uint32_t lookahead_bytes; /* counted past the end of the content; ts_subtree_edit still visits a node when an edit starts within this range */ | ||
| 116 | uint32_t error_cost; | ||
| 117 | uint32_t child_count; | ||
| 118 | TSSymbol symbol; | ||
| 119 | TSStateId parse_state; | ||
| 120 | |||
| 121 | bool visible : 1; | ||
| 122 | bool named : 1; | ||
| 123 | bool extra : 1; | ||
| 124 | bool fragile_left : 1; | ||
| 125 | bool fragile_right : 1; | ||
| 126 | bool has_changes : 1; | ||
| 127 | bool has_external_tokens : 1; | ||
| 128 | bool has_external_scanner_state_change : 1; | ||
| 129 | bool depends_on_column: 1; | ||
| 130 | bool is_missing : 1; | ||
| 131 | bool is_keyword : 1; | ||
| 132 | |||
| 133 | union { /* which member is meaningful depends on child_count, has_external_tokens and symbol, as noted on each member */ | ||
| 134 | // Non-terminal subtrees (`child_count > 0`) | ||
| 135 | struct { | ||
| 136 | uint32_t visible_child_count; | ||
| 137 | uint32_t named_child_count; | ||
| 138 | uint32_t visible_descendant_count; | ||
| 139 | int32_t dynamic_precedence; | ||
| 140 | uint16_t repeat_depth; | ||
| 141 | uint16_t production_id; | ||
| 142 | struct { | ||
| 143 | TSSymbol symbol; | ||
| 144 | TSStateId parse_state; | ||
| 145 | } first_leaf; | ||
| 146 | }; | ||
| 147 | |||
| 148 | // External terminal subtrees (`child_count == 0 && has_external_tokens`) | ||
| 149 | ExternalScannerState external_scanner_state; | ||
| 150 | |||
| 151 | // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`) | ||
| 152 | int32_t lookahead_char; | ||
| 153 | }; | ||
| 154 | } SubtreeHeapData; | ||
| 155 | |||
// The fundamental building block of a syntax tree.
//
// A subtree is either stored inline (`data`) or is a pointer to read-only
// heap data (`ptr`). `data.is_inline` tells the two representations apart, so
// it must be checked before `ptr` is dereferenced.
typedef union {
  SubtreeInlineData data;
  const SubtreeHeapData *ptr;
} Subtree;
| 161 | |||
// Like Subtree, but mutable: the heap data reached through `ptr` may be
// written to. `data.is_inline` distinguishes the two members exactly as it
// does for `Subtree`.
typedef union {
  SubtreeInlineData data;
  SubtreeHeapData *ptr;
} MutableSubtree;
| 167 | |||
// Growable arrays of immutable / mutable subtrees.
typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray;

// Scratch storage handed to the functions that release or edit subtrees.
// NOTE(review): presumably `free_trees` caches heap nodes for reuse and
// `tree_stack` is a work list used while releasing/editing - confirm in
// subtree.c.
typedef struct {
  MutableSubtreeArray free_trees;
  MutableSubtreeArray tree_stack;
} SubtreePool;
| 175 | |||
// External scanner state helpers.
void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
const char *ts_external_scanner_state_data(const ExternalScannerState *);
bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned);
void ts_external_scanner_state_delete(ExternalScannerState *self);

// SubtreeArray helpers.
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
void ts_subtree_array_reverse(SubtreeArray *);

// SubtreePool lifecycle.
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *);

// Subtree construction, reference counting, editing and inspection.
Subtree ts_subtree_new_leaf(
  SubtreePool *, TSSymbol, Length, Length, uint32_t,
  TSStateId, bool, bool, bool, const TSLanguage *
);
Subtree ts_subtree_new_error(
  SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
);
MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *);
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
void ts_subtree_retain(Subtree);
void ts_subtree_release(SubtreePool *, Subtree);
int ts_subtree_compare(Subtree, Subtree);
void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
Subtree ts_subtree_last_external_token(Subtree);
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
| 214 | |||
| 215 | #define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name) | ||
| 216 | |||
| 217 | static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } | ||
| 218 | static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } | ||
| 219 | static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); } | ||
| 220 | static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); } | ||
| 221 | static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); } | ||
| 222 | static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); } | ||
| 223 | static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); } | ||
| 224 | static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } | ||
| 225 | static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); } | ||
| 226 | |||
| 227 | #undef SUBTREE_GET | ||
| 228 | |||
| 229 | // Get the size needed to store a heap-allocated subtree with the given | ||
| 230 | // number of children. | ||
| 231 | static inline size_t ts_subtree_alloc_size(uint32_t child_count) { | ||
| 232 | return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); | ||
| 233 | } | ||
| 234 | |||
// Get a pointer to a subtree's first child. The children are stored directly
// in front of the subtree's own heap data, so the array starts `child_count`
// slots before `ptr`. Inline subtrees have no children and yield NULL.
#define ts_subtree_children(self) \
  ((self).data.is_inline \
    ? NULL \
    : ((Subtree *)((self).ptr) - (self).ptr->child_count))
| 239 | |||
| 240 | static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { | ||
| 241 | if (self->data.is_inline) { | ||
| 242 | self->data.extra = is_extra; | ||
| 243 | } else { | ||
| 244 | self->ptr->extra = is_extra; | ||
| 245 | } | ||
| 246 | } | ||
| 247 | |||
| 248 | static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { | ||
| 249 | if (self.data.is_inline) return self.data.symbol; | ||
| 250 | if (self.ptr->child_count == 0) return self.ptr->symbol; | ||
| 251 | return self.ptr->first_leaf.symbol; | ||
| 252 | } | ||
| 253 | |||
| 254 | static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { | ||
| 255 | if (self.data.is_inline) return self.data.parse_state; | ||
| 256 | if (self.ptr->child_count == 0) return self.ptr->parse_state; | ||
| 257 | return self.ptr->first_leaf.parse_state; | ||
| 258 | } | ||
| 259 | |||
| 260 | static inline Length ts_subtree_padding(Subtree self) { | ||
| 261 | if (self.data.is_inline) { | ||
| 262 | Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; | ||
| 263 | return result; | ||
| 264 | } else { | ||
| 265 | return self.ptr->padding; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | static inline Length ts_subtree_size(Subtree self) { | ||
| 270 | if (self.data.is_inline) { | ||
| 271 | Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; | ||
| 272 | return result; | ||
| 273 | } else { | ||
| 274 | return self.ptr->size; | ||
| 275 | } | ||
| 276 | } | ||
| 277 | |||
| 278 | static inline Length ts_subtree_total_size(Subtree self) { | ||
| 279 | return length_add(ts_subtree_padding(self), ts_subtree_size(self)); | ||
| 280 | } | ||
| 281 | |||
| 282 | static inline uint32_t ts_subtree_total_bytes(Subtree self) { | ||
| 283 | return ts_subtree_total_size(self).bytes; | ||
| 284 | } | ||
| 285 | |||
| 286 | static inline uint32_t ts_subtree_child_count(Subtree self) { | ||
| 287 | return self.data.is_inline ? 0 : self.ptr->child_count; | ||
| 288 | } | ||
| 289 | |||
| 290 | static inline uint32_t ts_subtree_repeat_depth(Subtree self) { | ||
| 291 | return self.data.is_inline ? 0 : self.ptr->repeat_depth; | ||
| 292 | } | ||
| 293 | |||
| 294 | static inline uint32_t ts_subtree_is_repetition(Subtree self) { | ||
| 295 | return self.data.is_inline | ||
| 296 | ? 0 | ||
| 297 | : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; | ||
| 298 | } | ||
| 299 | |||
| 300 | static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) { | ||
| 301 | return (self.data.is_inline || self.ptr->child_count == 0) | ||
| 302 | ? 0 | ||
| 303 | : self.ptr->visible_descendant_count; | ||
| 304 | } | ||
| 305 | |||
| 306 | static inline uint32_t ts_subtree_visible_child_count(Subtree self) { | ||
| 307 | if (ts_subtree_child_count(self) > 0) { | ||
| 308 | return self.ptr->visible_child_count; | ||
| 309 | } else { | ||
| 310 | return 0; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | static inline uint32_t ts_subtree_error_cost(Subtree self) { | ||
| 315 | if (ts_subtree_missing(self)) { | ||
| 316 | return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; | ||
| 317 | } else { | ||
| 318 | return self.data.is_inline ? 0 : self.ptr->error_cost; | ||
| 319 | } | ||
| 320 | } | ||
| 321 | |||
| 322 | static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { | ||
| 323 | return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; | ||
| 324 | } | ||
| 325 | |||
| 326 | static inline uint16_t ts_subtree_production_id(Subtree self) { | ||
| 327 | if (ts_subtree_child_count(self) > 0) { | ||
| 328 | return self.ptr->production_id; | ||
| 329 | } else { | ||
| 330 | return 0; | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 334 | static inline bool ts_subtree_fragile_left(Subtree self) { | ||
| 335 | return self.data.is_inline ? false : self.ptr->fragile_left; | ||
| 336 | } | ||
| 337 | |||
| 338 | static inline bool ts_subtree_fragile_right(Subtree self) { | ||
| 339 | return self.data.is_inline ? false : self.ptr->fragile_right; | ||
| 340 | } | ||
| 341 | |||
| 342 | static inline bool ts_subtree_has_external_tokens(Subtree self) { | ||
| 343 | return self.data.is_inline ? false : self.ptr->has_external_tokens; | ||
| 344 | } | ||
| 345 | |||
| 346 | static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { | ||
| 347 | return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change; | ||
| 348 | } | ||
| 349 | |||
| 350 | static inline bool ts_subtree_depends_on_column(Subtree self) { | ||
| 351 | return self.data.is_inline ? false : self.ptr->depends_on_column; | ||
| 352 | } | ||
| 353 | |||
| 354 | static inline bool ts_subtree_is_fragile(Subtree self) { | ||
| 355 | return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); | ||
| 356 | } | ||
| 357 | |||
| 358 | static inline bool ts_subtree_is_error(Subtree self) { | ||
| 359 | return ts_subtree_symbol(self) == ts_builtin_sym_error; | ||
| 360 | } | ||
| 361 | |||
| 362 | static inline bool ts_subtree_is_eof(Subtree self) { | ||
| 363 | return ts_subtree_symbol(self) == ts_builtin_sym_end; | ||
| 364 | } | ||
| 365 | |||
| 366 | static inline Subtree ts_subtree_from_mut(MutableSubtree self) { | ||
| 367 | Subtree result; | ||
| 368 | result.data = self.data; | ||
| 369 | return result; | ||
| 370 | } | ||
| 371 | |||
| 372 | static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { | ||
| 373 | MutableSubtree result; | ||
| 374 | result.data = self.data; | ||
| 375 | return result; | ||
| 376 | } | ||
| 377 | |||
| 378 | #ifdef __cplusplus | ||
| 379 | } | ||
| 380 | #endif | ||
| 381 | |||
| 382 | #endif // TREE_SITTER_SUBTREE_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/tree.c b/vendor/tree-sitter/lib/src/tree.c new file mode 100644 index 0000000..784c51f --- /dev/null +++ b/vendor/tree-sitter/lib/src/tree.c | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | #include "tree_sitter/api.h" | ||
| 2 | #include "./array.h" | ||
| 3 | #include "./get_changed_ranges.h" | ||
| 4 | #include "./length.h" | ||
| 5 | #include "./subtree.h" | ||
| 6 | #include "./tree_cursor.h" | ||
| 7 | #include "./tree.h" | ||
| 8 | |||
| 9 | TSTree *ts_tree_new( | ||
| 10 | Subtree root, const TSLanguage *language, | ||
| 11 | const TSRange *included_ranges, unsigned included_range_count | ||
| 12 | ) { | ||
| 13 | TSTree *result = ts_malloc(sizeof(TSTree)); | ||
| 14 | result->root = root; | ||
| 15 | result->language = language; | ||
| 16 | result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); | ||
| 17 | memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); | ||
| 18 | result->included_range_count = included_range_count; | ||
| 19 | return result; | ||
| 20 | } | ||
| 21 | |||
| 22 | TSTree *ts_tree_copy(const TSTree *self) { | ||
| 23 | ts_subtree_retain(self->root); | ||
| 24 | return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); | ||
| 25 | } | ||
| 26 | |||
| 27 | void ts_tree_delete(TSTree *self) { | ||
| 28 | if (!self) return; | ||
| 29 | |||
| 30 | SubtreePool pool = ts_subtree_pool_new(0); | ||
| 31 | ts_subtree_release(&pool, self->root); | ||
| 32 | ts_subtree_pool_delete(&pool); | ||
| 33 | ts_free(self->included_ranges); | ||
| 34 | ts_free(self); | ||
| 35 | } | ||
| 36 | |||
| 37 | TSNode ts_tree_root_node(const TSTree *self) { | ||
| 38 | return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); | ||
| 39 | } | ||
| 40 | |||
| 41 | TSNode ts_tree_root_node_with_offset( | ||
| 42 | const TSTree *self, | ||
| 43 | uint32_t offset_bytes, | ||
| 44 | TSPoint offset_extent | ||
| 45 | ) { | ||
| 46 | Length offset = {offset_bytes, offset_extent}; | ||
| 47 | return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); | ||
| 48 | } | ||
| 49 | |||
| 50 | const TSLanguage *ts_tree_language(const TSTree *self) { | ||
| 51 | return self->language; | ||
| 52 | } | ||
| 53 | |||
| 54 | void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { | ||
| 55 | for (unsigned i = 0; i < self->included_range_count; i++) { | ||
| 56 | TSRange *range = &self->included_ranges[i]; | ||
| 57 | if (range->end_byte >= edit->old_end_byte) { | ||
| 58 | if (range->end_byte != UINT32_MAX) { | ||
| 59 | range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); | ||
| 60 | range->end_point = point_add( | ||
| 61 | edit->new_end_point, | ||
| 62 | point_sub(range->end_point, edit->old_end_point) | ||
| 63 | ); | ||
| 64 | if (range->end_byte < edit->new_end_byte) { | ||
| 65 | range->end_byte = UINT32_MAX; | ||
| 66 | range->end_point = POINT_MAX; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } else if (range->end_byte > edit->start_byte) { | ||
| 70 | range->end_byte = edit->start_byte; | ||
| 71 | range->end_point = edit->start_point; | ||
| 72 | } | ||
| 73 | if (range->start_byte >= edit->old_end_byte) { | ||
| 74 | range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); | ||
| 75 | range->start_point = point_add( | ||
| 76 | edit->new_end_point, | ||
| 77 | point_sub(range->start_point, edit->old_end_point) | ||
| 78 | ); | ||
| 79 | if (range->start_byte < edit->new_end_byte) { | ||
| 80 | range->start_byte = UINT32_MAX; | ||
| 81 | range->start_point = POINT_MAX; | ||
| 82 | } | ||
| 83 | } else if (range->start_byte > edit->start_byte) { | ||
| 84 | range->start_byte = edit->start_byte; | ||
| 85 | range->start_point = edit->start_point; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | SubtreePool pool = ts_subtree_pool_new(0); | ||
| 90 | self->root = ts_subtree_edit(self->root, edit, &pool); | ||
| 91 | ts_subtree_pool_delete(&pool); | ||
| 92 | } | ||
| 93 | |||
| 94 | TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) { | ||
| 95 | *length = self->included_range_count; | ||
| 96 | TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange)); | ||
| 97 | memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); | ||
| 98 | return ranges; | ||
| 99 | } | ||
| 100 | |||
| 101 | TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) { | ||
| 102 | TreeCursor cursor1 = {NULL, array_new()}; | ||
| 103 | TreeCursor cursor2 = {NULL, array_new()}; | ||
| 104 | ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); | ||
| 105 | ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); | ||
| 106 | |||
| 107 | TSRangeArray included_range_differences = array_new(); | ||
| 108 | ts_range_array_get_changed_ranges( | ||
| 109 | old_tree->included_ranges, old_tree->included_range_count, | ||
| 110 | new_tree->included_ranges, new_tree->included_range_count, | ||
| 111 | &included_range_differences | ||
| 112 | ); | ||
| 113 | |||
| 114 | TSRange *result; | ||
| 115 | *length = ts_subtree_get_changed_ranges( | ||
| 116 | &old_tree->root, &new_tree->root, &cursor1, &cursor2, | ||
| 117 | old_tree->language, &included_range_differences, &result | ||
| 118 | ); | ||
| 119 | |||
| 120 | array_delete(&included_range_differences); | ||
| 121 | array_delete(&cursor1.stack); | ||
| 122 | array_delete(&cursor2.stack); | ||
| 123 | return result; | ||
| 124 | } | ||
| 125 | |||
| 126 | #ifdef _WIN32 | ||
| 127 | |||
| 128 | void ts_tree_print_dot_graph(const TSTree *self, int fd) { | ||
| 129 | (void)self; | ||
| 130 | (void)fd; | ||
| 131 | } | ||
| 132 | |||
| 133 | #else | ||
| 134 | |||
| 135 | #include <unistd.h> | ||
| 136 | |||
| 137 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { | ||
| 138 | FILE *file = fdopen(dup(file_descriptor), "a"); | ||
| 139 | ts_subtree_print_dot_graph(self->root, self->language, file); | ||
| 140 | fclose(file); | ||
| 141 | } | ||
| 142 | |||
| 143 | #endif | ||
diff --git a/vendor/tree-sitter/lib/src/tree.h b/vendor/tree-sitter/lib/src/tree.h new file mode 100644 index 0000000..f012f88 --- /dev/null +++ b/vendor/tree-sitter/lib/src/tree.h | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | #ifndef TREE_SITTER_TREE_H_ | ||
| 2 | #define TREE_SITTER_TREE_H_ | ||
| 3 | |||
| 4 | #include "./subtree.h" | ||
| 5 | |||
| 6 | #ifdef __cplusplus | ||
| 7 | extern "C" { | ||
| 8 | #endif | ||
| 9 | |||
// Associates a child subtree with its parent, together with a position and an
// alias symbol.
// NOTE(review): nothing visible here uses this type; presumably it is a
// leftover from a parent-lookup cache - confirm before relying on it.
typedef struct {
  const Subtree *child;
  const Subtree *parent;
  Length position;
  TSSymbol alias_symbol;
} ParentCacheEntry;
| 16 | |||
// A syntax tree: a root subtree, the language it belongs to, and the tree's
// own array of included ranges (`included_range_count` entries).
struct TSTree {
  Subtree root;
  const TSLanguage *language;
  TSRange *included_ranges;
  unsigned included_range_count;
};
| 23 | |||
// Create a tree from a root subtree, a language, and an array of included
// ranges with its length.
TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
// Create a node handle from a tree, a subtree within it, a position, and an
// alias symbol (0 is passed where no alias applies).
TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
| 26 | |||
| 27 | #ifdef __cplusplus | ||
| 28 | } | ||
| 29 | #endif | ||
| 30 | |||
| 31 | #endif // TREE_SITTER_TREE_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/tree_cursor.c b/vendor/tree-sitter/lib/src/tree_cursor.c new file mode 100644 index 0000000..63d22c8 --- /dev/null +++ b/vendor/tree-sitter/lib/src/tree_cursor.c | |||
| @@ -0,0 +1,712 @@ | |||
| 1 | #include "tree_sitter/api.h" | ||
| 2 | #include "./alloc.h" | ||
| 3 | #include "./tree_cursor.h" | ||
| 4 | #include "./language.h" | ||
| 5 | #include "./tree.h" | ||
| 6 | |||
// Iteration state for walking the children of one parent subtree.
typedef struct {
  // The subtree whose children are visited; `parent.ptr` is NULL when there
  // is nothing to iterate.
  Subtree parent;
  const TSTree *tree;
  // Position of the child at `child_index`.
  Length position;
  // Index of the current child among all children of `parent`.
  uint32_t child_index;
  // Index of the current child counting only children that are not extras.
  uint32_t structural_child_index;
  uint32_t descendant_index;
  // Alias symbols for the parent's production, indexed by
  // `structural_child_index`; may be NULL.
  const TSSymbol *alias_sequence;
} CursorChildIterator;
| 16 | |||
| 17 | // CursorChildIterator | ||
| 18 | |||
| 19 | static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { | ||
| 20 | TreeCursorEntry *entry = &self->stack.contents[index]; | ||
| 21 | if (index == 0 || ts_subtree_visible(*entry->subtree)) { | ||
| 22 | return true; | ||
| 23 | } else if (!ts_subtree_extra(*entry->subtree)) { | ||
| 24 | TreeCursorEntry *parent_entry = &self->stack.contents[index - 1]; | ||
| 25 | return ts_language_alias_at( | ||
| 26 | self->tree->language, | ||
| 27 | parent_entry->subtree->ptr->production_id, | ||
| 28 | entry->structural_child_index | ||
| 29 | ); | ||
| 30 | } else { | ||
| 31 | return false; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { | ||
| 36 | TreeCursorEntry *last_entry = array_back(&self->stack); | ||
| 37 | if (ts_subtree_child_count(*last_entry->subtree) == 0) { | ||
| 38 | return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; | ||
| 39 | } | ||
| 40 | const TSSymbol *alias_sequence = ts_language_alias_sequence( | ||
| 41 | self->tree->language, | ||
| 42 | last_entry->subtree->ptr->production_id | ||
| 43 | ); | ||
| 44 | |||
| 45 | uint32_t descendant_index = last_entry->descendant_index; | ||
| 46 | if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { | ||
| 47 | descendant_index += 1; | ||
| 48 | } | ||
| 49 | |||
| 50 | return (CursorChildIterator) { | ||
| 51 | .tree = self->tree, | ||
| 52 | .parent = *last_entry->subtree, | ||
| 53 | .position = last_entry->position, | ||
| 54 | .child_index = 0, | ||
| 55 | .structural_child_index = 0, | ||
| 56 | .descendant_index = descendant_index, | ||
| 57 | .alias_sequence = alias_sequence, | ||
| 58 | }; | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline bool ts_tree_cursor_child_iterator_next( | ||
| 62 | CursorChildIterator *self, | ||
| 63 | TreeCursorEntry *result, | ||
| 64 | bool *visible | ||
| 65 | ) { | ||
| 66 | if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; | ||
| 67 | const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; | ||
| 68 | *result = (TreeCursorEntry) { | ||
| 69 | .subtree = child, | ||
| 70 | .position = self->position, | ||
| 71 | .child_index = self->child_index, | ||
| 72 | .structural_child_index = self->structural_child_index, | ||
| 73 | .descendant_index = self->descendant_index, | ||
| 74 | }; | ||
| 75 | *visible = ts_subtree_visible(*child); | ||
| 76 | bool extra = ts_subtree_extra(*child); | ||
| 77 | if (!extra) { | ||
| 78 | if (self->alias_sequence) { | ||
| 79 | *visible |= self->alias_sequence[self->structural_child_index]; | ||
| 80 | } | ||
| 81 | self->structural_child_index++; | ||
| 82 | } | ||
| 83 | |||
| 84 | self->descendant_index += ts_subtree_visible_descendant_count(*child); | ||
| 85 | if (*visible) { | ||
| 86 | self->descendant_index += 1; | ||
| 87 | } | ||
| 88 | |||
| 89 | self->position = length_add(self->position, ts_subtree_size(*child)); | ||
| 90 | self->child_index++; | ||
| 91 | |||
| 92 | if (self->child_index < self->parent.ptr->child_count) { | ||
| 93 | Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; | ||
| 94 | self->position = length_add(self->position, ts_subtree_padding(next_child)); | ||
| 95 | } | ||
| 96 | |||
| 97 | return true; | ||
| 98 | } | ||
| 99 | |||
| 100 | // Return a position that, when `b` is added to it, yields `a`. This | ||
| 101 | // can only be computed if `b` has zero rows. Otherwise, this function | ||
| 102 | // returns `LENGTH_UNDEFINED`, and the caller needs to recompute | ||
| 103 | // the position some other way. | ||
| 104 | static inline Length length_backtrack(Length a, Length b) { | ||
| 105 | if (length_is_undefined(a) || b.extent.row != 0) { | ||
| 106 | return LENGTH_UNDEFINED; | ||
| 107 | } | ||
| 108 | |||
| 109 | Length result; | ||
| 110 | result.bytes = a.bytes - b.bytes; | ||
| 111 | result.extent.row = a.extent.row; | ||
| 112 | result.extent.column = a.extent.column - b.extent.column; | ||
| 113 | return result; | ||
| 114 | } | ||
| 115 | |||
| 116 | static inline bool ts_tree_cursor_child_iterator_previous( | ||
| 117 | CursorChildIterator *self, | ||
| 118 | TreeCursorEntry *result, | ||
| 119 | bool *visible | ||
| 120 | ) { | ||
| 121 | // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into | ||
| 122 | // account unsigned underflow | ||
| 123 | if (!self->parent.ptr || (int8_t)self->child_index == -1) return false; | ||
| 124 | const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; | ||
| 125 | *result = (TreeCursorEntry) { | ||
| 126 | .subtree = child, | ||
| 127 | .position = self->position, | ||
| 128 | .child_index = self->child_index, | ||
| 129 | .structural_child_index = self->structural_child_index, | ||
| 130 | }; | ||
| 131 | *visible = ts_subtree_visible(*child); | ||
| 132 | bool extra = ts_subtree_extra(*child); | ||
| 133 | if (!extra && self->alias_sequence) { | ||
| 134 | *visible |= self->alias_sequence[self->structural_child_index]; | ||
| 135 | self->structural_child_index--; | ||
| 136 | } | ||
| 137 | |||
| 138 | self->position = length_backtrack(self->position, ts_subtree_padding(*child)); | ||
| 139 | self->child_index--; | ||
| 140 | |||
| 141 | // unsigned can underflow so compare it to child_count | ||
| 142 | if (self->child_index < self->parent.ptr->child_count) { | ||
| 143 | Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; | ||
| 144 | Length size = ts_subtree_size(previous_child); | ||
| 145 | self->position = length_backtrack(self->position, size); | ||
| 146 | } | ||
| 147 | |||
| 148 | return true; | ||
| 149 | } | ||
| 150 | |||
| 151 | // TSTreeCursor - lifecycle | ||
| 152 | |||
| 153 | TSTreeCursor ts_tree_cursor_new(TSNode node) { | ||
| 154 | TSTreeCursor self = {NULL, NULL, {0, 0}}; | ||
| 155 | ts_tree_cursor_init((TreeCursor *)&self, node); | ||
| 156 | return self; | ||
| 157 | } | ||
| 158 | |||
| 159 | void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) { | ||
| 160 | ts_tree_cursor_init((TreeCursor *)_self, node); | ||
| 161 | } | ||
| 162 | |||
| 163 | void ts_tree_cursor_init(TreeCursor *self, TSNode node) { | ||
| 164 | self->tree = node.tree; | ||
| 165 | array_clear(&self->stack); | ||
| 166 | array_push(&self->stack, ((TreeCursorEntry) { | ||
| 167 | .subtree = (const Subtree *)node.id, | ||
| 168 | .position = { | ||
| 169 | ts_node_start_byte(node), | ||
| 170 | ts_node_start_point(node) | ||
| 171 | }, | ||
| 172 | .child_index = 0, | ||
| 173 | .structural_child_index = 0, | ||
| 174 | .descendant_index = 0, | ||
| 175 | })); | ||
| 176 | } | ||
| 177 | |||
| 178 | void ts_tree_cursor_delete(TSTreeCursor *_self) { | ||
| 179 | TreeCursor *self = (TreeCursor *)_self; | ||
| 180 | array_delete(&self->stack); | ||
| 181 | } | ||
| 182 | |||
| 183 | // TSTreeCursor - walking the tree | ||
| 184 | |||
| 185 | TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) { | ||
| 186 | TreeCursor *self = (TreeCursor *)_self; | ||
| 187 | bool visible; | ||
| 188 | TreeCursorEntry entry; | ||
| 189 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); | ||
| 190 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { | ||
| 191 | if (visible) { | ||
| 192 | array_push(&self->stack, entry); | ||
| 193 | return TreeCursorStepVisible; | ||
| 194 | } | ||
| 195 | if (ts_subtree_visible_child_count(*entry.subtree) > 0) { | ||
| 196 | array_push(&self->stack, entry); | ||
| 197 | return TreeCursorStepHidden; | ||
| 198 | } | ||
| 199 | } | ||
| 200 | return TreeCursorStepNone; | ||
| 201 | } | ||
| 202 | |||
| 203 | bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { | ||
| 204 | for (;;) { | ||
| 205 | switch (ts_tree_cursor_goto_first_child_internal(self)) { | ||
| 206 | case TreeCursorStepHidden: | ||
| 207 | continue; | ||
| 208 | case TreeCursorStepVisible: | ||
| 209 | return true; | ||
| 210 | default: | ||
| 211 | return false; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | return false; | ||
| 215 | } | ||
| 216 | |||
| 217 | TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { | ||
| 218 | TreeCursor *self = (TreeCursor *)_self; | ||
| 219 | bool visible; | ||
| 220 | TreeCursorEntry entry; | ||
| 221 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); | ||
| 222 | if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone; | ||
| 223 | |||
| 224 | TreeCursorEntry last_entry; | ||
| 225 | TreeCursorStep last_step = TreeCursorStepNone; | ||
| 226 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { | ||
| 227 | if (visible) { | ||
| 228 | last_entry = entry; | ||
| 229 | last_step = TreeCursorStepVisible; | ||
| 230 | } | ||
| 231 | else if (ts_subtree_visible_child_count(*entry.subtree) > 0) { | ||
| 232 | last_entry = entry; | ||
| 233 | last_step = TreeCursorStepHidden; | ||
| 234 | } | ||
| 235 | } | ||
| 236 | if (last_entry.subtree) { | ||
| 237 | array_push(&self->stack, last_entry); | ||
| 238 | return last_step; | ||
| 239 | } | ||
| 240 | |||
| 241 | return TreeCursorStepNone; | ||
| 242 | } | ||
| 243 | |||
| 244 | bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { | ||
| 245 | for (;;) { | ||
| 246 | switch (ts_tree_cursor_goto_last_child_internal(self)) { | ||
| 247 | case TreeCursorStepHidden: | ||
| 248 | continue; | ||
| 249 | case TreeCursorStepVisible: | ||
| 250 | return true; | ||
| 251 | default: | ||
| 252 | return false; | ||
| 253 | } | ||
| 254 | } | ||
| 255 | return false; | ||
| 256 | } | ||
| 257 | |||
| 258 | static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( | ||
| 259 | TSTreeCursor *_self, | ||
| 260 | uint32_t goal_byte, | ||
| 261 | TSPoint goal_point | ||
| 262 | ) { | ||
| 263 | TreeCursor *self = (TreeCursor *)_self; | ||
| 264 | uint32_t initial_size = self->stack.size; | ||
| 265 | uint32_t visible_child_index = 0; | ||
| 266 | |||
| 267 | bool did_descend; | ||
| 268 | do { | ||
| 269 | did_descend = false; | ||
| 270 | |||
| 271 | bool visible; | ||
| 272 | TreeCursorEntry entry; | ||
| 273 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); | ||
| 274 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { | ||
| 275 | Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); | ||
| 276 | bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); | ||
| 277 | uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); | ||
| 278 | if (at_goal) { | ||
| 279 | if (visible) { | ||
| 280 | array_push(&self->stack, entry); | ||
| 281 | return visible_child_index; | ||
| 282 | } | ||
| 283 | if (visible_child_count > 0) { | ||
| 284 | array_push(&self->stack, entry); | ||
| 285 | did_descend = true; | ||
| 286 | break; | ||
| 287 | } | ||
| 288 | } else if (visible) { | ||
| 289 | visible_child_index++; | ||
| 290 | } else { | ||
| 291 | visible_child_index += visible_child_count; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | } while (did_descend); | ||
| 295 | |||
| 296 | self->stack.size = initial_size; | ||
| 297 | return -1; | ||
| 298 | } | ||
| 299 | |||
| 300 | int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) { | ||
| 301 | return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); | ||
| 302 | } | ||
| 303 | |||
| 304 | int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) { | ||
| 305 | return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); | ||
| 306 | } | ||
| 307 | |||
| 308 | TreeCursorStep ts_tree_cursor_goto_sibling_internal( | ||
| 309 | TSTreeCursor *_self, | ||
| 310 | bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { | ||
| 311 | TreeCursor *self = (TreeCursor *)_self; | ||
| 312 | uint32_t initial_size = self->stack.size; | ||
| 313 | |||
| 314 | while (self->stack.size > 1) { | ||
| 315 | TreeCursorEntry entry = array_pop(&self->stack); | ||
| 316 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); | ||
| 317 | iterator.child_index = entry.child_index; | ||
| 318 | iterator.structural_child_index = entry.structural_child_index; | ||
| 319 | iterator.position = entry.position; | ||
| 320 | iterator.descendant_index = entry.descendant_index; | ||
| 321 | |||
| 322 | bool visible = false; | ||
| 323 | advance(&iterator, &entry, &visible); | ||
| 324 | if (visible && self->stack.size + 1 < initial_size) break; | ||
| 325 | |||
| 326 | while (advance(&iterator, &entry, &visible)) { | ||
| 327 | if (visible) { | ||
| 328 | array_push(&self->stack, entry); | ||
| 329 | return TreeCursorStepVisible; | ||
| 330 | } | ||
| 331 | |||
| 332 | if (ts_subtree_visible_child_count(*entry.subtree)) { | ||
| 333 | array_push(&self->stack, entry); | ||
| 334 | return TreeCursorStepHidden; | ||
| 335 | } | ||
| 336 | } | ||
| 337 | } | ||
| 338 | |||
| 339 | self->stack.size = initial_size; | ||
| 340 | return TreeCursorStepNone; | ||
| 341 | } | ||
| 342 | |||
| 343 | TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { | ||
| 344 | return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); | ||
| 345 | } | ||
| 346 | |||
| 347 | bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { | ||
| 348 | switch (ts_tree_cursor_goto_next_sibling_internal(self)) { | ||
| 349 | case TreeCursorStepHidden: | ||
| 350 | ts_tree_cursor_goto_first_child(self); | ||
| 351 | return true; | ||
| 352 | case TreeCursorStepVisible: | ||
| 353 | return true; | ||
| 354 | default: | ||
| 355 | return false; | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) { | ||
| 360 | // since subtracting across row loses column information, we may have to | ||
| 361 | // restore it | ||
| 362 | TreeCursor *self = (TreeCursor *)_self; | ||
| 363 | |||
| 364 | // for that, save current position before traversing | ||
| 365 | Length position = array_back(&self->stack)->position; | ||
| 366 | TreeCursorStep step = ts_tree_cursor_goto_sibling_internal( | ||
| 367 | _self, ts_tree_cursor_child_iterator_previous); | ||
| 368 | if (step == TreeCursorStepNone) | ||
| 369 | return step; | ||
| 370 | |||
| 371 | // if length is already valid, there's no need to recompute it | ||
| 372 | if (!length_is_undefined(array_back(&self->stack)->position)) | ||
| 373 | return step; | ||
| 374 | |||
| 375 | // restore position from the parent node | ||
| 376 | const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; | ||
| 377 | position = parent->position; | ||
| 378 | uint32_t child_index = array_back(&self->stack)->child_index; | ||
| 379 | const Subtree *children = ts_subtree_children((*(parent->subtree))); | ||
| 380 | |||
| 381 | if (child_index > 0) { | ||
| 382 | // skip first child padding since its position should match the position of the parent | ||
| 383 | position = length_add(position, ts_subtree_size(children[0])); | ||
| 384 | for (uint32_t i = 1; i < child_index; ++i) { | ||
| 385 | position = length_add(position, ts_subtree_total_size(children[i])); | ||
| 386 | } | ||
| 387 | position = length_add(position, ts_subtree_padding(children[child_index])); | ||
| 388 | } | ||
| 389 | |||
| 390 | array_back(&self->stack)->position = position; | ||
| 391 | |||
| 392 | return step; | ||
| 393 | } | ||
| 394 | |||
| 395 | bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) { | ||
| 396 | switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { | ||
| 397 | case TreeCursorStepHidden: | ||
| 398 | ts_tree_cursor_goto_last_child(self); | ||
| 399 | return true; | ||
| 400 | case TreeCursorStepVisible: | ||
| 401 | return true; | ||
| 402 | default: | ||
| 403 | return false; | ||
| 404 | } | ||
| 405 | } | ||
| 406 | |||
| 407 | bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { | ||
| 408 | TreeCursor *self = (TreeCursor *)_self; | ||
| 409 | for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { | ||
| 410 | if (ts_tree_cursor_is_entry_visible(self, i)) { | ||
| 411 | self->stack.size = i + 1; | ||
| 412 | return true; | ||
| 413 | } | ||
| 414 | } | ||
| 415 | return false; | ||
| 416 | } | ||
| 417 | |||
| 418 | void ts_tree_cursor_goto_descendant( | ||
| 419 | TSTreeCursor *_self, | ||
| 420 | uint32_t goal_descendant_index | ||
| 421 | ) { | ||
| 422 | TreeCursor *self = (TreeCursor *)_self; | ||
| 423 | |||
| 424 | // Ascend to the lowest ancestor that contains the goal node. | ||
| 425 | for (;;) { | ||
| 426 | uint32_t i = self->stack.size - 1; | ||
| 427 | TreeCursorEntry *entry = &self->stack.contents[i]; | ||
| 428 | uint32_t next_descendant_index = | ||
| 429 | entry->descendant_index + | ||
| 430 | (ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) + | ||
| 431 | ts_subtree_visible_descendant_count(*entry->subtree); | ||
| 432 | if ( | ||
| 433 | (entry->descendant_index <= goal_descendant_index) && | ||
| 434 | (next_descendant_index > goal_descendant_index) | ||
| 435 | ) { | ||
| 436 | break; | ||
| 437 | } else if (self->stack.size <= 1) { | ||
| 438 | return; | ||
| 439 | } else { | ||
| 440 | self->stack.size--; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | |||
| 444 | // Descend to the goal node. | ||
| 445 | bool did_descend = true; | ||
| 446 | do { | ||
| 447 | did_descend = false; | ||
| 448 | bool visible; | ||
| 449 | TreeCursorEntry entry; | ||
| 450 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); | ||
| 451 | if (iterator.descendant_index > goal_descendant_index) { | ||
| 452 | return; | ||
| 453 | } | ||
| 454 | |||
| 455 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { | ||
| 456 | if (iterator.descendant_index > goal_descendant_index) { | ||
| 457 | array_push(&self->stack, entry); | ||
| 458 | if (visible && entry.descendant_index == goal_descendant_index) { | ||
| 459 | return; | ||
| 460 | } else { | ||
| 461 | did_descend = true; | ||
| 462 | break; | ||
| 463 | } | ||
| 464 | } | ||
| 465 | } | ||
| 466 | } while (did_descend); | ||
| 467 | } | ||
| 468 | |||
| 469 | uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) { | ||
| 470 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 471 | TreeCursorEntry *last_entry = array_back(&self->stack); | ||
| 472 | return last_entry->descendant_index; | ||
| 473 | } | ||
| 474 | |||
| 475 | TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { | ||
| 476 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 477 | TreeCursorEntry *last_entry = array_back(&self->stack); | ||
| 478 | TSSymbol alias_symbol = 0; | ||
| 479 | if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { | ||
| 480 | TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; | ||
| 481 | alias_symbol = ts_language_alias_at( | ||
| 482 | self->tree->language, | ||
| 483 | parent_entry->subtree->ptr->production_id, | ||
| 484 | last_entry->structural_child_index | ||
| 485 | ); | ||
| 486 | } | ||
| 487 | return ts_node_new( | ||
| 488 | self->tree, | ||
| 489 | last_entry->subtree, | ||
| 490 | last_entry->position, | ||
| 491 | alias_symbol | ||
| 492 | ); | ||
| 493 | } | ||
| 494 | |||
| 495 | // Private - Get various facts about the current node that are needed | ||
| 496 | // when executing tree queries. | ||
| 497 | void ts_tree_cursor_current_status( | ||
| 498 | const TSTreeCursor *_self, | ||
| 499 | TSFieldId *field_id, | ||
| 500 | bool *has_later_siblings, | ||
| 501 | bool *has_later_named_siblings, | ||
| 502 | bool *can_have_later_siblings_with_this_field, | ||
| 503 | TSSymbol *supertypes, | ||
| 504 | unsigned *supertype_count | ||
| 505 | ) { | ||
| 506 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 507 | unsigned max_supertypes = *supertype_count; | ||
| 508 | *field_id = 0; | ||
| 509 | *supertype_count = 0; | ||
| 510 | *has_later_siblings = false; | ||
| 511 | *has_later_named_siblings = false; | ||
| 512 | *can_have_later_siblings_with_this_field = false; | ||
| 513 | |||
| 514 | // Walk up the tree, visiting the current node and its invisible ancestors, | ||
| 515 | // because fields can refer to nodes through invisible *wrapper* nodes, | ||
| 516 | for (unsigned i = self->stack.size - 1; i > 0; i--) { | ||
| 517 | TreeCursorEntry *entry = &self->stack.contents[i]; | ||
| 518 | TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; | ||
| 519 | |||
| 520 | const TSSymbol *alias_sequence = ts_language_alias_sequence( | ||
| 521 | self->tree->language, | ||
| 522 | parent_entry->subtree->ptr->production_id | ||
| 523 | ); | ||
| 524 | |||
| 525 | #define subtree_symbol(subtree, structural_child_index) \ | ||
| 526 | (( \ | ||
| 527 | !ts_subtree_extra(subtree) && \ | ||
| 528 | alias_sequence && \ | ||
| 529 | alias_sequence[structural_child_index] \ | ||
| 530 | ) ? \ | ||
| 531 | alias_sequence[structural_child_index] : \ | ||
| 532 | ts_subtree_symbol(subtree)) | ||
| 533 | |||
| 534 | // Stop walking up when a visible ancestor is found. | ||
| 535 | TSSymbol entry_symbol = subtree_symbol( | ||
| 536 | *entry->subtree, | ||
| 537 | entry->structural_child_index | ||
| 538 | ); | ||
| 539 | TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( | ||
| 540 | self->tree->language, | ||
| 541 | entry_symbol | ||
| 542 | ); | ||
| 543 | if (i != self->stack.size - 1 && entry_metadata.visible) break; | ||
| 544 | |||
| 545 | // Record any supertypes | ||
| 546 | if (entry_metadata.supertype && *supertype_count < max_supertypes) { | ||
| 547 | supertypes[*supertype_count] = entry_symbol; | ||
| 548 | (*supertype_count)++; | ||
| 549 | } | ||
| 550 | |||
| 551 | // Determine if the current node has later siblings. | ||
| 552 | if (!*has_later_siblings) { | ||
| 553 | unsigned sibling_count = parent_entry->subtree->ptr->child_count; | ||
| 554 | unsigned structural_child_index = entry->structural_child_index; | ||
| 555 | if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; | ||
| 556 | for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { | ||
| 557 | Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; | ||
| 558 | TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( | ||
| 559 | self->tree->language, | ||
| 560 | subtree_symbol(sibling, structural_child_index) | ||
| 561 | ); | ||
| 562 | if (sibling_metadata.visible) { | ||
| 563 | *has_later_siblings = true; | ||
| 564 | if (*has_later_named_siblings) break; | ||
| 565 | if (sibling_metadata.named) { | ||
| 566 | *has_later_named_siblings = true; | ||
| 567 | break; | ||
| 568 | } | ||
| 569 | } else if (ts_subtree_visible_child_count(sibling) > 0) { | ||
| 570 | *has_later_siblings = true; | ||
| 571 | if (*has_later_named_siblings) break; | ||
| 572 | if (sibling.ptr->named_child_count > 0) { | ||
| 573 | *has_later_named_siblings = true; | ||
| 574 | break; | ||
| 575 | } | ||
| 576 | } | ||
| 577 | if (!ts_subtree_extra(sibling)) structural_child_index++; | ||
| 578 | } | ||
| 579 | } | ||
| 580 | |||
| 581 | #undef subtree_symbol | ||
| 582 | |||
| 583 | if (!ts_subtree_extra(*entry->subtree)) { | ||
| 584 | const TSFieldMapEntry *field_map, *field_map_end; | ||
| 585 | ts_language_field_map( | ||
| 586 | self->tree->language, | ||
| 587 | parent_entry->subtree->ptr->production_id, | ||
| 588 | &field_map, &field_map_end | ||
| 589 | ); | ||
| 590 | |||
| 591 | // Look for a field name associated with the current node. | ||
| 592 | if (!*field_id) { | ||
| 593 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { | ||
| 594 | if (!map->inherited && map->child_index == entry->structural_child_index) { | ||
| 595 | *field_id = map->field_id; | ||
| 596 | break; | ||
| 597 | } | ||
| 598 | } | ||
| 599 | } | ||
| 600 | |||
| 601 | // Determine if the current node can have later siblings with the same field name. | ||
| 602 | if (*field_id) { | ||
| 603 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { | ||
| 604 | if ( | ||
| 605 | map->field_id == *field_id && | ||
| 606 | map->child_index > entry->structural_child_index | ||
| 607 | ) { | ||
| 608 | *can_have_later_siblings_with_this_field = true; | ||
| 609 | break; | ||
| 610 | } | ||
| 611 | } | ||
| 612 | } | ||
| 613 | } | ||
| 614 | } | ||
| 615 | } | ||
| 616 | |||
| 617 | uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) { | ||
| 618 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 619 | uint32_t depth = 0; | ||
| 620 | for (unsigned i = 1; i < self->stack.size; i++) { | ||
| 621 | if (ts_tree_cursor_is_entry_visible(self, i)) { | ||
| 622 | depth++; | ||
| 623 | } | ||
| 624 | } | ||
| 625 | return depth; | ||
| 626 | } | ||
| 627 | |||
| 628 | TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { | ||
| 629 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 630 | for (int i = (int)self->stack.size - 2; i >= 0; i--) { | ||
| 631 | TreeCursorEntry *entry = &self->stack.contents[i]; | ||
| 632 | bool is_visible = true; | ||
| 633 | TSSymbol alias_symbol = 0; | ||
| 634 | if (i > 0) { | ||
| 635 | TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; | ||
| 636 | alias_symbol = ts_language_alias_at( | ||
| 637 | self->tree->language, | ||
| 638 | parent_entry->subtree->ptr->production_id, | ||
| 639 | entry->structural_child_index | ||
| 640 | ); | ||
| 641 | is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); | ||
| 642 | } | ||
| 643 | if (is_visible) { | ||
| 644 | return ts_node_new( | ||
| 645 | self->tree, | ||
| 646 | entry->subtree, | ||
| 647 | entry->position, | ||
| 648 | alias_symbol | ||
| 649 | ); | ||
| 650 | } | ||
| 651 | } | ||
| 652 | return ts_node_new(NULL, NULL, length_zero(), 0); | ||
| 653 | } | ||
| 654 | |||
| 655 | TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { | ||
| 656 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 657 | |||
| 658 | // Walk up the tree, visiting the current node and its invisible ancestors. | ||
| 659 | for (unsigned i = self->stack.size - 1; i > 0; i--) { | ||
| 660 | TreeCursorEntry *entry = &self->stack.contents[i]; | ||
| 661 | TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; | ||
| 662 | |||
| 663 | // Stop walking up when another visible node is found. | ||
| 664 | if ( | ||
| 665 | i != self->stack.size - 1 && | ||
| 666 | ts_tree_cursor_is_entry_visible(self, i) | ||
| 667 | ) break; | ||
| 668 | |||
| 669 | if (ts_subtree_extra(*entry->subtree)) break; | ||
| 670 | |||
| 671 | const TSFieldMapEntry *field_map, *field_map_end; | ||
| 672 | ts_language_field_map( | ||
| 673 | self->tree->language, | ||
| 674 | parent_entry->subtree->ptr->production_id, | ||
| 675 | &field_map, &field_map_end | ||
| 676 | ); | ||
| 677 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { | ||
| 678 | if (!map->inherited && map->child_index == entry->structural_child_index) { | ||
| 679 | return map->field_id; | ||
| 680 | } | ||
| 681 | } | ||
| 682 | } | ||
| 683 | return 0; | ||
| 684 | } | ||
| 685 | |||
| 686 | const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { | ||
| 687 | TSFieldId id = ts_tree_cursor_current_field_id(_self); | ||
| 688 | if (id) { | ||
| 689 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 690 | return self->tree->language->field_names[id]; | ||
| 691 | } else { | ||
| 692 | return NULL; | ||
| 693 | } | ||
| 694 | } | ||
| 695 | |||
| 696 | TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { | ||
| 697 | const TreeCursor *cursor = (const TreeCursor *)_cursor; | ||
| 698 | TSTreeCursor res = {NULL, NULL, {0, 0}}; | ||
| 699 | TreeCursor *copy = (TreeCursor *)&res; | ||
| 700 | copy->tree = cursor->tree; | ||
| 701 | array_init(©->stack); | ||
| 702 | array_push_all(©->stack, &cursor->stack); | ||
| 703 | return res; | ||
| 704 | } | ||
| 705 | |||
| 706 | void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) { | ||
| 707 | const TreeCursor *cursor = (const TreeCursor *)_src; | ||
| 708 | TreeCursor *copy = (TreeCursor *)_dst; | ||
| 709 | copy->tree = cursor->tree; | ||
| 710 | array_clear(©->stack); | ||
| 711 | array_push_all(©->stack, &cursor->stack); | ||
| 712 | } | ||
diff --git a/vendor/tree-sitter/lib/src/tree_cursor.h b/vendor/tree-sitter/lib/src/tree_cursor.h new file mode 100644 index 0000000..6d4c688 --- /dev/null +++ b/vendor/tree-sitter/lib/src/tree_cursor.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | #ifndef TREE_SITTER_TREE_CURSOR_H_ | ||
| 2 | #define TREE_SITTER_TREE_CURSOR_H_ | ||
| 3 | |||
| 4 | #include "./subtree.h" | ||
| 5 | |||
| 6 | typedef struct { | ||
| 7 | const Subtree *subtree; | ||
| 8 | Length position; | ||
| 9 | uint32_t child_index; | ||
| 10 | uint32_t structural_child_index; | ||
| 11 | uint32_t descendant_index; | ||
| 12 | } TreeCursorEntry; | ||
| 13 | |||
| 14 | typedef struct { | ||
| 15 | const TSTree *tree; | ||
| 16 | Array(TreeCursorEntry) stack; | ||
| 17 | } TreeCursor; | ||
| 18 | |||
| 19 | typedef enum { | ||
| 20 | TreeCursorStepNone, | ||
| 21 | TreeCursorStepHidden, | ||
| 22 | TreeCursorStepVisible, | ||
| 23 | } TreeCursorStep; | ||
| 24 | |||
| 25 | void ts_tree_cursor_init(TreeCursor *, TSNode); | ||
| 26 | void ts_tree_cursor_current_status( | ||
| 27 | const TSTreeCursor *, | ||
| 28 | TSFieldId *, | ||
| 29 | bool *, | ||
| 30 | bool *, | ||
| 31 | bool *, | ||
| 32 | TSSymbol *, | ||
| 33 | unsigned * | ||
| 34 | ); | ||
| 35 | |||
| 36 | TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *); | ||
| 37 | TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *); | ||
| 38 | |||
| 39 | static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) { | ||
| 40 | const TreeCursor *self = (const TreeCursor *)_self; | ||
| 41 | TreeCursorEntry *last_entry = array_back(&self->stack); | ||
| 42 | return *last_entry->subtree; | ||
| 43 | } | ||
| 44 | |||
| 45 | TSNode ts_tree_cursor_parent_node(const TSTreeCursor *); | ||
| 46 | |||
| 47 | #endif // TREE_SITTER_TREE_CURSOR_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/unicode.h b/vendor/tree-sitter/lib/src/unicode.h new file mode 100644 index 0000000..0fba56a --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode.h | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | #ifndef TREE_SITTER_UNICODE_H_ | ||
| 2 | #define TREE_SITTER_UNICODE_H_ | ||
| 3 | |||
| 4 | #ifdef __cplusplus | ||
| 5 | extern "C" { | ||
| 6 | #endif | ||
| 7 | |||
| 8 | #include <limits.h> | ||
| 9 | #include <stdint.h> | ||
| 10 | |||
| 11 | #define U_EXPORT | ||
| 12 | #define U_EXPORT2 | ||
| 13 | #include "unicode/utf8.h" | ||
| 14 | #include "unicode/utf16.h" | ||
| 15 | |||
| 16 | static const int32_t TS_DECODE_ERROR = U_SENTINEL; | ||
| 17 | |||
| 18 | // These functions read one unicode code point from the given string, | ||
| 19 | // returning the number of bytes consumed. | ||
| 20 | typedef uint32_t (*UnicodeDecodeFunction)( | ||
| 21 | const uint8_t *string, | ||
| 22 | uint32_t length, | ||
| 23 | int32_t *code_point | ||
| 24 | ); | ||
| 25 | |||
| 26 | static inline uint32_t ts_decode_utf8( | ||
| 27 | const uint8_t *string, | ||
| 28 | uint32_t length, | ||
| 29 | int32_t *code_point | ||
| 30 | ) { | ||
| 31 | uint32_t i = 0; | ||
| 32 | U8_NEXT(string, i, length, *code_point); | ||
| 33 | return i; | ||
| 34 | } | ||
| 35 | |||
| 36 | static inline uint32_t ts_decode_utf16( | ||
| 37 | const uint8_t *string, | ||
| 38 | uint32_t length, | ||
| 39 | int32_t *code_point | ||
| 40 | ) { | ||
| 41 | uint32_t i = 0; | ||
| 42 | U16_NEXT(((uint16_t *)string), i, length, *code_point); | ||
| 43 | return i * 2; | ||
| 44 | } | ||
| 45 | |||
| 46 | #ifdef __cplusplus | ||
| 47 | } | ||
| 48 | #endif | ||
| 49 | |||
| 50 | #endif // TREE_SITTER_UNICODE_H_ | ||
diff --git a/vendor/tree-sitter/lib/src/unicode/ICU_SHA b/vendor/tree-sitter/lib/src/unicode/ICU_SHA new file mode 100644 index 0000000..3622283 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/ICU_SHA | |||
| @@ -0,0 +1 @@ | |||
| 552b01f61127d30d6589aa4bf99468224979b661 | |||
diff --git a/vendor/tree-sitter/lib/src/unicode/LICENSE b/vendor/tree-sitter/lib/src/unicode/LICENSE new file mode 100644 index 0000000..2e01e36 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/LICENSE | |||
| @@ -0,0 +1,414 @@ | |||
| 1 | COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) | ||
| 2 | |||
| 3 | Copyright © 1991-2019 Unicode, Inc. All rights reserved. | ||
| 4 | Distributed under the Terms of Use in https://www.unicode.org/copyright.html. | ||
| 5 | |||
| 6 | Permission is hereby granted, free of charge, to any person obtaining | ||
| 7 | a copy of the Unicode data files and any associated documentation | ||
| 8 | (the "Data Files") or Unicode software and any associated documentation | ||
| 9 | (the "Software") to deal in the Data Files or Software | ||
| 10 | without restriction, including without limitation the rights to use, | ||
| 11 | copy, modify, merge, publish, distribute, and/or sell copies of | ||
| 12 | the Data Files or Software, and to permit persons to whom the Data Files | ||
| 13 | or Software are furnished to do so, provided that either | ||
| 14 | (a) this copyright and permission notice appear with all copies | ||
| 15 | of the Data Files or Software, or | ||
| 16 | (b) this copyright and permission notice appear in associated | ||
| 17 | Documentation. | ||
| 18 | |||
| 19 | THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF | ||
| 20 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | ||
| 21 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 22 | NONINFRINGEMENT OF THIRD PARTY RIGHTS. | ||
| 23 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS | ||
| 24 | NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL | ||
| 25 | DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, | ||
| 26 | DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | ||
| 27 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
| 28 | PERFORMANCE OF THE DATA FILES OR SOFTWARE. | ||
| 29 | |||
| 30 | Except as contained in this notice, the name of a copyright holder | ||
| 31 | shall not be used in advertising or otherwise to promote the sale, | ||
| 32 | use or other dealings in these Data Files or Software without prior | ||
| 33 | written authorization of the copyright holder. | ||
| 34 | |||
| 35 | --------------------- | ||
| 36 | |||
| 37 | Third-Party Software Licenses | ||
| 38 | |||
| 39 | This section contains third-party software notices and/or additional | ||
| 40 | terms for licensed third-party software components included within ICU | ||
| 41 | libraries. | ||
| 42 | |||
| 43 | 1. ICU License - ICU 1.8.1 to ICU 57.1 | ||
| 44 | |||
| 45 | COPYRIGHT AND PERMISSION NOTICE | ||
| 46 | |||
| 47 | Copyright (c) 1995-2016 International Business Machines Corporation and others | ||
| 48 | All rights reserved. | ||
| 49 | |||
| 50 | Permission is hereby granted, free of charge, to any person obtaining | ||
| 51 | a copy of this software and associated documentation files (the | ||
| 52 | "Software"), to deal in the Software without restriction, including | ||
| 53 | without limitation the rights to use, copy, modify, merge, publish, | ||
| 54 | distribute, and/or sell copies of the Software, and to permit persons | ||
| 55 | to whom the Software is furnished to do so, provided that the above | ||
| 56 | copyright notice(s) and this permission notice appear in all copies of | ||
| 57 | the Software and that both the above copyright notice(s) and this | ||
| 58 | permission notice appear in supporting documentation. | ||
| 59 | |||
| 60 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
| 61 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
| 62 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT | ||
| 63 | OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | ||
| 64 | HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY | ||
| 65 | SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER | ||
| 66 | RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF | ||
| 67 | CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | ||
| 68 | CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 69 | |||
| 70 | Except as contained in this notice, the name of a copyright holder | ||
| 71 | shall not be used in advertising or otherwise to promote the sale, use | ||
| 72 | or other dealings in this Software without prior written authorization | ||
| 73 | of the copyright holder. | ||
| 74 | |||
| 75 | All trademarks and registered trademarks mentioned herein are the | ||
| 76 | property of their respective owners. | ||
| 77 | |||
| 78 | 2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt) | ||
| 79 | |||
| 80 | # The Google Chrome software developed by Google is licensed under | ||
| 81 | # the BSD license. Other software included in this distribution is | ||
| 82 | # provided under other licenses, as set forth below. | ||
| 83 | # | ||
| 84 | # The BSD License | ||
| 85 | # http://opensource.org/licenses/bsd-license.php | ||
| 86 | # Copyright (C) 2006-2008, Google Inc. | ||
| 87 | # | ||
| 88 | # All rights reserved. | ||
| 89 | # | ||
| 90 | # Redistribution and use in source and binary forms, with or without | ||
| 91 | # modification, are permitted provided that the following conditions are met: | ||
| 92 | # | ||
| 93 | # Redistributions of source code must retain the above copyright notice, | ||
| 94 | # this list of conditions and the following disclaimer. | ||
| 95 | # Redistributions in binary form must reproduce the above | ||
| 96 | # copyright notice, this list of conditions and the following | ||
| 97 | # disclaimer in the documentation and/or other materials provided with | ||
| 98 | # the distribution. | ||
| 99 | # Neither the name of Google Inc. nor the names of its | ||
| 100 | # contributors may be used to endorse or promote products derived from | ||
| 101 | # this software without specific prior written permission. | ||
| 102 | # | ||
| 103 | # | ||
| 104 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | ||
| 105 | # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, | ||
| 106 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 107 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 108 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
| 109 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 110 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 111 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
| 112 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 113 | # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 114 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 115 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 116 | # | ||
| 117 | # | ||
| 118 | # The word list in cjdict.txt are generated by combining three word lists | ||
| 119 | # listed below with further processing for compound word breaking. The | ||
| 120 | # frequency is generated with an iterative training against Google web | ||
| 121 | # corpora. | ||
| 122 | # | ||
| 123 | # * Libtabe (Chinese) | ||
| 124 | # - https://sourceforge.net/project/?group_id=1519 | ||
| 125 | # - Its license terms and conditions are shown below. | ||
| 126 | # | ||
| 127 | # * IPADIC (Japanese) | ||
| 128 | # - http://chasen.aist-nara.ac.jp/chasen/distribution.html | ||
| 129 | # - Its license terms and conditions are shown below. | ||
| 130 | # | ||
| 131 | # ---------COPYING.libtabe ---- BEGIN-------------------- | ||
| 132 | # | ||
| 133 | # /* | ||
| 134 | # * Copyright (c) 1999 TaBE Project. | ||
| 135 | # * Copyright (c) 1999 Pai-Hsiang Hsiao. | ||
| 136 | # * All rights reserved. | ||
| 137 | # * | ||
| 138 | # * Redistribution and use in source and binary forms, with or without | ||
| 139 | # * modification, are permitted provided that the following conditions | ||
| 140 | # * are met: | ||
| 141 | # * | ||
| 142 | # * . Redistributions of source code must retain the above copyright | ||
| 143 | # * notice, this list of conditions and the following disclaimer. | ||
| 144 | # * . Redistributions in binary form must reproduce the above copyright | ||
| 145 | # * notice, this list of conditions and the following disclaimer in | ||
| 146 | # * the documentation and/or other materials provided with the | ||
| 147 | # * distribution. | ||
| 148 | # * . Neither the name of the TaBE Project nor the names of its | ||
| 149 | # * contributors may be used to endorse or promote products derived | ||
| 150 | # * from this software without specific prior written permission. | ||
| 151 | # * | ||
| 152 | # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 153 | # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 154 | # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| 155 | # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| 156 | # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
| 157 | # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 158 | # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 159 | # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 160 | # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
| 161 | # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 162 | # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
| 163 | # * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 164 | # */ | ||
| 165 | # | ||
| 166 | # /* | ||
| 167 | # * Copyright (c) 1999 Computer Systems and Communication Lab, | ||
| 168 | # * Institute of Information Science, Academia | ||
| 169 | # * Sinica. All rights reserved. | ||
| 170 | # * | ||
| 171 | # * Redistribution and use in source and binary forms, with or without | ||
| 172 | # * modification, are permitted provided that the following conditions | ||
| 173 | # * are met: | ||
| 174 | # * | ||
| 175 | # * . Redistributions of source code must retain the above copyright | ||
| 176 | # * notice, this list of conditions and the following disclaimer. | ||
| 177 | # * . Redistributions in binary form must reproduce the above copyright | ||
| 178 | # * notice, this list of conditions and the following disclaimer in | ||
| 179 | # * the documentation and/or other materials provided with the | ||
| 180 | # * distribution. | ||
| 181 | # * . Neither the name of the Computer Systems and Communication Lab | ||
| 182 | # * nor the names of its contributors may be used to endorse or | ||
| 183 | # * promote products derived from this software without specific | ||
| 184 | # * prior written permission. | ||
| 185 | # * | ||
| 186 | # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 187 | # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 188 | # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| 189 | # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| 190 | # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
| 191 | # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 192 | # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 193 | # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 194 | # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
| 195 | # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 196 | # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
| 197 | # * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 198 | # */ | ||
| 199 | # | ||
| 200 | # Copyright 1996 Chih-Hao Tsai @ Beckman Institute, | ||
| 201 | # University of Illinois | ||
| 202 | # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4 | ||
| 203 | # | ||
| 204 | # ---------------COPYING.libtabe-----END-------------------------------- | ||
| 205 | # | ||
| 206 | # | ||
| 207 | # ---------------COPYING.ipadic-----BEGIN------------------------------- | ||
| 208 | # | ||
| 209 | # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science | ||
| 210 | # and Technology. All Rights Reserved. | ||
| 211 | # | ||
| 212 | # Use, reproduction, and distribution of this software is permitted. | ||
| 213 | # Any copy of this software, whether in its original form or modified, | ||
| 214 | # must include both the above copyright notice and the following | ||
| 215 | # paragraphs. | ||
| 216 | # | ||
| 217 | # Nara Institute of Science and Technology (NAIST), | ||
| 218 | # the copyright holders, disclaims all warranties with regard to this | ||
| 219 | # software, including all implied warranties of merchantability and | ||
| 220 | # fitness, in no event shall NAIST be liable for | ||
| 221 | # any special, indirect or consequential damages or any damages | ||
| 222 | # whatsoever resulting from loss of use, data or profits, whether in an | ||
| 223 | # action of contract, negligence or other tortuous action, arising out | ||
| 224 | # of or in connection with the use or performance of this software. | ||
| 225 | # | ||
| 226 | # A large portion of the dictionary entries | ||
| 227 | # originate from ICOT Free Software. The following conditions for ICOT | ||
| 228 | # Free Software applies to the current dictionary as well. | ||
| 229 | # | ||
| 230 | # Each User may also freely distribute the Program, whether in its | ||
| 231 | # original form or modified, to any third party or parties, PROVIDED | ||
| 232 | # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear | ||
| 233 | # on, or be attached to, the Program, which is distributed substantially | ||
| 234 | # in the same form as set out herein and that such intended | ||
| 235 | # distribution, if actually made, will neither violate or otherwise | ||
| 236 | # contravene any of the laws and regulations of the countries having | ||
| 237 | # jurisdiction over the User or the intended distribution itself. | ||
| 238 | # | ||
| 239 | # NO WARRANTY | ||
| 240 | # | ||
| 241 | # The program was produced on an experimental basis in the course of the | ||
| 242 | # research and development conducted during the project and is provided | ||
| 243 | # to users as so produced on an experimental basis. Accordingly, the | ||
| 244 | # program is provided without any warranty whatsoever, whether express, | ||
| 245 | # implied, statutory or otherwise. The term "warranty" used herein | ||
| 246 | # includes, but is not limited to, any warranty of the quality, | ||
| 247 | # performance, merchantability and fitness for a particular purpose of | ||
| 248 | # the program and the nonexistence of any infringement or violation of | ||
| 249 | # any right of any third party. | ||
| 250 | # | ||
| 251 | # Each user of the program will agree and understand, and be deemed to | ||
| 252 | # have agreed and understood, that there is no warranty whatsoever for | ||
| 253 | # the program and, accordingly, the entire risk arising from or | ||
| 254 | # otherwise connected with the program is assumed by the user. | ||
| 255 | # | ||
| 256 | # Therefore, neither ICOT, the copyright holder, or any other | ||
| 257 | # organization that participated in or was otherwise related to the | ||
| 258 | # development of the program and their respective officials, directors, | ||
| 259 | # officers and other employees shall be held liable for any and all | ||
| 260 | # damages, including, without limitation, general, special, incidental | ||
| 261 | # and consequential damages, arising out of or otherwise in connection | ||
| 262 | # with the use or inability to use the program or any product, material | ||
| 263 | # or result produced or otherwise obtained by using the program, | ||
| 264 | # regardless of whether they have been advised of, or otherwise had | ||
| 265 | # knowledge of, the possibility of such damages at any time during the | ||
| 266 | # project or thereafter. Each user will be deemed to have agreed to the | ||
| 267 | # foregoing by his or her commencement of use of the program. The term | ||
| 268 | # "use" as used herein includes, but is not limited to, the use, | ||
| 269 | # modification, copying and distribution of the program and the | ||
| 270 | # production of secondary products from the program. | ||
| 271 | # | ||
| 272 | # In the case where the program, whether in its original form or | ||
| 273 | # modified, was distributed or delivered to or received by a user from | ||
| 274 | # any person, organization or entity other than ICOT, unless it makes or | ||
| 275 | # grants independently of ICOT any specific warranty to the user in | ||
| 276 | # writing, such person, organization or entity, will also be exempted | ||
| 277 | # from and not be held liable to the user for any such damages as noted | ||
| 278 | # above as far as the program is concerned. | ||
| 279 | # | ||
| 280 | # ---------------COPYING.ipadic-----END---------------------------------- | ||
| 281 | |||
| 282 | 3. Lao Word Break Dictionary Data (laodict.txt) | ||
| 283 | |||
| 284 | # Copyright (c) 2013 International Business Machines Corporation | ||
| 285 | # and others. All Rights Reserved. | ||
| 286 | # | ||
| 287 | # Project: http://code.google.com/p/lao-dictionary/ | ||
| 288 | # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt | ||
| 289 | # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt | ||
| 290 | # (copied below) | ||
| 291 | # | ||
| 292 | # This file is derived from the above dictionary, with slight | ||
| 293 | # modifications. | ||
| 294 | # ---------------------------------------------------------------------- | ||
| 295 | # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell. | ||
| 296 | # All rights reserved. | ||
| 297 | # | ||
| 298 | # Redistribution and use in source and binary forms, with or without | ||
| 299 | # modification, | ||
| 300 | # are permitted provided that the following conditions are met: | ||
| 301 | # | ||
| 302 | # | ||
| 303 | # Redistributions of source code must retain the above copyright notice, this | ||
| 304 | # list of conditions and the following disclaimer. Redistributions in | ||
| 305 | # binary form must reproduce the above copyright notice, this list of | ||
| 306 | # conditions and the following disclaimer in the documentation and/or | ||
| 307 | # other materials provided with the distribution. | ||
| 308 | # | ||
| 309 | # | ||
| 310 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 311 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 312 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| 313 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| 314 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | ||
| 315 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 316 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 317 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 318 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
| 319 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 320 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
| 321 | # OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 322 | # -------------------------------------------------------------------------- | ||
| 323 | |||
| 324 | 4. Burmese Word Break Dictionary Data (burmesedict.txt) | ||
| 325 | |||
| 326 | # Copyright (c) 2014 International Business Machines Corporation | ||
| 327 | # and others. All Rights Reserved. | ||
| 328 | # | ||
| 329 | # This list is part of a project hosted at: | ||
| 330 | # github.com/kanyawtech/myanmar-karen-word-lists | ||
| 331 | # | ||
| 332 | # -------------------------------------------------------------------------- | ||
| 333 | # Copyright (c) 2013, LeRoy Benjamin Sharon | ||
| 334 | # All rights reserved. | ||
| 335 | # | ||
| 336 | # Redistribution and use in source and binary forms, with or without | ||
| 337 | # modification, are permitted provided that the following conditions | ||
| 338 | # are met: Redistributions of source code must retain the above | ||
| 339 | # copyright notice, this list of conditions and the following | ||
| 340 | # disclaimer. Redistributions in binary form must reproduce the | ||
| 341 | # above copyright notice, this list of conditions and the following | ||
| 342 | # disclaimer in the documentation and/or other materials provided | ||
| 343 | # with the distribution. | ||
| 344 | # | ||
| 345 | # Neither the name Myanmar Karen Word Lists, nor the names of its | ||
| 346 | # contributors may be used to endorse or promote products derived | ||
| 347 | # from this software without specific prior written permission. | ||
| 348 | # | ||
| 349 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | ||
| 350 | # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, | ||
| 351 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 352 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 353 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS | ||
| 354 | # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
| 355 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
| 356 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 357 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||
| 358 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR | ||
| 359 | # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF | ||
| 360 | # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 361 | # SUCH DAMAGE. | ||
| 362 | # -------------------------------------------------------------------------- | ||
| 363 | |||
| 364 | 5. Time Zone Database | ||
| 365 | |||
| 366 | ICU uses the public domain data and code derived from Time Zone | ||
| 367 | Database for its time zone support. The ownership of the TZ database | ||
| 368 | is explained in BCP 175: Procedure for Maintaining the Time Zone | ||
| 369 | Database section 7. | ||
| 370 | |||
| 371 | # 7. Database Ownership | ||
| 372 | # | ||
| 373 | # The TZ database itself is not an IETF Contribution or an IETF | ||
| 374 | # document. Rather it is a pre-existing and regularly updated work | ||
| 375 | # that is in the public domain, and is intended to remain in the | ||
| 376 | # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do | ||
| 377 | # not apply to the TZ Database or contributions that individuals make | ||
| 378 | # to it. Should any claims be made and substantiated against the TZ | ||
| 379 | # Database, the organization that is providing the IANA | ||
| 380 | # Considerations defined in this RFC, under the memorandum of | ||
| 381 | # understanding with the IETF, currently ICANN, may act in accordance | ||
| 382 | # with all competent court orders. No ownership claims will be made | ||
| 383 | # by ICANN or the IETF Trust on the database or the code. Any person | ||
| 384 | # making a contribution to the database or code waives all rights to | ||
| 385 | # future claims in that contribution or in the TZ Database. | ||
| 386 | |||
| 387 | 6. Google double-conversion | ||
| 388 | |||
| 389 | Copyright 2006-2011, the V8 project authors. All rights reserved. | ||
| 390 | Redistribution and use in source and binary forms, with or without | ||
| 391 | modification, are permitted provided that the following conditions are | ||
| 392 | met: | ||
| 393 | |||
| 394 | * Redistributions of source code must retain the above copyright | ||
| 395 | notice, this list of conditions and the following disclaimer. | ||
| 396 | * Redistributions in binary form must reproduce the above | ||
| 397 | copyright notice, this list of conditions and the following | ||
| 398 | disclaimer in the documentation and/or other materials provided | ||
| 399 | with the distribution. | ||
| 400 | * Neither the name of Google Inc. nor the names of its | ||
| 401 | contributors may be used to endorse or promote products derived | ||
| 402 | from this software without specific prior written permission. | ||
| 403 | |||
| 404 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 405 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 406 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 407 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 408 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 409 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 410 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 411 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 412 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 413 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 414 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
diff --git a/vendor/tree-sitter/lib/src/unicode/README.md b/vendor/tree-sitter/lib/src/unicode/README.md new file mode 100644 index 0000000..623b8e3 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/README.md | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | # ICU Parts | ||
| 2 | |||
| 3 | This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu). | ||
| 4 | |||
| 5 | ### License | ||
| 6 | |||
| 7 | The license for these files is contained in the `LICENSE` file within this directory. | ||
| 8 | |||
| 9 | ### Contents | ||
| 10 | |||
| 11 | * Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory: | ||
| 12 | * `utf8.h` | ||
| 13 | * `utf16.h` | ||
| 14 | * `umachine.h` | ||
| 15 | * Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed: | ||
| 16 | * `ptypes.h` | ||
| 17 | * `urename.h` | ||
| 18 | * `utf.h` | ||
| 19 | * `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained. | ||
| 20 | * `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository. | ||
| 21 | * `README.md` - This text file. | ||
| 22 | |||
| 23 | ### Updating ICU | ||
| 24 | |||
| 25 | To incorporate changes from the upstream `icu` repository: | ||
| 26 | |||
| 27 | * Update `ICU_SHA` with the new Git SHA. | ||
| 28 | * Update `LICENSE` with the license text from the directory mentioned above. | ||
| 29 | * Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository. | ||
diff --git a/vendor/tree-sitter/lib/src/unicode/ptypes.h b/vendor/tree-sitter/lib/src/unicode/ptypes.h new file mode 100644 index 0000000..ac79ad0 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/ptypes.h | |||
| @@ -0,0 +1 @@ | |||
| // This file must exist in order for `utf8.h` and `utf16.h` to be used. | |||
diff --git a/vendor/tree-sitter/lib/src/unicode/umachine.h b/vendor/tree-sitter/lib/src/unicode/umachine.h new file mode 100644 index 0000000..9195824 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/umachine.h | |||
| @@ -0,0 +1,448 @@ | |||
| 1 | // © 2016 and later: Unicode, Inc. and others. | ||
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | ||
| 3 | /* | ||
| 4 | ****************************************************************************** | ||
| 5 | * | ||
| 6 | * Copyright (C) 1999-2015, International Business Machines | ||
| 7 | * Corporation and others. All Rights Reserved. | ||
| 8 | * | ||
| 9 | ****************************************************************************** | ||
| 10 | * file name: umachine.h | ||
| 11 | * encoding: UTF-8 | ||
| 12 | * tab size: 8 (not used) | ||
| 13 | * indentation:4 | ||
| 14 | * | ||
| 15 | * created on: 1999sep13 | ||
| 16 | * created by: Markus W. Scherer | ||
| 17 | * | ||
| 18 | * This file defines basic types and constants for ICU to be | ||
| 19 | * platform-independent. umachine.h and utf.h are included into | ||
| 20 | * utypes.h to provide all the general definitions for ICU. | ||
| 21 | * All of these definitions used to be in utypes.h before | ||
| 22 | * the UTF-handling macros made this unmaintainable. | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifndef __UMACHINE_H__ | ||
| 26 | #define __UMACHINE_H__ | ||
| 27 | |||
| 28 | |||
| 29 | /** | ||
| 30 | * \file | ||
| 31 | * \brief Basic types and constants for UTF | ||
| 32 | * | ||
| 33 | * <h2> Basic types and constants for UTF </h2> | ||
| 34 | * This file defines basic types and constants for utf.h to be | ||
| 35 | * platform-independent. umachine.h and utf.h are included into | ||
| 36 | * utypes.h to provide all the general definitions for ICU. | ||
| 37 | * All of these definitions used to be in utypes.h before | ||
| 38 | * the UTF-handling macros made this unmaintainable. | ||
| 39 | * | ||
| 40 | */ | ||
| 41 | /*==========================================================================*/ | ||
| 42 | /* Include platform-dependent definitions */ | ||
| 43 | /* which are contained in the platform-specific file platform.h */ | ||
| 44 | /*==========================================================================*/ | ||
| 45 | |||
| 46 | #include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ | ||
| 47 | |||
| 48 | /* | ||
| 49 | * ANSI C headers: | ||
| 50 | * stddef.h defines wchar_t | ||
| 51 | */ | ||
| 52 | #include <stddef.h> | ||
| 53 | |||
| 54 | /*==========================================================================*/ | ||
| 55 | /* For C wrappers, we use the symbol U_STABLE. */ | ||
| 56 | /* This works properly if the includer is C or C++. */ | ||
| 57 | /* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ | ||
| 58 | /*==========================================================================*/ | ||
| 59 | |||
| 60 | /** | ||
| 61 | * \def U_CFUNC | ||
| 62 | * This is used in a declaration of a library private ICU C function. | ||
| 63 | * @stable ICU 2.4 | ||
| 64 | */ | ||
| 65 | |||
| 66 | /** | ||
| 67 | * \def U_CDECL_BEGIN | ||
| 68 | * This is used to begin a declaration of a library private ICU C API. | ||
| 69 | * @stable ICU 2.4 | ||
| 70 | */ | ||
| 71 | |||
| 72 | /** | ||
| 73 | * \def U_CDECL_END | ||
| 74 | * This is used to end a declaration of a library private ICU C API | ||
| 75 | * @stable ICU 2.4 | ||
| 76 | */ | ||
| 77 | |||
| 78 | #ifdef __cplusplus | ||
| 79 | # define U_CFUNC extern "C" | ||
| 80 | # define U_CDECL_BEGIN extern "C" { | ||
| 81 | # define U_CDECL_END } | ||
| 82 | #else | ||
| 83 | # define U_CFUNC extern | ||
| 84 | # define U_CDECL_BEGIN | ||
| 85 | # define U_CDECL_END | ||
| 86 | #endif | ||
| 87 | |||
| 88 | #ifndef U_ATTRIBUTE_DEPRECATED | ||
| 89 | /** | ||
| 90 | * \def U_ATTRIBUTE_DEPRECATED | ||
| 91 | * This is used for GCC specific attributes | ||
| 92 | * @internal | ||
| 93 | */ | ||
| 94 | #if U_GCC_MAJOR_MINOR >= 302 | ||
| 95 | # define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) | ||
| 96 | /** | ||
| 97 | * \def U_ATTRIBUTE_DEPRECATED | ||
| 98 | * This is used for Visual C++ specific attributes | ||
| 99 | * @internal | ||
| 100 | */ | ||
| 101 | #elif defined(_MSC_VER) && (_MSC_VER >= 1400) | ||
| 102 | # define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) | ||
| 103 | #else | ||
| 104 | # define U_ATTRIBUTE_DEPRECATED | ||
| 105 | #endif | ||
| 106 | #endif | ||
| 107 | |||
| 108 | /** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ | ||
| 109 | #define U_CAPI U_CFUNC U_EXPORT | ||
| 110 | /** This is used to declare a function as a stable public ICU C API*/ | ||
| 111 | #define U_STABLE U_CAPI | ||
| 112 | /** This is used to declare a function as a draft public ICU C API */ | ||
| 113 | #define U_DRAFT U_CAPI | ||
| 114 | /** This is used to declare a function as a deprecated public ICU C API */ | ||
| 115 | #define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED | ||
| 116 | /** This is used to declare a function as an obsolete public ICU C API */ | ||
| 117 | #define U_OBSOLETE U_CAPI | ||
| 118 | /** This is used to declare a function as an internal ICU C API */ | ||
| 119 | #define U_INTERNAL U_CAPI | ||
| 120 | |||
| 121 | /** | ||
| 122 | * \def U_OVERRIDE | ||
| 123 | * Defined to the C++11 "override" keyword if available. | ||
| 124 | * Denotes a class or member which is an override of the base class. | ||
| 125 | * May result in an error if it is applied to something not an override. | ||
| 126 | * @internal | ||
| 127 | */ | ||
| 128 | #ifndef U_OVERRIDE | ||
| 129 | #define U_OVERRIDE override | ||
| 130 | #endif | ||
| 131 | |||
| 132 | /** | ||
| 133 | * \def U_FINAL | ||
| 134 | * Defined to the C++11 "final" keyword if available. | ||
| 135 | * Denotes a class or member which may not be overridden in subclasses. | ||
| 136 | * May result in an error if subclasses attempt to override. | ||
| 137 | * @internal | ||
| 138 | */ | ||
| 139 | #if !defined(U_FINAL) || defined(U_IN_DOXYGEN) | ||
| 140 | #define U_FINAL final | ||
| 141 | #endif | ||
| 142 | |||
| 143 | // Before ICU 65, function-like, multi-statement ICU macros were just defined as | ||
| 144 | // series of statements wrapped in { } blocks and the caller could choose to | ||
| 145 | // either treat them as if they were actual functions and end the invocation | ||
| 146 | // with a trailing ; creating an empty statement after the block or else omit | ||
| 147 | // this trailing ; using the knowledge that the macro would expand to { }. | ||
| 148 | // | ||
| 149 | // But doing so doesn't work well with macros that look like functions and | ||
| 150 | // compiler warnings about empty statements (ICU-20601) and ICU 65 therefore | ||
| 151 | // switches to the standard solution of wrapping such macros in do { } while. | ||
| 152 | // | ||
| 153 | // This will however break existing code that depends on being able to invoke | ||
| 154 | // these macros without a trailing ; so to be able to remain compatible with | ||
| 155 | // such code the wrapper is itself defined as macros so that it's possible to | ||
| 156 | // build ICU 65 and later with the old macro behaviour, like this: | ||
| 157 | // | ||
| 158 | // CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""' | ||
| 159 | // runConfigureICU ... | ||
| 160 | |||
| 161 | /** | ||
| 162 | * \def UPRV_BLOCK_MACRO_BEGIN | ||
| 163 | * Defined as the "do" keyword by default. | ||
| 164 | * @internal | ||
| 165 | */ | ||
| 166 | #ifndef UPRV_BLOCK_MACRO_BEGIN | ||
| 167 | #define UPRV_BLOCK_MACRO_BEGIN do | ||
| 168 | #endif | ||
| 169 | |||
| 170 | /** | ||
| 171 | * \def UPRV_BLOCK_MACRO_END | ||
| 172 | * Defined as "while (FALSE)" by default. | ||
| 173 | * @internal | ||
| 174 | */ | ||
| 175 | #ifndef UPRV_BLOCK_MACRO_END | ||
| 176 | #define UPRV_BLOCK_MACRO_END while (FALSE) | ||
| 177 | #endif | ||
| 178 | |||
| 179 | /*==========================================================================*/ | ||
| 180 | /* limits for int32_t etc., like in POSIX inttypes.h */ | ||
| 181 | /*==========================================================================*/ | ||
| 182 | |||
| 183 | #ifndef INT8_MIN | ||
| 184 | /** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ | ||
| 185 | # define INT8_MIN ((int8_t)(-128)) | ||
| 186 | #endif | ||
| 187 | #ifndef INT16_MIN | ||
| 188 | /** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ | ||
| 189 | # define INT16_MIN ((int16_t)(-32767-1)) | ||
| 190 | #endif | ||
| 191 | #ifndef INT32_MIN | ||
| 192 | /** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ | ||
| 193 | # define INT32_MIN ((int32_t)(-2147483647-1)) | ||
| 194 | #endif | ||
| 195 | |||
| 196 | #ifndef INT8_MAX | ||
| 197 | /** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ | ||
| 198 | # define INT8_MAX ((int8_t)(127)) | ||
| 199 | #endif | ||
| 200 | #ifndef INT16_MAX | ||
| 201 | /** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ | ||
| 202 | # define INT16_MAX ((int16_t)(32767)) | ||
| 203 | #endif | ||
| 204 | #ifndef INT32_MAX | ||
| 205 | /** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ | ||
| 206 | # define INT32_MAX ((int32_t)(2147483647)) | ||
| 207 | #endif | ||
| 208 | |||
| 209 | #ifndef UINT8_MAX | ||
| 210 | /** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ | ||
| 211 | # define UINT8_MAX ((uint8_t)(255U)) | ||
| 212 | #endif | ||
| 213 | #ifndef UINT16_MAX | ||
| 214 | /** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ | ||
| 215 | # define UINT16_MAX ((uint16_t)(65535U)) | ||
| 216 | #endif | ||
| 217 | #ifndef UINT32_MAX | ||
| 218 | /** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ | ||
| 219 | # define UINT32_MAX ((uint32_t)(4294967295U)) | ||
| 220 | #endif | ||
| 221 | |||
| 222 | #if defined(U_INT64_T_UNAVAILABLE) | ||
| 223 | # error int64_t is required for decimal format and rule-based number format. | ||
| 224 | #else | ||
| 225 | # ifndef INT64_C | ||
| 226 | /** | ||
| 227 | * Provides a platform independent way to specify a signed 64-bit integer constant. | ||
| 228 | * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C | ||
| 229 | * @stable ICU 2.8 | ||
| 230 | */ | ||
| 231 | # define INT64_C(c) c ## LL | ||
| 232 | # endif | ||
| 233 | # ifndef UINT64_C | ||
| 234 | /** | ||
| 235 | * Provides a platform independent way to specify an unsigned 64-bit integer constant. | ||
| 236 | * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C | ||
| 237 | * @stable ICU 2.8 | ||
| 238 | */ | ||
| 239 | # define UINT64_C(c) c ## ULL | ||
| 240 | # endif | ||
| 241 | # ifndef U_INT64_MIN | ||
| 242 | /** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ | ||
| 243 | # define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) | ||
| 244 | # endif | ||
| 245 | # ifndef U_INT64_MAX | ||
| 246 | /** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ | ||
| 247 | # define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) | ||
| 248 | # endif | ||
| 249 | # ifndef U_UINT64_MAX | ||
| 250 | /** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ | ||
| 251 | # define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) | ||
| 252 | # endif | ||
| 253 | #endif | ||
| 254 | |||
| 255 | /*==========================================================================*/ | ||
| 256 | /* Boolean data type */ | ||
| 257 | /*==========================================================================*/ | ||
| 258 | |||
| 259 | /** The ICU boolean type @stable ICU 2.0 */ | ||
| 260 | typedef int8_t UBool; | ||
| 261 | |||
| 262 | #ifndef TRUE | ||
| 263 | /** The TRUE value of a UBool @stable ICU 2.0 */ | ||
| 264 | # define TRUE 1 | ||
| 265 | #endif | ||
| 266 | #ifndef FALSE | ||
| 267 | /** The FALSE value of a UBool @stable ICU 2.0 */ | ||
| 268 | # define FALSE 0 | ||
| 269 | #endif | ||
| 270 | |||
| 271 | |||
| 272 | /*==========================================================================*/ | ||
| 273 | /* Unicode data types */ | ||
| 274 | /*==========================================================================*/ | ||
| 275 | |||
| 276 | /* wchar_t-related definitions -------------------------------------------- */ | ||
| 277 | |||
| 278 | /* | ||
| 279 | * \def U_WCHAR_IS_UTF16 | ||
| 280 | * Defined if wchar_t uses UTF-16. | ||
| 281 | * | ||
| 282 | * @stable ICU 2.0 | ||
| 283 | */ | ||
| 284 | /* | ||
| 285 | * \def U_WCHAR_IS_UTF32 | ||
| 286 | * Defined if wchar_t uses UTF-32. | ||
| 287 | * | ||
| 288 | * @stable ICU 2.0 | ||
| 289 | */ | ||
| 290 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) | ||
| 291 | # ifdef __STDC_ISO_10646__ | ||
| 292 | # if (U_SIZEOF_WCHAR_T==2) | ||
| 293 | # define U_WCHAR_IS_UTF16 | ||
| 294 | # elif (U_SIZEOF_WCHAR_T==4) | ||
| 295 | # define U_WCHAR_IS_UTF32 | ||
| 296 | # endif | ||
| 297 | # elif defined __UCS2__ | ||
| 298 | # if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) | ||
| 299 | # define U_WCHAR_IS_UTF16 | ||
| 300 | # endif | ||
| 301 | # elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) | ||
| 302 | # if (U_SIZEOF_WCHAR_T==4) | ||
| 303 | # define U_WCHAR_IS_UTF32 | ||
| 304 | # endif | ||
| 305 | # elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) | ||
| 306 | # define U_WCHAR_IS_UTF32 | ||
| 307 | # elif U_PLATFORM_HAS_WIN32_API | ||
| 308 | # define U_WCHAR_IS_UTF16 | ||
| 309 | # endif | ||
| 310 | #endif | ||
| 311 | |||
| 312 | /* UChar and UChar32 definitions -------------------------------------------- */ | ||
| 313 | |||
| 314 | /** Number of bytes in a UChar. @stable ICU 2.0 */ | ||
| 315 | #define U_SIZEOF_UCHAR 2 | ||
| 316 | |||
| 317 | /** | ||
| 318 | * \def U_CHAR16_IS_TYPEDEF | ||
| 319 | * If 1, then char16_t is a typedef and not a real type (yet) | ||
| 320 | * @internal | ||
| 321 | */ | ||
| 322 | #if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) | ||
| 323 | // for AIX, uchar.h needs to be included | ||
| 324 | # include <uchar.h> | ||
| 325 | # define U_CHAR16_IS_TYPEDEF 1 | ||
| 326 | #elif defined(_MSC_VER) && (_MSC_VER < 1900) | ||
| 327 | // Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, | ||
| 328 | // and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx | ||
| 329 | # define U_CHAR16_IS_TYPEDEF 1 | ||
| 330 | #else | ||
| 331 | # define U_CHAR16_IS_TYPEDEF 0 | ||
| 332 | #endif | ||
| 333 | |||
| 334 | |||
/**
 * \var UChar
 *
 * Base type for UTF-16 code units and for pointers to them:
 * an unsigned 16-bit integer.
 * Since ICU 59 the C++ API uses char16_t directly, while the C API keeps using UChar.
 *
 * The type can be chosen by defining the macro UCHAR_TYPE on the preprocessor
 * or compiler command line, for example
 * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2).
 * (Code outside the ICU library may also \#define UCHAR_TYPE earlier in this file.)
 * This exists to ease migration of application code that uses uint16_t or
 * wchar_t for UTF-16.
 *
 * The default is UChar=char16_t.
 *
 * C++11 makes char16_t bit-compatible with uint16_t, but a distinct type.
 *
 * In C, char16_t is a plain typedef of uint_least16_t.
 * ICU requires uint_least16_t=uint16_t for data memory mapping.
 * On macOS, char16_t is unavailable because the standard uchar.h header is missing.
 *
 * @stable ICU 4.4
 */

#if 1
    /*
     * "#if 1" is the normal setting: UChar defaults to char16_t in C++.
     * To test a UChar=uint16_t configuration, temporarily change it to "#if 0".
     * The intltest Makefile #defines UCHAR_TYPE=char16_t, so the fallback below
     * only applies while UCHAR_TYPE is still undefined.
     */
#else
#   ifndef UCHAR_TYPE
#       define UCHAR_TYPE uint16_t
#   endif
#endif

#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
        defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    /* Within the ICU library itself the type is fixed and never configurable. */
    typedef char16_t UChar;
#else
#   if defined(UCHAR_TYPE)
    typedef UCHAR_TYPE UChar;
#   elif defined(__cplusplus)
    typedef char16_t UChar;
#   else
    typedef uint16_t UChar;
#   endif
#endif
| 379 | |||
/**
 * \var OldUChar
 * The default ICU 58 definition of UChar, kept for transition convenience.
 * A base type for UTF-16 code units and pointers: an unsigned 16-bit integer.
 *
 * OldUChar is wchar_t when wchar_t is 16 bits wide (U_SIZEOF_WCHAR_T==2).
 * Otherwise it is __CHAR16_TYPE__ when the compiler predefines that macro,
 * and uint16_t as the last resort.
 *
 * The definition is therefore platform-dependent, but it gives direct string
 * type compatibility on platforms whose wchar_t is 16 bits wide.
 *
 * Exception: ICU 58 defined UChar as UCHAR_TYPE if that macro was defined.
 * The current UChar still responds to UCHAR_TYPE; OldUChar does not.
 *
 * @stable ICU 59
 */
#if U_SIZEOF_WCHAR_T==2
    typedef wchar_t OldUChar;
#else
#   ifdef __CHAR16_TYPE__
    typedef __CHAR16_TYPE__ OldUChar;
#   else
    typedef uint16_t OldUChar;
#   endif
#endif
| 406 | |||
/**
 * UChar32 is the type for a single Unicode code point.
 * It is a signed 32-bit integer (the same as int32_t).
 *
 * Unicode code points range over 0..0x10ffff.
 * Every other value (negative or >=0x110000) is illegal as a code point;
 * such values may serve as sentinels signalling "done", "error"
 * or similar non-code point conditions.
 *
 * Prior to ICU 2.4 (Jitterbug 2146), UChar32 was wchar_t when that type is
 * 32 bits wide (wchar_t may be signed or unsigned) and uint32_t otherwise,
 * i.e. the definition of UChar32 used to be platform-dependent.
 *
 * @see U_SENTINEL
 * @stable ICU 2.4
 */
typedef int32_t UChar32;
| 425 | |||
/**
 * Sentinel value for APIs that (take or) return single code points (UChar32).
 * The value lies outside the Unicode code point range 0..0x10ffff.
 *
 * A new API could, for example, use U_SENTINEL to signal "done" or "error".
 *
 * ICU APIs designed before ICU 2.4 usually define their own service-specific
 * "done" values, mostly 0xffff. Those may have to be told apart from real
 * U+ffff text contents by calling functions such as
 * CharacterIterator::hasNext() or UnicodeString::length().
 *
 * @return -1
 * @see UChar32
 * @stable ICU 2.4
 */
#define U_SENTINEL (-1)
| 445 | |||
| 446 | #include "unicode/urename.h" | ||
| 447 | |||
| 448 | #endif | ||
diff --git a/vendor/tree-sitter/lib/src/unicode/urename.h b/vendor/tree-sitter/lib/src/unicode/urename.h new file mode 100644 index 0000000..ac79ad0 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/urename.h | |||
| @@ -0,0 +1 @@ | |||
| // This file must exist in order for `utf8.h` and `utf16.h` to be used. | |||
diff --git a/vendor/tree-sitter/lib/src/unicode/utf.h b/vendor/tree-sitter/lib/src/unicode/utf.h new file mode 100644 index 0000000..ac79ad0 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/utf.h | |||
| @@ -0,0 +1 @@ | |||
| // This file must exist in order for `utf8.h` and `utf16.h` to be used. | |||
diff --git a/vendor/tree-sitter/lib/src/unicode/utf16.h b/vendor/tree-sitter/lib/src/unicode/utf16.h new file mode 100644 index 0000000..9fd7d5c --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/utf16.h | |||
| @@ -0,0 +1,733 @@ | |||
| 1 | // © 2016 and later: Unicode, Inc. and others. | ||
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | ||
| 3 | /* | ||
| 4 | ******************************************************************************* | ||
| 5 | * | ||
| 6 | * Copyright (C) 1999-2012, International Business Machines | ||
| 7 | * Corporation and others. All Rights Reserved. | ||
| 8 | * | ||
| 9 | ******************************************************************************* | ||
| 10 | * file name: utf16.h | ||
| 11 | * encoding: UTF-8 | ||
| 12 | * tab size: 8 (not used) | ||
| 13 | * indentation:4 | ||
| 14 | * | ||
| 15 | * created on: 1999sep09 | ||
| 16 | * created by: Markus W. Scherer | ||
| 17 | */ | ||
| 18 | |||
| 19 | /** | ||
| 20 | * \file | ||
| 21 | * \brief C API: 16-bit Unicode handling macros | ||
| 22 | * | ||
| 23 | * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. | ||
| 24 | * | ||
| 25 | * For more information see utf.h and the ICU User Guide Strings chapter | ||
| 26 | * (http://userguide.icu-project.org/strings). | ||
| 27 | * | ||
| 28 | * <em>Usage:</em> | ||
| 29 | * ICU coding guidelines for if() statements should be followed when using these macros. | ||
| 30 | * Compound statements (curly braces {}) must be used for if-else-while... | ||
| 31 | * bodies and all macro statements should be terminated with semicolon. | ||
| 32 | */ | ||
| 33 | |||
| 34 | #ifndef __UTF16_H__ | ||
| 35 | #define __UTF16_H__ | ||
| 36 | |||
| 37 | #include "unicode/umachine.h" | ||
| 38 | #ifndef __UTF_H__ | ||
| 39 | # include "unicode/utf.h" | ||
| 40 | #endif | ||
| 41 | |||
| 42 | /* single-code point definitions -------------------------------------------- */ | ||
| 43 | |||
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 *
 * The surrogate range check (U+d800..U+dfff) is written out here rather than
 * delegated to U_IS_SURROGATE(): the "unicode/utf.h" included by this header
 * can be an empty placeholder that does not provide that helper, and this
 * macro must still expand to valid code in that case.
 * The whole expansion is parenthesized so that it composes safely with
 * surrounding operators.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (((c)&0xfffff800)!=0xd800)
| 51 | |||
/**
 * Tests whether a code unit is a lead surrogate (U+d800..U+dbff).
 * Everything above the low 10 bits must equal 0xd800.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (((c) & 0xFFFFFC00) == 0xD800)
| 59 | |||
/**
 * Tests whether a code unit is a trail surrogate (U+dc00..U+dfff).
 * Everything above the low 10 bits must equal 0xdc00.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (((c) & 0xFFFFFC00) == 0xDC00)
| 67 | |||
/**
 * Is this code unit a surrogate (U+d800..U+dfff)?
 *
 * The range check is written out here (everything above the low 11 bits must
 * equal 0xd800) rather than delegated to U_IS_SURROGATE(): the
 * "unicode/utf.h" included by this header can be an empty placeholder that
 * does not provide that helper, and this macro is used by several of the
 * string macros below.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
| 75 | |||
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is a lead surrogate: bit 0x400 is clear for leads.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c) & 0x400))
| 84 | |||
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is a trail surrogate: bit 0x400 is set for trails.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0 != ((c) & 0x400))
| 93 | |||
/**
 * Helper constant for U16_GET_SUPPLEMENTARY: the combined bias of the
 * smallest lead surrogate (shifted left by 10) and the smallest trail
 * surrogate, minus 0x10000, which is subtracted from (lead<<10)+trail.
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xD800 << 10UL) + 0xDC00 - 0x10000)
| 99 | |||
/**
 * Combine a lead and a trail surrogate into the supplementary code point
 * (U+10000..U+10ffff) that they encode.
 * The result is undefined unless the inputs really are a lead and a trail
 * surrogate.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET)
| 113 | |||
| 114 | |||
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 *
 * The cast expression is enclosed in an outer pair of parentheses so that the
 * macro behaves as a single primary expression: without them, uses such as
 * "sizeof U16_LEAD(c)" or a postfix operator applied to the macro would bind
 * to the cast's type name or operand instead of to the whole result.
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
| 123 | |||
/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 *
 * The cast expression is enclosed in an outer pair of parentheses so that the
 * macro behaves as a single primary expression: without them, uses such as
 * "sizeof U16_TRAIL(c)" or a postfix operator applied to the macro would bind
 * to the cast's type name or operand instead of to the whole result.
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
| 132 | |||
/**
 * Number of 16-bit code units (1 or 2) needed to encode a Unicode code point.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c) > 0xffff ? 2 : 1)
| 141 | |||
/**
 * Upper bound on the number of 16-bit code units used for one Unicode
 * code point (U+0000..U+10ffff).
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
| 148 | |||
/**
 * Read the code point at a random-access offset; the offset is left unchanged.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may address either half of a surrogate pair: for a trail
 * surrogate the preceding unit is combined with it, for a lead surrogate the
 * following unit is.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iterating through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[i]; \
    if (U16_IS_SURROGATE(c)) { \
        if (U16_IS_SURROGATE_TRAIL(c)) { \
            (c) = U16_GET_SUPPLEMENTARY((s)[(i) - 1], (c)); \
        } else { \
            (c) = U16_GET_SUPPLEMENTARY((c), (s)[(i) + 1]); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 176 | |||
/**
 * Read the code point at a random-access offset; the offset is left unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may address either half of a surrogate pair, in which case the
 * adjacent matching surrogate is read as well: the following unit is only
 * examined when i+1 differs from length, the preceding unit only when i is
 * greater than start.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to that unpaired surrogate.
 * Iterating through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[i]; \
    if (U16_IS_SURROGATE(c)) { \
        uint16_t __pair; \
        if (U16_IS_SURROGATE_LEAD(c)) { \
            if ((i) + 1 != (length) && U16_IS_TRAIL(__pair = (s)[(i) + 1])) { \
                (c) = U16_GET_SUPPLEMENTARY((c), __pair); \
            } \
        } else if ((i) > (start) && U16_IS_LEAD(__pair = (s)[(i) - 1])) { \
            (c) = U16_GET_SUPPLEMENTARY(__pair, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 215 | |||
/**
 * Read the code point at a random-access offset; the offset is left unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may address either half of a surrogate pair, in which case the
 * adjacent matching surrogate is read as well: the following unit is only
 * examined when i+1 differs from length, the preceding unit only when i is
 * greater than start.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iterating through a string is more efficient with U16_NEXT_UNSAFE or
 * U16_NEXT_OR_FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[i]; \
    if (U16_IS_SURROGATE(c)) { \
        uint16_t __pair; \
        if (U16_IS_SURROGATE_LEAD(c)) { \
            (c) = ((i) + 1 != (length) && U16_IS_TRAIL(__pair = (s)[(i) + 1])) \
                ? U16_GET_SUPPLEMENTARY((c), __pair) \
                : 0xfffd; \
        } else { \
            (c) = ((i) > (start) && U16_IS_LEAD(__pair = (s)[(i) - 1])) \
                ? U16_GET_SUPPLEMENTARY(__pair, (c)) \
                : 0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 258 | |||
| 259 | /* definitions with forward iteration --------------------------------------- */ | ||
| 260 | |||
/**
 * Read the code point that starts at a code point boundary offset and move
 * the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * When the unit at the offset is a lead surrogate, the following unit is
 * consumed too and combined with it into a supplementary code point.
 * When the unit at the offset is a trail surrogate, that unit itself is
 * returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        (c) = U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END
| 286 | |||
/**
 * Read the code point that starts at a code point boundary offset and move
 * the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * When the unit at the offset is a lead surrogate, the next unit is examined
 * (only if the advanced offset differs from length) and, if it is a trail
 * surrogate, consumed and combined into a supplementary code point.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c) && (i) != (length)) { \
        uint16_t __trail = (s)[(i)]; \
        if (U16_IS_TRAIL(__trail)) { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __trail); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 318 | |||
/**
 * Read the code point that starts at a code point boundary offset and move
 * the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * When the unit at the offset is a lead surrogate that is followed (before
 * length) by a trail surrogate, both are consumed and combined into a
 * supplementary code point.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[(i)++]; \
    if (U16_IS_SURROGATE(c)) { \
        uint16_t __trail; \
        if (U16_IS_SURROGATE_TRAIL(c) || (i) == (length) || \
                !U16_IS_TRAIL(__trail = (s)[(i)])) { \
            (c) = 0xfffd; \
        } else { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __trail); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 352 | |||
/**
 * Write a code point at the end of a string, overwriting 1 or 2 code units.
 * The offset designates the current end of the string contents
 * and is advanced (post-increment) past the units written.
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if ((uint32_t)(c) > 0xffff) { \
        (s)[(i)++] = (uint16_t)(((c) >> 10) + 0xd7c0); \
        (s)[(i)++] = (uint16_t)(((c) & 0x3ff) | 0xdc00); \
    } else { \
        (s)[(i)++] = (uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
| 374 | |||
/**
 * Write a code point at the end of a string, overwriting 1 or 2 code units.
 * The offset designates the current end of the string contents
 * and is advanced (post-increment) past the units written.
 * "Safe" macro, checks for a valid code point.
 * Before a surrogate pair is written, the macro checks that both units fit
 * (i+1<capacity).
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to TRUE and nothing is written.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if ((uint32_t)(c) <= 0xffff) { \
        (s)[(i)++] = (uint16_t)(c); \
    } else if ((uint32_t)(c) > 0x10ffff || (i) + 1 >= (capacity)) { \
        /* c>0x10ffff or not enough space */ \
        (isError) = TRUE; \
    } else { \
        (s)[(i)++] = (uint16_t)(((c) >> 10) + 0xd7c0); \
        (s)[(i)++] = (uint16_t)(((c) & 0x3ff) | 0xdc00); \
    } \
} UPRV_BLOCK_MACRO_END
| 402 | |||
/**
 * Step the string offset forward from one code point boundary to the next:
 * one unit is always skipped, and a second one if the first was a lead surrogate.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if (U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 418 | |||
/**
 * Step the string offset forward from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * after a lead surrogate, a second unit is skipped only if the advanced
 * offset differs from length and the unit there is a trail surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if (U16_IS_LEAD((s)[(i)++])) { \
        if ((i) != (length) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 437 | |||
/**
 * Step the string offset forward from one code point boundary to the n-th
 * next one, i.e., skip n code points by applying U16_FWD_1_UNSAFE n times.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __remaining = (n); \
    for (; __remaining > 0; --__remaining) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
| 457 | |||
/**
 * Step the string offset forward from one code point boundary to the n-th
 * next one, i.e., skip up to n code points.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * stepping stops early once the offset reaches length or, for a negative
 * length, once the unit at the offset is NUL.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __remaining = (n); \
    for (; __remaining > 0 && ((i) < (length) || ((length) < 0 && (s)[i] != 0)); \
            --__remaining) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
| 480 | |||
/**
 * Move a random-access offset back to the start of the code point it is in.
 * If the unit at the offset is a trail surrogate, the offset is decremented;
 * otherwise it is left as it is.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if (U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 499 | |||
/**
 * Move a random-access offset back to the start of the code point it is in.
 * The offset is decremented only if it points to a trail surrogate, lies
 * after start, and the preceding unit is a lead surrogate;
 * otherwise it is left as it is.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if (U16_IS_TRAIL((s)[i])) { \
        if ((i) > (start) && U16_IS_LEAD((s)[(i) - 1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 519 | |||
| 520 | /* definitions with backward iteration -------------------------------------- */ | ||
| 521 | |||
/**
 * Move the string offset back from one code point boundary to the previous
 * one and read the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * When the unit before the offset is a trail surrogate, the unit before that
 * is consumed too and combined with it into a supplementary code point.
 * When the unit before the offset is a lead surrogate, that unit itself is
 * returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[--(i)]; \
    if (U16_IS_TRAIL(c)) { \
        (c) = U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
| 548 | |||
/**
 * Move the string offset back from one code point boundary to the previous
 * one and read the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * When the unit before the offset is a trail surrogate, the unit before that
 * is examined (only if the decremented offset is still greater than start)
 * and, if it is a lead surrogate, consumed and combined into a supplementary
 * code point.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[--(i)]; \
    if (U16_IS_TRAIL(c) && (i) > (start)) { \
        uint16_t __lead = (s)[(i) - 1]; \
        if (U16_IS_LEAD(__lead)) { \
            --(i); \
            (c) = U16_GET_SUPPLEMENTARY(__lead, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 579 | |||
/**
 * Move the string offset back from one code point boundary to the previous
 * one and read the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * When the unit before the offset is a trail surrogate that is preceded
 * (after start) by a lead surrogate, both are consumed and combined into a
 * supplementary code point.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[--(i)]; \
    if (U16_IS_SURROGATE(c)) { \
        uint16_t __lead; \
        if (U16_IS_SURROGATE_LEAD(c) || (i) <= (start) || \
                !U16_IS_LEAD(__lead = (s)[(i) - 1])) { \
            (c) = 0xfffd; \
        } else { \
            --(i); \
            (c) = U16_GET_SUPPLEMENTARY(__lead, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 612 | |||
/**
 * Step the string offset back from one code point boundary to the previous
 * one: one unit is always skipped, and a second one if the first was a
 * trail surrogate.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if (U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 629 | |||
| 630 | /** | ||
| 631 | * Move the string offset from one code point boundary to the previous one. | ||
| 632 | * (Pre-decrementing backward iteration.) | ||
| 633 | * The input offset may be the same as the string length. | ||
| 634 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. | ||
| 635 | * | ||
| 636 | * @param s const UChar * string | ||
| 637 | * @param start starting string offset (usually 0) | ||
| 638 | * @param i string offset, must be start<i | ||
| 639 | * @see U16_BACK_1_UNSAFE | ||
| 640 | * @stable ICU 2.4 | ||
| 641 | */ | ||
| 642 | #define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 643 | if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ | ||
| 644 | --(i); \ | ||
| 645 | } \ | ||
| 646 | } UPRV_BLOCK_MACRO_END | ||
| 647 | |||
| 648 | /** | ||
| 649 | * Move the string offset from one code point boundary to the n-th one before it, | ||
| 650 | * i.e., move backward by n code points. | ||
| 651 | * (Pre-decrementing backward iteration.) | ||
| 652 | * The input offset may be the same as the string length. | ||
| 653 | * "Unsafe" macro, assumes well-formed UTF-16. | ||
| 654 | * | ||
| 655 | * @param s const UChar * string | ||
| 656 | * @param i string offset | ||
| 657 | * @param n number of code points to skip | ||
| 658 | * @see U16_BACK_N | ||
| 659 | * @stable ICU 2.4 | ||
| 660 | */ | ||
| 661 | #define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 662 | int32_t __N=(n); \ | ||
| 663 | while(__N>0) { \ | ||
| 664 | U16_BACK_1_UNSAFE(s, i); \ | ||
| 665 | --__N; \ | ||
| 666 | } \ | ||
| 667 | } UPRV_BLOCK_MACRO_END | ||
| 668 | |||
| 669 | /** | ||
| 670 | * Move the string offset from one code point boundary to the n-th one before it, | ||
| 671 | * i.e., move backward by n code points. | ||
| 672 | * (Pre-decrementing backward iteration.) | ||
| 673 | * The input offset may be the same as the string length. | ||
| 674 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. | ||
| 675 | * | ||
| 676 | * @param s const UChar * string | ||
| 677 | * @param start start of string | ||
| 678 | * @param i string offset, must be start<i | ||
| 679 | * @param n number of code points to skip | ||
| 680 | * @see U16_BACK_N_UNSAFE | ||
| 681 | * @stable ICU 2.4 | ||
| 682 | */ | ||
| 683 | #define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 684 | int32_t __N=(n); \ | ||
| 685 | while(__N>0 && (i)>(start)) { \ | ||
| 686 | U16_BACK_1(s, start, i); \ | ||
| 687 | --__N; \ | ||
| 688 | } \ | ||
| 689 | } UPRV_BLOCK_MACRO_END | ||
| 690 | |||
| 691 | /** | ||
| 692 | * Adjust a random-access offset to a code point boundary after a code point. | ||
| 693 | * If the offset is behind the lead surrogate of a surrogate pair, | ||
| 694 | * then the offset is incremented. | ||
| 695 | * Otherwise, it is not modified. | ||
| 696 | * The input offset may be the same as the string length. | ||
| 697 | * "Unsafe" macro, assumes well-formed UTF-16. | ||
| 698 | * | ||
| 699 | * @param s const UChar * string | ||
| 700 | * @param i string offset | ||
| 701 | * @see U16_SET_CP_LIMIT | ||
| 702 | * @stable ICU 2.4 | ||
| 703 | */ | ||
| 704 | #define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 705 | if(U16_IS_LEAD((s)[(i)-1])) { \ | ||
| 706 | ++(i); \ | ||
| 707 | } \ | ||
| 708 | } UPRV_BLOCK_MACRO_END | ||
| 709 | |||
| 710 | /** | ||
| 711 | * Adjust a random-access offset to a code point boundary after a code point. | ||
| 712 | * If the offset is behind the lead surrogate of a surrogate pair, | ||
| 713 | * then the offset is incremented. | ||
| 714 | * Otherwise, it is not modified. | ||
| 715 | * The input offset may be the same as the string length. | ||
| 716 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. | ||
| 717 | * | ||
| 718 | * The length can be negative for a NUL-terminated string. | ||
| 719 | * | ||
| 720 | * @param s const UChar * string | ||
| 721 | * @param start int32_t starting string offset (usually 0) | ||
| 722 | * @param i int32_t string offset, start<=i<=length | ||
| 723 | * @param length int32_t string length | ||
| 724 | * @see U16_SET_CP_LIMIT_UNSAFE | ||
| 725 | * @stable ICU 2.4 | ||
| 726 | */ | ||
| 727 | #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 728 | if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ | ||
| 729 | ++(i); \ | ||
| 730 | } \ | ||
| 731 | } UPRV_BLOCK_MACRO_END | ||
| 732 | |||
| 733 | #endif | ||
diff --git a/vendor/tree-sitter/lib/src/unicode/utf8.h b/vendor/tree-sitter/lib/src/unicode/utf8.h new file mode 100644 index 0000000..bb00130 --- /dev/null +++ b/vendor/tree-sitter/lib/src/unicode/utf8.h | |||
| @@ -0,0 +1,881 @@ | |||
| 1 | // © 2016 and later: Unicode, Inc. and others. | ||
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | ||
| 3 | /* | ||
| 4 | ******************************************************************************* | ||
| 5 | * | ||
| 6 | * Copyright (C) 1999-2015, International Business Machines | ||
| 7 | * Corporation and others. All Rights Reserved. | ||
| 8 | * | ||
| 9 | ******************************************************************************* | ||
| 10 | * file name: utf8.h | ||
| 11 | * encoding: UTF-8 | ||
| 12 | * tab size: 8 (not used) | ||
| 13 | * indentation:4 | ||
| 14 | * | ||
| 15 | * created on: 1999sep13 | ||
| 16 | * created by: Markus W. Scherer | ||
| 17 | */ | ||
| 18 | |||
| 19 | /** | ||
| 20 | * \file | ||
| 21 | * \brief C API: 8-bit Unicode handling macros | ||
| 22 | * | ||
| 23 | * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. | ||
| 24 | * | ||
| 25 | * For more information see utf.h and the ICU User Guide Strings chapter | ||
| 26 | * (https://unicode-org.github.io/icu/userguide/strings). | ||
| 27 | * | ||
| 28 | * <em>Usage:</em> | ||
| 29 | * ICU coding guidelines for if() statements should be followed when using these macros. | ||
| 30 | * Compound statements (curly braces {}) must be used for if-else-while... | ||
| 31 | * bodies and all macro statements should be terminated with semicolon. | ||
| 32 | */ | ||
| 33 | |||
| 34 | #ifndef __UTF8_H__ | ||
| 35 | #define __UTF8_H__ | ||
| 36 | |||
| 37 | #include "unicode/umachine.h" | ||
| 38 | #ifndef __UTF_H__ | ||
| 39 | # include "unicode/utf.h" | ||
| 40 | #endif | ||
| 41 | |||
| 42 | /* internal definitions ----------------------------------------------------- */ | ||
| 43 | |||
| 44 | /** | ||
| 45 | * Counts the trail bytes for a UTF-8 lead byte. | ||
| 46 | * Returns 0 for 0..0xc1 as well as for 0xf5..0xff. | ||
| 47 | * leadByte might be evaluated multiple times. | ||
| 48 | * | ||
| 49 | * This is internal since it is not meant to be called directly by external clients; | ||
| 50 | * however it is called by public macros in this file and thus must remain stable. | ||
| 51 | * | ||
| 52 | * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. | ||
| 53 | * @internal | ||
| 54 | */ | ||
| 55 | #define U8_COUNT_TRAIL_BYTES(leadByte) \ | ||
| 56 | (U8_IS_LEAD(leadByte) ? \ | ||
| 57 | ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0) | ||
| 58 | |||
| 59 | /** | ||
| 60 | * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. | ||
| 61 | * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff. | ||
| 62 | * leadByte might be evaluated multiple times. | ||
| 63 | * | ||
| 64 | * This is internal since it is not meant to be called directly by external clients; | ||
| 65 | * however it is called by public macros in this file and thus must remain stable. | ||
| 66 | * | ||
| 67 | * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. | ||
| 68 | * @internal | ||
| 69 | */ | ||
| 70 | #define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \ | ||
| 71 | (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)) | ||
| 72 | |||
| 73 | /** | ||
| 74 | * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. | ||
| 75 | * | ||
| 76 | * This is internal since it is not meant to be called directly by external clients; | ||
| 77 | * however it is called by public macros in this file and thus must remain stable. | ||
| 78 | * @internal | ||
| 79 | */ | ||
| 80 | #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) | ||
| 81 | |||
| 82 | /** | ||
| 83 | * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1. | ||
| 84 | * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. | ||
| 85 | * Lead byte E0..EF bits 3..0 are used as byte index, | ||
| 86 | * first trail byte bits 7..5 are used as bit index into that byte. | ||
| 87 | * @see U8_IS_VALID_LEAD3_AND_T1 | ||
| 88 | * @internal | ||
| 89 | */ | ||
| 90 | #define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" | ||
| 91 | |||
| 92 | /** | ||
| 93 | * Internal 3-byte UTF-8 validity check. | ||
| 94 | * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. | ||
| 95 | * @internal | ||
| 96 | */ | ||
| 97 | #define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5))) | ||
| 98 | |||
| 99 | /** | ||
| 100 | * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1. | ||
| 101 | * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. | ||
| 102 | * First trail byte bits 7..4 are used as byte index, | ||
| 103 | * lead byte F0..F4 bits 2..0 are used as bit index into that byte. | ||
| 104 | * @see U8_IS_VALID_LEAD4_AND_T1 | ||
| 105 | * @internal | ||
| 106 | */ | ||
| 107 | #define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" | ||
| 108 | |||
| 109 | /** | ||
| 110 | * Internal 4-byte UTF-8 validity check. | ||
| 111 | * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. | ||
| 112 | * @internal | ||
| 113 | */ | ||
| 114 | #define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7))) | ||
| 115 | |||
| 116 | /** | ||
| 117 | * Function for handling "next code point" with error-checking. | ||
| 118 | * | ||
| 119 | * This is internal since it is not meant to be called directly by external clients; | ||
| 120 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this | ||
| 121 | * file and thus must remain stable, and should not be hidden when other internal | ||
| 122 | * functions are hidden (otherwise public macros would fail to compile). | ||
| 123 | * @internal | ||
| 124 | */ | ||
| 125 | U_STABLE UChar32 U_EXPORT2 | ||
| 126 | utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); | ||
| 127 | |||
| 128 | /** | ||
| 129 | * Function for handling "append code point" with error-checking. | ||
| 130 | * | ||
| 131 | * This is internal since it is not meant to be called directly by external clients; | ||
| 132 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this | ||
| 133 | * file and thus must remain stable, and should not be hidden when other internal | ||
| 134 | * functions are hidden (otherwise public macros would fail to compile). | ||
| 135 | * @internal | ||
| 136 | */ | ||
| 137 | U_STABLE int32_t U_EXPORT2 | ||
| 138 | utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError); | ||
| 139 | |||
| 140 | /** | ||
| 141 | * Function for handling "previous code point" with error-checking. | ||
| 142 | * | ||
| 143 | * This is internal since it is not meant to be called directly by external clients; | ||
| 144 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this | ||
| 145 | * file and thus must remain stable, and should not be hidden when other internal | ||
| 146 | * functions are hidden (otherwise public macros would fail to compile). | ||
| 147 | * @internal | ||
| 148 | */ | ||
| 149 | U_STABLE UChar32 U_EXPORT2 | ||
| 150 | utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); | ||
| 151 | |||
| 152 | /** | ||
| 153 | * Function for handling "skip backward one code point" with error-checking. | ||
| 154 | * | ||
| 155 | * This is internal since it is not meant to be called directly by external clients; | ||
| 156 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this | ||
| 157 | * file and thus must remain stable, and should not be hidden when other internal | ||
| 158 | * functions are hidden (otherwise public macros would fail to compile). | ||
| 159 | * @internal | ||
| 160 | */ | ||
| 161 | U_STABLE int32_t U_EXPORT2 | ||
| 162 | utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); | ||
| 163 | |||
| 164 | /* single-code point definitions -------------------------------------------- */ | ||
| 165 | |||
| 166 | /** | ||
| 167 | * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? | ||
| 168 | * @param c 8-bit code unit (byte) | ||
| 169 | * @return TRUE or FALSE | ||
| 170 | * @stable ICU 2.4 | ||
| 171 | */ | ||
| 172 | #define U8_IS_SINGLE(c) (((c)&0x80)==0) | ||
| 173 | |||
| 174 | /** | ||
| 175 | * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4) | ||
| 176 | * @param c 8-bit code unit (byte) | ||
| 177 | * @return TRUE or FALSE | ||
| 178 | * @stable ICU 2.4 | ||
| 179 | */ | ||
| 180 | #define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32) | ||
| 181 | // 0x32=0xf4-0xc2 | ||
| 182 | |||
| 183 | /** | ||
| 184 | * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF) | ||
| 185 | * @param c 8-bit code unit (byte) | ||
| 186 | * @return TRUE or FALSE | ||
| 187 | * @stable ICU 2.4 | ||
| 188 | */ | ||
| 189 | #define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40) | ||
| 190 | |||
| 191 | /** | ||
| 192 | * How many code units (bytes) are used for the UTF-8 encoding | ||
| 193 | * of this Unicode code point? | ||
| 194 | * @param c 32-bit code point | ||
| 195 | * @return 1..4, or 0 if c is a surrogate or not a Unicode code point | ||
| 196 | * @stable ICU 2.4 | ||
| 197 | */ | ||
| 198 | #define U8_LENGTH(c) \ | ||
| 199 | ((uint32_t)(c)<=0x7f ? 1 : \ | ||
| 200 | ((uint32_t)(c)<=0x7ff ? 2 : \ | ||
| 201 | ((uint32_t)(c)<=0xd7ff ? 3 : \ | ||
| 202 | ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ | ||
| 203 | ((uint32_t)(c)<=0xffff ? 3 : 4)\ | ||
| 204 | ) \ | ||
| 205 | ) \ | ||
| 206 | ) \ | ||
| 207 | ) | ||
| 208 | |||
| 209 | /** | ||
| 210 | * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). | ||
| 211 | * @return 4 | ||
| 212 | * @stable ICU 2.4 | ||
| 213 | */ | ||
| 214 | #define U8_MAX_LENGTH 4 | ||
| 215 | |||
| 216 | /** | ||
| 217 | * Get a code point from a string at a random-access offset, | ||
| 218 | * without changing the offset. | ||
| 219 | * The offset may point to either the lead byte or one of the trail bytes | ||
| 220 | * for a code point, in which case the macro will read all of the bytes | ||
| 221 | * for the code point. | ||
| 222 | * The result is undefined if the offset points to an illegal UTF-8 | ||
| 223 | * byte sequence. | ||
| 224 | * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. | ||
| 225 | * | ||
| 226 | * @param s const uint8_t * string | ||
| 227 | * @param i string offset | ||
| 228 | * @param c output UChar32 variable | ||
| 229 | * @see U8_GET | ||
| 230 | * @stable ICU 2.4 | ||
| 231 | */ | ||
| 232 | #define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 233 | int32_t _u8_get_unsafe_index=(int32_t)(i); \ | ||
| 234 | U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ | ||
| 235 | U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ | ||
| 236 | } UPRV_BLOCK_MACRO_END | ||
| 237 | |||
| 238 | /** | ||
| 239 | * Get a code point from a string at a random-access offset, | ||
| 240 | * without changing the offset. | ||
| 241 | * The offset may point to either the lead byte or one of the trail bytes | ||
| 242 | * for a code point, in which case the macro will read all of the bytes | ||
| 243 | * for the code point. | ||
| 244 | * | ||
| 245 | * The length can be negative for a NUL-terminated string. | ||
| 246 | * | ||
| 247 | * If the offset points to an illegal UTF-8 byte sequence, then | ||
| 248 | * c is set to a negative value. | ||
| 249 | * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. | ||
| 250 | * | ||
| 251 | * @param s const uint8_t * string | ||
| 252 | * @param start int32_t starting string offset | ||
| 253 | * @param i int32_t string offset, must be start<=i<length | ||
| 254 | * @param length int32_t string length | ||
| 255 | * @param c output UChar32 variable, set to <0 in case of an error | ||
| 256 | * @see U8_GET_UNSAFE | ||
| 257 | * @stable ICU 2.4 | ||
| 258 | */ | ||
| 259 | #define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 260 | int32_t _u8_get_index=(i); \ | ||
| 261 | U8_SET_CP_START(s, start, _u8_get_index); \ | ||
| 262 | U8_NEXT(s, _u8_get_index, length, c); \ | ||
| 263 | } UPRV_BLOCK_MACRO_END | ||
| 264 | |||
| 265 | /** | ||
| 266 | * Get a code point from a string at a random-access offset, | ||
| 267 | * without changing the offset. | ||
| 268 | * The offset may point to either the lead byte or one of the trail bytes | ||
| 269 | * for a code point, in which case the macro will read all of the bytes | ||
| 270 | * for the code point. | ||
| 271 | * | ||
| 272 | * The length can be negative for a NUL-terminated string. | ||
| 273 | * | ||
| 274 | * If the offset points to an illegal UTF-8 byte sequence, then | ||
| 275 | * c is set to U+FFFD. | ||
| 276 | * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD. | ||
| 277 | * | ||
| 278 | * This macro does not distinguish between a real U+FFFD in the text | ||
| 279 | * and U+FFFD returned for an ill-formed sequence. | ||
| 280 | * Use U8_GET() if that distinction is important. | ||
| 281 | * | ||
| 282 | * @param s const uint8_t * string | ||
| 283 | * @param start int32_t starting string offset | ||
| 284 | * @param i int32_t string offset, must be start<=i<length | ||
| 285 | * @param length int32_t string length | ||
| 286 | * @param c output UChar32 variable, set to U+FFFD in case of an error | ||
| 287 | * @see U8_GET | ||
| 288 | * @stable ICU 51 | ||
| 289 | */ | ||
| 290 | #define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 291 | int32_t _u8_get_index=(i); \ | ||
| 292 | U8_SET_CP_START(s, start, _u8_get_index); \ | ||
| 293 | U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \ | ||
| 294 | } UPRV_BLOCK_MACRO_END | ||
| 295 | |||
| 296 | /* definitions with forward iteration --------------------------------------- */ | ||
| 297 | |||
| 298 | /** | ||
| 299 | * Get a code point from a string at a code point boundary offset, | ||
| 300 | * and advance the offset to the next code point boundary. | ||
| 301 | * (Post-incrementing forward iteration.) | ||
| 302 | * "Unsafe" macro, assumes well-formed UTF-8. | ||
| 303 | * | ||
| 304 | * The offset may point to the lead byte of a multi-byte sequence, | ||
| 305 | * in which case the macro will read the whole sequence. | ||
| 306 | * The result is undefined if the offset points to a trail byte | ||
| 307 | * or an illegal UTF-8 sequence. | ||
| 308 | * | ||
| 309 | * @param s const uint8_t * string | ||
| 310 | * @param i string offset | ||
| 311 | * @param c output UChar32 variable | ||
| 312 | * @see U8_NEXT | ||
| 313 | * @stable ICU 2.4 | ||
| 314 | */ | ||
| 315 | #define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 316 | (c)=(uint8_t)(s)[(i)++]; \ | ||
| 317 | if(!U8_IS_SINGLE(c)) { \ | ||
| 318 | if((c)<0xe0) { \ | ||
| 319 | (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \ | ||
| 320 | } else if((c)<0xf0) { \ | ||
| 321 | /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ | ||
| 322 | (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \ | ||
| 323 | (i)+=2; \ | ||
| 324 | } else { \ | ||
| 325 | (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \ | ||
| 326 | (i)+=3; \ | ||
| 327 | } \ | ||
| 328 | } \ | ||
| 329 | } UPRV_BLOCK_MACRO_END | ||
| 330 | |||
| 331 | /** | ||
| 332 | * Get a code point from a string at a code point boundary offset, | ||
| 333 | * and advance the offset to the next code point boundary. | ||
| 334 | * (Post-incrementing forward iteration.) | ||
| 335 | * "Safe" macro, checks for illegal sequences and for string boundaries. | ||
| 336 | * | ||
| 337 | * The length can be negative for a NUL-terminated string. | ||
| 338 | * | ||
| 339 | * The offset may point to the lead byte of a multi-byte sequence, | ||
| 340 | * in which case the macro will read the whole sequence. | ||
| 341 | * If the offset points to a trail byte or an illegal UTF-8 sequence, then | ||
| 342 | * c is set to a negative value. | ||
| 343 | * | ||
| 344 | * @param s const uint8_t * string | ||
| 345 | * @param i int32_t string offset, must be i<length | ||
| 346 | * @param length int32_t string length | ||
| 347 | * @param c output UChar32 variable, set to <0 in case of an error | ||
| 348 | * @see U8_NEXT_UNSAFE | ||
| 349 | * @stable ICU 2.4 | ||
| 350 | */ | ||
| 351 | #define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) | ||
| 352 | |||
| 353 | /** | ||
| 354 | * Get a code point from a string at a code point boundary offset, | ||
| 355 | * and advance the offset to the next code point boundary. | ||
| 356 | * (Post-incrementing forward iteration.) | ||
| 357 | * "Safe" macro, checks for illegal sequences and for string boundaries. | ||
| 358 | * | ||
| 359 | * The length can be negative for a NUL-terminated string. | ||
| 360 | * | ||
| 361 | * The offset may point to the lead byte of a multi-byte sequence, | ||
| 362 | * in which case the macro will read the whole sequence. | ||
| 363 | * If the offset points to a trail byte or an illegal UTF-8 sequence, then | ||
| 364 | * c is set to U+FFFD. | ||
| 365 | * | ||
| 366 | * This macro does not distinguish between a real U+FFFD in the text | ||
| 367 | * and U+FFFD returned for an ill-formed sequence. | ||
| 368 | * Use U8_NEXT() if that distinction is important. | ||
| 369 | * | ||
| 370 | * @param s const uint8_t * string | ||
| 371 | * @param i int32_t string offset, must be i<length | ||
| 372 | * @param length int32_t string length | ||
| 373 | * @param c output UChar32 variable, set to U+FFFD in case of an error | ||
| 374 | * @see U8_NEXT | ||
| 375 | * @stable ICU 51 | ||
| 376 | */ | ||
| 377 | #define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) | ||
| 378 | |||
| 379 | /** @internal */ | ||
| 380 | #define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 381 | (c)=(uint8_t)(s)[(i)++]; \ | ||
| 382 | if(!U8_IS_SINGLE(c)) { \ | ||
| 383 | uint8_t __t = 0; \ | ||
| 384 | if((i)!=(length) && \ | ||
| 385 | /* fetch/validate/assemble all but last trail byte */ \ | ||
| 386 | ((c)>=0xe0 ? \ | ||
| 387 | ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \ | ||
| 388 | U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ | ||
| 389 | (__t&=0x3f, 1) \ | ||
| 390 | : /* U+10000..U+10FFFF */ \ | ||
| 391 | ((c)-=0xf0)<=4 && \ | ||
| 392 | U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ | ||
| 393 | ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ | ||
| 394 | (__t=(s)[i]-0x80)<=0x3f) && \ | ||
| 395 | /* valid second-to-last trail byte */ \ | ||
| 396 | ((c)=((c)<<6)|__t, ++(i)!=(length)) \ | ||
| 397 | : /* U+0080..U+07FF */ \ | ||
| 398 | (c)>=0xc2 && ((c)&=0x1f, 1)) && \ | ||
| 399 | /* last trail byte */ \ | ||
| 400 | (__t=(s)[i]-0x80)<=0x3f && \ | ||
| 401 | ((c)=((c)<<6)|__t, ++(i), 1)) { \ | ||
| 402 | } else { \ | ||
| 403 | (c)=(sub); /* ill-formed*/ \ | ||
| 404 | } \ | ||
| 405 | } \ | ||
| 406 | } UPRV_BLOCK_MACRO_END | ||
| 407 | |||
| 408 | /** | ||
| 409 | * Append a code point to a string, overwriting 1 to 4 bytes. | ||
| 410 | * The offset points to the current end of the string contents | ||
| 411 | * and is advanced (post-increment). | ||
| 412 | * "Unsafe" macro, assumes a valid code point and sufficient space in the string. | ||
| 413 | * Otherwise, the result is undefined. | ||
| 414 | * | ||
| 415 | * @param s uint8_t * string buffer (written to, so it must not be const) | ||
| 416 | * @param i string offset | ||
| 417 | * @param c code point to append | ||
| 418 | * @see U8_APPEND | ||
| 419 | * @stable ICU 2.4 | ||
| 420 | */ | ||
| 421 | #define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 422 | uint32_t __uc=(c); \ | ||
| 423 | if(__uc<=0x7f) { \ | ||
| 424 | (s)[(i)++]=(uint8_t)__uc; \ | ||
| 425 | } else { \ | ||
| 426 | if(__uc<=0x7ff) { \ | ||
| 427 | (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ | ||
| 428 | } else { \ | ||
| 429 | if(__uc<=0xffff) { \ | ||
| 430 | (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ | ||
| 431 | } else { \ | ||
| 432 | (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ | ||
| 433 | (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ | ||
| 434 | } \ | ||
| 435 | (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ | ||
| 436 | } \ | ||
| 437 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ | ||
| 438 | } \ | ||
| 439 | } UPRV_BLOCK_MACRO_END | ||
| 440 | |||
| 441 | /** | ||
| 442 | * Append a code point to a string, overwriting 1 to 4 bytes. | ||
| 443 | * The offset points to the current end of the string contents | ||
| 444 | * and is advanced (post-increment). | ||
| 445 | * "Safe" macro, checks for a valid code point. | ||
| 446 | * If a non-ASCII code point is written, checks for sufficient space in the string. | ||
| 447 | * If the code point is not valid or trail bytes do not fit, | ||
| 448 | * then isError is set to TRUE. | ||
| 449 | * | ||
| 450 | * @param s uint8_t * string buffer (written to, so it must not be const) | ||
| 451 | * @param i int32_t string offset, must be i<capacity | ||
| 452 | * @param capacity int32_t size of the string buffer | ||
| 453 | * @param c UChar32 code point to append | ||
| 454 | * @param isError output UBool set to TRUE if an error occurs, otherwise not modified | ||
| 455 | * @see U8_APPEND_UNSAFE | ||
| 456 | * @stable ICU 2.4 | ||
| 457 | */ | ||
| 458 | #define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 459 | uint32_t __uc=(c); \ | ||
| 460 | if(__uc<=0x7f) { \ | ||
| 461 | (s)[(i)++]=(uint8_t)__uc; \ | ||
| 462 | } else if(__uc<=0x7ff && (i)+1<(capacity)) { \ | ||
| 463 | (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ | ||
| 464 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ | ||
| 465 | } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ | ||
| 466 | (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ | ||
| 467 | (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ | ||
| 468 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ | ||
| 469 | } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ | ||
| 470 | (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ | ||
| 471 | (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ | ||
| 472 | (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ | ||
| 473 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ | ||
| 474 | } else { \ | ||
| 475 | (isError)=TRUE; \ | ||
| 476 | } \ | ||
| 477 | } UPRV_BLOCK_MACRO_END | ||
| 478 | |||
| 479 | /** | ||
| 480 | * Advance the string offset from one code point boundary to the next. | ||
| 481 | * (Post-incrementing iteration.) | ||
| 482 | * "Unsafe" macro, assumes well-formed UTF-8. | ||
| 483 | * | ||
| 484 | * @param s const uint8_t * string | ||
| 485 | * @param i string offset | ||
| 486 | * @see U8_FWD_1 | ||
| 487 | * @stable ICU 2.4 | ||
| 488 | */ | ||
| 489 | #define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 490 | (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \ | ||
| 491 | } UPRV_BLOCK_MACRO_END | ||
| 492 | |||
| 493 | /** | ||
| 494 | * Advance the string offset from one code point boundary to the next. | ||
| 495 | * (Post-incrementing iteration.) | ||
| 496 | * "Safe" macro, checks for illegal sequences and for string boundaries. | ||
| 497 | * | ||
| 498 | * The length can be negative for a NUL-terminated string. | ||
| 499 | * | ||
| 500 | * @param s const uint8_t * string | ||
| 501 | * @param i int32_t string offset, must be i<length | ||
| 502 | * @param length int32_t string length | ||
| 503 | * @see U8_FWD_1_UNSAFE | ||
| 504 | * @stable ICU 2.4 | ||
| 505 | */ | ||
| 506 | #define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 507 | uint8_t __b=(s)[(i)++]; \ | ||
| 508 | if(U8_IS_LEAD(__b) && (i)!=(length)) { \ | ||
| 509 | uint8_t __t1=(s)[i]; \ | ||
| 510 | if((0xe0<=__b && __b<0xf0)) { \ | ||
| 511 | if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \ | ||
| 512 | ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ | ||
| 513 | ++(i); \ | ||
| 514 | } \ | ||
| 515 | } else if(__b<0xe0) { \ | ||
| 516 | if(U8_IS_TRAIL(__t1)) { \ | ||
| 517 | ++(i); \ | ||
| 518 | } \ | ||
| 519 | } else /* c>=0xf0 */ { \ | ||
| 520 | if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \ | ||
| 521 | ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \ | ||
| 522 | ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ | ||
| 523 | ++(i); \ | ||
| 524 | } \ | ||
| 525 | } \ | ||
| 526 | } \ | ||
| 527 | } UPRV_BLOCK_MACRO_END | ||
| 528 | |||
| 529 | /** | ||
| 530 | * Advance the string offset from one code point boundary to the n-th next one, | ||
| 531 | * i.e., move forward by n code points. | ||
| 532 | * (Post-incrementing iteration.) | ||
| 533 | * "Unsafe" macro, assumes well-formed UTF-8. | ||
| 534 | * | ||
| 535 | * @param s const uint8_t * string | ||
| 536 | * @param i string offset | ||
| 537 | * @param n number of code points to skip | ||
| 538 | * @see U8_FWD_N | ||
| 539 | * @stable ICU 2.4 | ||
| 540 | */ | ||
| 541 | #define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 542 | int32_t __N=(n); \ | ||
| 543 | while(__N>0) { \ | ||
| 544 | U8_FWD_1_UNSAFE(s, i); \ | ||
| 545 | --__N; \ | ||
| 546 | } \ | ||
| 547 | } UPRV_BLOCK_MACRO_END | ||
| 548 | |||
| 549 | /** | ||
| 550 | * Advance the string offset from one code point boundary to the n-th next one, | ||
| 551 | * i.e., move forward by n code points. | ||
| 552 | * (Post-incrementing iteration.) | ||
| 553 | * "Safe" macro, checks for illegal sequences and for string boundaries. | ||
| 554 | * | ||
| 555 | * The length can be negative for a NUL-terminated string. | ||
| 556 | * | ||
| 557 | * @param s const uint8_t * string | ||
| 558 | * @param i int32_t string offset, must be i<length | ||
| 559 | * @param length int32_t string length | ||
| 560 | * @param n number of code points to skip | ||
| 561 | * @see U8_FWD_N_UNSAFE | ||
| 562 | * @stable ICU 2.4 | ||
| 563 | */ | ||
| 564 | #define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ | ||
| 565 | int32_t __N=(n); \ | ||
| 566 | while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ | ||
| 567 | U8_FWD_1(s, i, length); \ | ||
| 568 | --__N; \ | ||
| 569 | } \ | ||
| 570 | } UPRV_BLOCK_MACRO_END | ||
| 571 | |||
/**
 * Snap a random-access offset back to the start of the code point
 * that contains it.
 * While the offset points to a UTF-8 trail byte it is decremented,
 * so that it ends up on the corresponding lead byte.
 * An offset that is not on a trail byte is left unchanged.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_START
 * @stable ICU 2.4
 */
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* no lower bound is checked here: well-formed input is assumed */ \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END
| 588 | |||
/**
 * Snap a random-access offset back to the start of the code point
 * that contains it.
 * If the offset points to a UTF-8 trail byte, it is moved backward
 * to the corresponding lead byte; otherwise it is left unchanged.
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * In contrast to U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i
 * @see U8_SET_CP_START_UNSAFE
 * @see U8_TRUNCATE_IF_INCOMPLETE
 * @stable ICU 2.4
 */
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U8_IS_TRAIL((s)[(i)])) { \
        /* on a trail byte: the helper computes the adjusted offset */ \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } else { \
        /* not on a trail byte: the offset stays where it is */ \
    } \
} UPRV_BLOCK_MACRO_END
| 611 | |||
/**
 * If the string ends with a UTF-8 byte sequence that is valid so far
 * but incomplete, then reduce the length of the string to end before
 * the lead byte of that incomplete sequence.
 * For example, if the string ends with E1 80, the length is reduced by 2.
 *
 * In all other cases (the string ends with a complete sequence, or it is not
 * possible for any further trail byte to extend the trailing sequence)
 * the length remains unchanged.
 *
 * Useful for processing text split across multiple buffers
 * (save the incomplete sequence for later)
 * and for optimizing iteration
 * (check for string length only once per character).
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_SET_CP_START(), this macro never reads s[length].
 *
 * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
 *
 * The string argument is parenthesized at every use, so an expression
 * such as (base+offset) can be passed for s.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param length int32_t string length (usually start<=length)
 * @see U8_SET_CP_START
 * @stable ICU 61
 */
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((length)>(start)) { \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* the last byte is a lead byte with no trail bytes: drop it */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* lead byte plus one trail byte: drop both only if the */ \
                /* pair passes the 3-byte or 4-byte lead/first-trail check */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                /* two trail bytes: only a 4-byte lead can still be incomplete */ \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 661 | |||
| 662 | /* definitions with backward iteration -------------------------------------- */ | ||
| 663 | |||
/**
 * Step the string offset back from one code point boundary to the
 * previous one and deliver the code point that lies between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind an illegal UTF-8 sequence.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_PREV
 * @stable ICU 2.4
 */
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
        \
        /* c is a trail byte: keep its low 6 bits as the lowest bits */ \
        (c)&=0x3f; \
        /* fold in earlier trail bytes until a byte >=0xc0 (the lead) is read */ \
        while((__b=(s)[--(i)])<0xc0) { \
            (c)|=(UChar32)(__b&0x3f)<<__shift; \
            ++__count; \
            __shift+=6; \
        } \
        /* __b is the lead byte: mask it per trail count, place it on top */ \
        U8_MASK_LEAD_BYTE(__b, __count); \
        (c)|=(UChar32)__b<<__shift; \
    } \
} UPRV_BLOCK_MACRO_END
| 704 | |||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
 *
 * The string argument is parenthesized before it is cast, so an expression
 * can be passed for s without the cast binding to only part of it.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* not a single-byte code point: the helper decodes or reports */ \
        /* the error; -1 is the mode argument used by this macro */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
| 731 | |||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_PREV() if that distinction is important.
 *
 * The string argument is parenthesized before it is cast, so an expression
 * can be passed for s without the cast binding to only part of it.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_PREV
 * @stable ICU 51
 */
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* not a single-byte code point: the helper decodes or reports */ \
        /* the error; -3 is the mode argument used by this macro */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
| 762 | |||
/**
 * Step the string offset back from one code point boundary
 * to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_BACK_1
 * @stable ICU 2.4
 */
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* always move back at least one byte, then continue past trail bytes */ \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END
| 777 | |||
/**
 * Step the string offset back from one code point boundary
 * to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @see U8_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* unconditionally step back one byte first */ \
    --(i); \
    if(U8_IS_TRAIL((s)[i])) { \
        /* landed on a trail byte: the helper computes the final offset */ \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
| 795 | |||
/**
 * Skip backward over n code points: the string offset moves from its current
 * code point boundary to the n-th boundary before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U8_BACK_N
 * @stable ICU 2.4
 */
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* one U8_BACK_1_UNSAFE step per requested code point; n<=0 does nothing */ \
    for(__N=(n); __N>0; --__N) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
| 816 | |||
/**
 * Skip backward over n code points: the string offset moves from its current
 * code point boundary to the n-th boundary before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries;
 * iteration stops early once the offset reaches start.
 *
 * @param s const uint8_t * string
 * @param start int32_t index of the start of the string
 * @param i int32_t string offset, must be start<i
 * @param n number of code points to skip
 * @see U8_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* keep stepping while code points remain and i is still past start */ \
    for(__N=(n); __N>0 && (i)>(start); --__N) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
| 838 | |||
/**
 * Snap a random-access offset forward to the code point boundary
 * that follows a code point.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* step 1: go back to the start of the code point before the offset */ \
    U8_BACK_1_UNSAFE(s, i); \
    /* step 2: go forward over that whole code point */ \
    U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
| 856 | |||
/**
 * Snap a random-access offset forward to the code point boundary
 * that follows a code point.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * A negative length denotes a NUL-terminated string.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i<=length
 * @param length int32_t string length
 * @see U8_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* nothing to adjust when i is at start, or at/after a non-negative length */ \
    if((i)>(start) && ((length)<0 || (i)<(length))) { \
        /* back to the start of the preceding code point, then over it */ \
        U8_BACK_1(s, start, i); \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
| 880 | |||
| 881 | #endif | ||
