diff options
Diffstat (limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/query.c')
| -rw-r--r-- | vendor/github.com/mitjafelicijan/go-tree-sitter/query.c | 4134 |
1 files changed, 4134 insertions, 0 deletions
diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/query.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/query.c new file mode 100644 index 0000000..1b6e04b --- /dev/null +++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/query.c | |||
| @@ -0,0 +1,4134 @@ | |||
| 1 | #include "api.h" | ||
| 2 | #include "./alloc.h" | ||
| 3 | #include "./array.h" | ||
| 4 | #include "./language.h" | ||
| 5 | #include "./point.h" | ||
| 6 | #include "./tree_cursor.h" | ||
| 7 | #include "./unicode.h" | ||
| 8 | #include <wctype.h> | ||
| 9 | |||
| 10 | // #define DEBUG_ANALYZE_QUERY | ||
| 11 | // #define DEBUG_EXECUTE_QUERY | ||
| 12 | |||
| 13 | #define MAX_STEP_CAPTURE_COUNT 3 | ||
| 14 | #define MAX_NEGATED_FIELD_COUNT 8 | ||
| 15 | #define MAX_STATE_PREDECESSOR_COUNT 256 | ||
| 16 | #define MAX_ANALYSIS_STATE_DEPTH 8 | ||
| 17 | #define MAX_ANALYSIS_ITERATION_COUNT 256 | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Stream - A sequence of unicode characters derived from a UTF8 string. | ||
| 21 | * This struct is used in parsing queries from S-expressions. | ||
| 22 | */ | ||
typedef struct {
  const char *input;   // points at the bytes encoding `next`
  const char *start;   // beginning of the query source string
  const char *end;     // one past the last byte of the query source string
  int32_t next;        // code point decoded at `input` ('\0' once past `end`)
  uint8_t next_size;   // byte length of `next` in the UTF-8 input
} Stream;
| 30 | |||
| 31 | /* | ||
| 32 | * QueryStep - A step in the process of matching a query. Each node within | ||
| 33 | * a query S-expression corresponds to one of these steps. An entire pattern | ||
| 34 | * is represented as a sequence of these steps. The basic properties of a | ||
| 35 | * node are represented by these fields: | ||
| 36 | * - `symbol` - The grammar symbol to match. A zero value represents the | ||
| 37 | * wildcard symbol, '_'. | ||
| 38 | * - `field` - The field name to match. A zero value means that a field name | ||
| 39 | * was not specified. | ||
| 40 | * - `capture_ids` - An array of integers representing the names of captures | ||
| 41 | * associated with this node in the pattern, terminated by a `NONE` value. | ||
| 42 | * - `depth` - The depth where this node occurs in the pattern. The root node | ||
| 43 | * of the pattern has depth zero. | ||
| 44 | * - `negated_field_list_id` - An id representing a set of fields that must | ||
| 45 | * not be present on a node matching this step. | ||
| 46 | * | ||
| 47 | * Steps have some additional fields in order to handle the `.` (or "anchor") operator, | ||
| 48 | * which forbids additional child nodes: | ||
| 49 | * - `is_immediate` - Indicates that the node matching this step cannot be preceded | ||
| 50 | * by other sibling nodes that weren't specified in the pattern. | ||
| 51 | * - `is_last_child` - Indicates that the node matching this step cannot have any | ||
| 52 | * subsequent named siblings. | ||
| 53 | * | ||
| 54 | * For simple patterns, steps are matched in sequential order. But in order to | ||
| 55 | * handle alternative/repeated/optional sub-patterns, query steps are not always | ||
| 56 | * structured as a linear sequence; they sometimes need to split and merge. This | ||
| 57 | * is done using the following fields: | ||
| 58 | * - `alternative_index` - The index of a different query step that serves as | ||
| 59 | * an alternative to this step. A `NONE` value represents no alternative. | ||
| 60 | * When a query state reaches a step with an alternative index, the state | ||
| 61 | * is duplicated, with one copy remaining at the original step, and one copy | ||
| 62 | * moving to the alternative step. The alternative may have its own alternative | ||
| 63 | * step, so this splitting is an iterative process. | ||
| 64 | * - `is_dead_end` - Indicates that this state cannot be passed directly, and | ||
| 65 | * exists only in order to redirect to an alternative index, with no splitting. | ||
| 66 | * - `is_pass_through` - Indicates that state has no matching logic of its own, | ||
| 67 | * and exists only to split a state. One copy of the state advances immediately | ||
| 68 | * to the next step, and one moves to the alternative step. | ||
| 69 | * - `alternative_is_immediate` - Indicates that this step's alternative step | ||
| 70 | * should be treated as if `is_immediate` is true. | ||
| 71 | * | ||
| 72 | * Steps also store some derived state that summarizes how they relate to other | ||
| 73 | * steps within the same pattern. This is used to optimize the matching process: | ||
| 74 | * - `contains_captures` - Indicates that this step or one of its child steps | ||
| 75 | * has a non-empty `capture_ids` list. | ||
| 76 | * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then | ||
| 77 | * it and all of its subsequent sibling steps within the same parent pattern | ||
| 78 | * are guaranteed to match. | ||
| 79 | * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but | ||
| 80 | * for the entire top-level pattern. When iterating through a query's | ||
| 81 | * captures using `ts_query_cursor_next_capture`, this field is used to | ||
| 82 | * detect that a capture can safely be returned from a match that has not | ||
| 83 | * even completed yet. | ||
| 84 | */ | ||
typedef struct {
  TSSymbol symbol;                  // grammar symbol to match; 0 is the wildcard '_'
  TSSymbol supertype_symbol;        // NOTE(review): presumably a supertype constraint on the match — not covered by the comment above; confirm
  TSFieldId field;                  // field name to match; 0 means no field specified
  uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];  // capture names, terminated by NONE
  uint16_t depth;                   // depth of this node within the pattern; root is 0
  uint16_t alternative_index;       // index of an alternative step, or NONE
  uint16_t negated_field_list_id;   // id of a set of fields that must be absent
  bool is_named: 1;                 // NOTE(review): presumably the matched node must be a named node — confirm
  bool is_immediate: 1;             // node cannot be preceded by unspecified siblings (the '.' anchor)
  bool is_last_child: 1;            // node cannot have subsequent named siblings
  bool is_pass_through: 1;          // step only splits a state; no matching logic of its own
  bool is_dead_end: 1;              // step only redirects to its alternative; cannot be passed
  bool alternative_is_immediate: 1; // the alternative step is treated as `is_immediate`
  bool contains_captures: 1;        // this step or one of its child steps has captures
  bool root_pattern_guaranteed: 1;  // remainder of the top-level pattern is guaranteed to match
  bool parent_pattern_guaranteed: 1; // remainder of the parent pattern is guaranteed to match
} QueryStep;
| 103 | |||
/*
 * Slice - A slice of an external array. Within a query, capture names,
 * literal string values, and predicate step information are stored in three
 * contiguous arrays. Individual captures, string values, and predicates are
 * represented as slices of these three arrays.
 */
typedef struct {
  uint32_t offset;  // start index within the external array
  uint32_t length;  // number of elements in the slice
} Slice;

/*
 * SymbolTable - a two-way mapping of strings to ids.
 */
typedef struct {
  Array(char) characters;  // all interned strings, each NUL-terminated
  Array(Slice) slices;     // slice i locates the string for id i in `characters`
} SymbolTable;

/**
 * CaptureQuantifiers - a data structure holding the quantifiers of pattern
 * captures. Entry i stores the TSQuantifier (as a uint8_t) for capture id i.
 */
typedef Array(uint8_t) CaptureQuantifiers;
| 127 | |||
/*
 * PatternEntry - Information about the starting point for matching a particular
 * pattern. These entries are stored in a 'pattern map' - a sorted array that
 * makes it possible to efficiently lookup patterns based on the symbol for their
 * first step. The entry consists of the following fields:
 * - `pattern_index` - the index of the pattern within the query
 * - `step_index` - the index of the pattern's first step in the shared `steps` array
 * - `is_rooted` - whether or not the pattern has a single root node. This property
 *   affects decisions about whether or not to start the pattern for nodes outside
 *   of a QueryCursor's range restriction.
 */
typedef struct {
  uint16_t step_index;
  uint16_t pattern_index;
  bool is_rooted;
} PatternEntry;

// One pattern within a query: slices into the query's shared `steps` and
// `predicate_steps` arrays, plus where the pattern's text began.
typedef struct {
  Slice steps;            // this pattern's steps within TSQuery.steps
  Slice predicate_steps;  // this pattern's predicate steps within TSQuery.predicate_steps
  uint32_t start_byte;    // byte offset of the pattern in the query source
  bool is_non_local;      // NOTE(review): semantics not visible in this chunk — confirm against analysis code
} QueryPattern;

// Maps a byte offset in the query source to the step generated there.
typedef struct {
  uint32_t byte_offset;
  uint16_t step_index;
} StepOffset;
| 156 | |||
/*
 * QueryState - The state of an in-progress match of a particular pattern
 * in a query. While executing, a `TSQueryCursor` must keep track of a number
 * of possible in-progress matches. Each of those possible matches is
 * represented as one of these states. Fields:
 * - `id` - A numeric id that is exposed to the public API. This allows the
 *   caller to remove a given match, preventing any more of its captures
 *   from being returned.
 * - `start_depth` - The depth in the tree where the first step of the state's
 *   pattern was matched.
 * - `pattern_index` - The pattern that the state is matching.
 * - `consumed_capture_count` - The number of captures from this match that
 *   have already been returned.
 * - `capture_list_id` - A numeric id that can be used to retrieve the state's
 *   list of captures from the `CaptureListPool`.
 * - `seeking_immediate_match` - A flag that indicates that the state's next
 *   step must be matched by the very next sibling. This is used when
 *   processing repetitions.
 * - `has_in_progress_alternatives` - A flag that indicates that there are
 *   other states that have the same captures as this state, but are at
 *   different steps in their pattern. This means that in order to obey the
 *   'longest-match' rule, this state should not be returned as a match until
 *   it is clear that there can be no other alternative match with more captures.
 */
typedef struct {
  uint32_t id;
  uint32_t capture_list_id;
  uint16_t start_depth;
  uint16_t step_index;
  uint16_t pattern_index;
  uint16_t consumed_capture_count: 12;
  bool seeking_immediate_match: 1;
  bool has_in_progress_alternatives: 1;
  bool dead: 1;          // state is defunct and should be discarded
  bool needs_parent: 1;  // NOTE(review): semantics not visible in this chunk — confirm
} QueryState;

// The list of captures accumulated by one query state.
typedef Array(TSQueryCapture) CaptureList;
| 195 | |||
/*
 * CaptureListPool - A collection of *lists* of captures. Each query state needs
 * to maintain its own list of captures. To avoid repeated allocations, this struct
 * maintains a fixed set of capture lists, and keeps track of which ones are
 * currently in use by a query state.
 */
typedef struct {
  Array(CaptureList) list;
  CaptureList empty_list;  // shared read-only list returned for invalid ids
  // The maximum number of capture lists that we are allowed to allocate. We
  // never allow `list` to allocate more entries than this, dropping pending
  // matches if needed to stay under the limit.
  uint32_t max_capture_list_count;
  // The number of capture lists allocated in `list` that are not currently in
  // use. We reuse those existing-but-unused capture lists before trying to
  // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
  // list's length to indicate that it's not in use.
  uint32_t free_capture_list_count;
} CaptureListPool;
| 215 | |||
/*
 * AnalysisState - The state needed for walking the parse table when analyzing
 * a query pattern, to determine at which steps the pattern might fail to match.
 */
typedef struct {
  TSStateId parse_state;
  TSSymbol parent_symbol;
  uint16_t child_index;
  TSFieldId field_id: 15;
  bool done: 1;
} AnalysisStateEntry;

// A stack of in-progress parent nodes (bounded by MAX_ANALYSIS_STATE_DEPTH),
// plus the query step that this hypothetical parse has reached.
typedef struct {
  AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH];
  uint16_t depth;       // number of valid entries in `stack`
  uint16_t step_index;  // query step reached by this analysis path
  TSSymbol root_symbol;
} AnalysisState;

// States are heap-allocated and held by pointer so sets can be deduplicated
// and recycled via the `state_pool` below.
typedef Array(AnalysisState *) AnalysisStateSet;

typedef struct {
  AnalysisStateSet states;          // current frontier of analysis states
  AnalysisStateSet next_states;     // frontier being built for the next iteration
  AnalysisStateSet deeper_states;   // states deferred to a deeper recursion level
  AnalysisStateSet state_pool;      // recycled state allocations
  Array(uint16_t) final_step_indices;
  Array(TSSymbol) finished_parent_symbols;
  bool did_abort;                   // analysis hit an iteration/size limit
} QueryAnalysis;
| 246 | |||
/*
 * AnalysisSubgraph - A subset of the states in the parse table that are used
 * in constructing nodes with a certain symbol. Each state is accompanied by
 * some information about the possible node that could be produced in
 * downstream states.
 */
typedef struct {
  TSStateId state;
  uint16_t production_id;
  uint8_t child_index: 7;
  bool done: 1;
} AnalysisSubgraphNode;

typedef struct {
  TSSymbol symbol;                     // the symbol this subgraph constructs
  Array(TSStateId) start_states;       // parse states where construction can begin
  Array(AnalysisSubgraphNode) nodes;   // sorted set of (state, production) nodes
} AnalysisSubgraph;

typedef Array(AnalysisSubgraph) AnalysisSubgraphArray;
| 267 | |||
/*
 * StatePredecessorMap - A map that stores the predecessors of each parse state.
 * This is used during query analysis to determine which parse states can lead
 * to which reduce actions.
 */
typedef struct {
  // Flat storage; layout (entries per state) is managed by the map's
  // accessor functions, which are defined later in this file.
  TSStateId *contents;
} StatePredecessorMap;
| 276 | |||
/*
 * TSQuery - A tree query, compiled from a string of S-expressions. The query
 * itself is immutable. The mutable state used in the process of executing the
 * query is stored in a `TSQueryCursor`.
 */
struct TSQuery {
  SymbolTable captures;          // interned capture names
  SymbolTable predicate_values;  // interned string literals used in predicates
  Array(CaptureQuantifiers) capture_quantifiers;  // one entry per pattern
  Array(QueryStep) steps;        // steps of all patterns, concatenated
  Array(PatternEntry) pattern_map;  // sorted lookup from first-step symbol to pattern
  Array(TSQueryPredicateStep) predicate_steps;
  Array(QueryPattern) patterns;
  Array(StepOffset) step_offsets;  // byte-offset -> step-index mapping
  Array(TSFieldId) negated_fields; // zero-terminated lists referenced by negated_field_list_id
  Array(char) string_buffer;       // scratch buffer used while parsing
  Array(TSSymbol) repeat_symbols_with_rootless_patterns;
  const TSLanguage *language;
  uint16_t wildcard_root_pattern_count;  // patterns whose root step is the wildcard
};
| 297 | |||
/*
 * TSQueryCursor - A stateful struct used to execute a query on a tree.
 */
struct TSQueryCursor {
  const TSQuery *query;
  TSTreeCursor cursor;                 // walks the tree being queried
  Array(QueryState) states;            // in-progress matches
  Array(QueryState) finished_states;   // completed matches awaiting retrieval
  CaptureListPool capture_list_pool;
  uint32_t depth;                      // current depth of `cursor` in the tree
  uint32_t max_start_depth;            // limit on the depth at which patterns may start
  uint32_t start_byte;                 // byte-range restriction on matches
  uint32_t end_byte;
  TSPoint start_point;                 // point-range restriction on matches
  TSPoint end_point;
  uint32_t next_state_id;              // id assigned to the next new QueryState
  bool on_visible_node;
  bool ascending;                      // cursor is moving back up the tree
  bool halted;                         // execution has finished or been aborted
  bool did_exceed_match_limit;         // matches were dropped due to the capture-list cap
};
| 319 | |||
// NOTE(review): -1 lies outside the public TSQueryError range; presumably an
// internal sentinel meaning "the current parent expression was closed" — confirm
// against the query-parsing functions later in this file.
static const TSQueryError PARENT_DONE = -1;
// Step index stored in a query state once its pattern has fully matched.
static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
// Generic "no value" marker for 16-bit ids (capture ids, alternative indices).
static const uint16_t NONE = UINT16_MAX;
// Symbol value 0 represents the '_' wildcard in patterns.
static const TSSymbol WILDCARD_SYMBOL = 0;
| 324 | |||
| 325 | /********** | ||
| 326 | * Stream | ||
| 327 | **********/ | ||
| 328 | |||
| 329 | // Advance to the next unicode code point in the stream. | ||
| 330 | static bool stream_advance(Stream *self) { | ||
| 331 | self->input += self->next_size; | ||
| 332 | if (self->input < self->end) { | ||
| 333 | uint32_t size = ts_decode_utf8( | ||
| 334 | (const uint8_t *)self->input, | ||
| 335 | (uint32_t)(self->end - self->input), | ||
| 336 | &self->next | ||
| 337 | ); | ||
| 338 | if (size > 0) { | ||
| 339 | self->next_size = size; | ||
| 340 | return true; | ||
| 341 | } | ||
| 342 | } else { | ||
| 343 | self->next_size = 0; | ||
| 344 | self->next = '\0'; | ||
| 345 | } | ||
| 346 | return false; | ||
| 347 | } | ||
| 348 | |||
| 349 | // Reset the stream to the given input position, represented as a pointer | ||
| 350 | // into the input string. | ||
| 351 | static void stream_reset(Stream *self, const char *input) { | ||
| 352 | self->input = input; | ||
| 353 | self->next_size = 0; | ||
| 354 | stream_advance(self); | ||
| 355 | } | ||
| 356 | |||
| 357 | static Stream stream_new(const char *string, uint32_t length) { | ||
| 358 | Stream self = { | ||
| 359 | .next = 0, | ||
| 360 | .input = string, | ||
| 361 | .start = string, | ||
| 362 | .end = string + length, | ||
| 363 | }; | ||
| 364 | stream_advance(&self); | ||
| 365 | return self; | ||
| 366 | } | ||
| 367 | |||
| 368 | static void stream_skip_whitespace(Stream *self) { | ||
| 369 | for (;;) { | ||
| 370 | if (iswspace(self->next)) { | ||
| 371 | stream_advance(self); | ||
| 372 | } else if (self->next == ';') { | ||
| 373 | // skip over comments | ||
| 374 | stream_advance(self); | ||
| 375 | while (self->next && self->next != '\n') { | ||
| 376 | if (!stream_advance(self)) break; | ||
| 377 | } | ||
| 378 | } else { | ||
| 379 | break; | ||
| 380 | } | ||
| 381 | } | ||
| 382 | } | ||
| 383 | |||
| 384 | static bool stream_is_ident_start(Stream *self) { | ||
| 385 | return iswalnum(self->next) || self->next == '_' || self->next == '-'; | ||
| 386 | } | ||
| 387 | |||
| 388 | static void stream_scan_identifier(Stream *stream) { | ||
| 389 | do { | ||
| 390 | stream_advance(stream); | ||
| 391 | } while ( | ||
| 392 | iswalnum(stream->next) || | ||
| 393 | stream->next == '_' || | ||
| 394 | stream->next == '-' || | ||
| 395 | stream->next == '.' || | ||
| 396 | stream->next == '?' || | ||
| 397 | stream->next == '!' | ||
| 398 | ); | ||
| 399 | } | ||
| 400 | |||
| 401 | static uint32_t stream_offset(Stream *self) { | ||
| 402 | return (uint32_t)(self->input - self->start); | ||
| 403 | } | ||
| 404 | |||
| 405 | /****************** | ||
| 406 | * CaptureListPool | ||
| 407 | ******************/ | ||
| 408 | |||
| 409 | static CaptureListPool capture_list_pool_new(void) { | ||
| 410 | return (CaptureListPool) { | ||
| 411 | .list = array_new(), | ||
| 412 | .empty_list = array_new(), | ||
| 413 | .max_capture_list_count = UINT32_MAX, | ||
| 414 | .free_capture_list_count = 0, | ||
| 415 | }; | ||
| 416 | } | ||
| 417 | |||
| 418 | static void capture_list_pool_reset(CaptureListPool *self) { | ||
| 419 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { | ||
| 420 | // This invalid size means that the list is not in use. | ||
| 421 | self->list.contents[i].size = UINT32_MAX; | ||
| 422 | } | ||
| 423 | self->free_capture_list_count = self->list.size; | ||
| 424 | } | ||
| 425 | |||
| 426 | static void capture_list_pool_delete(CaptureListPool *self) { | ||
| 427 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { | ||
| 428 | array_delete(&self->list.contents[i]); | ||
| 429 | } | ||
| 430 | array_delete(&self->list); | ||
| 431 | } | ||
| 432 | |||
| 433 | static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { | ||
| 434 | if (id >= self->list.size) return &self->empty_list; | ||
| 435 | return &self->list.contents[id]; | ||
| 436 | } | ||
| 437 | |||
| 438 | static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { | ||
| 439 | assert(id < self->list.size); | ||
| 440 | return &self->list.contents[id]; | ||
| 441 | } | ||
| 442 | |||
| 443 | static bool capture_list_pool_is_empty(const CaptureListPool *self) { | ||
| 444 | // The capture list pool is empty if all allocated lists are in use, and we | ||
| 445 | // have reached the maximum allowed number of allocated lists. | ||
| 446 | return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; | ||
| 447 | } | ||
| 448 | |||
| 449 | static uint16_t capture_list_pool_acquire(CaptureListPool *self) { | ||
| 450 | // First see if any already allocated capture list is currently unused. | ||
| 451 | if (self->free_capture_list_count > 0) { | ||
| 452 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { | ||
| 453 | if (self->list.contents[i].size == UINT32_MAX) { | ||
| 454 | array_clear(&self->list.contents[i]); | ||
| 455 | self->free_capture_list_count--; | ||
| 456 | return i; | ||
| 457 | } | ||
| 458 | } | ||
| 459 | } | ||
| 460 | |||
| 461 | // Otherwise allocate and initialize a new capture list, as long as that | ||
| 462 | // doesn't put us over the requested maximum. | ||
| 463 | uint32_t i = self->list.size; | ||
| 464 | if (i >= self->max_capture_list_count) { | ||
| 465 | return NONE; | ||
| 466 | } | ||
| 467 | CaptureList list; | ||
| 468 | array_init(&list); | ||
| 469 | array_push(&self->list, list); | ||
| 470 | return i; | ||
| 471 | } | ||
| 472 | |||
| 473 | static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { | ||
| 474 | if (id >= self->list.size) return; | ||
| 475 | self->list.contents[id].size = UINT32_MAX; | ||
| 476 | self->free_capture_list_count++; | ||
| 477 | } | ||
| 478 | |||
| 479 | /************** | ||
| 480 | * Quantifiers | ||
| 481 | **************/ | ||
| 482 | |||
// Combine quantifiers by nesting: the result for a capture inside a quantified
// sub-pattern is the "product" of the outer quantifier `left` and the inner
// quantifier `right` (e.g. ZeroOrOne * OneOrMore yields ZeroOrMore).
static TSQuantifier quantifier_mul(
  TSQuantifier left,
  TSQuantifier right
) {
  switch (left)
  {
    case TSQuantifierZero:
      return TSQuantifierZero;
    case TSQuantifierZeroOrOne:
      switch (right) {
        case TSQuantifierZero:
          return TSQuantifierZero;
        case TSQuantifierZeroOrOne:
        case TSQuantifierOne:
          return TSQuantifierZeroOrOne;
        case TSQuantifierZeroOrMore:
        case TSQuantifierOneOrMore:
          return TSQuantifierZeroOrMore;
      };
      break;
    case TSQuantifierZeroOrMore:
      switch (right) {
        case TSQuantifierZero:
          return TSQuantifierZero;
        case TSQuantifierZeroOrOne:
        case TSQuantifierZeroOrMore:
        case TSQuantifierOne:
        case TSQuantifierOneOrMore:
          return TSQuantifierZeroOrMore;
      };
      break;
    case TSQuantifierOne:
      // One is the identity element for this product.
      return right;
    case TSQuantifierOneOrMore:
      switch (right) {
        case TSQuantifierZero:
          return TSQuantifierZero;
        case TSQuantifierZeroOrOne:
        case TSQuantifierZeroOrMore:
          return TSQuantifierZeroOrMore;
        case TSQuantifierOne:
        case TSQuantifierOneOrMore:
          return TSQuantifierOneOrMore;
      };
      break;
  }
  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
}
| 531 | |||
| 532 | static TSQuantifier quantifier_join( | ||
| 533 | TSQuantifier left, | ||
| 534 | TSQuantifier right | ||
| 535 | ) { | ||
| 536 | switch (left) | ||
| 537 | { | ||
| 538 | case TSQuantifierZero: | ||
| 539 | switch (right) { | ||
| 540 | case TSQuantifierZero: | ||
| 541 | return TSQuantifierZero; | ||
| 542 | case TSQuantifierZeroOrOne: | ||
| 543 | case TSQuantifierOne: | ||
| 544 | return TSQuantifierZeroOrOne; | ||
| 545 | case TSQuantifierZeroOrMore: | ||
| 546 | case TSQuantifierOneOrMore: | ||
| 547 | return TSQuantifierZeroOrMore; | ||
| 548 | }; | ||
| 549 | break; | ||
| 550 | case TSQuantifierZeroOrOne: | ||
| 551 | switch (right) { | ||
| 552 | case TSQuantifierZero: | ||
| 553 | case TSQuantifierZeroOrOne: | ||
| 554 | case TSQuantifierOne: | ||
| 555 | return TSQuantifierZeroOrOne; | ||
| 556 | break; | ||
| 557 | case TSQuantifierZeroOrMore: | ||
| 558 | case TSQuantifierOneOrMore: | ||
| 559 | return TSQuantifierZeroOrMore; | ||
| 560 | break; | ||
| 561 | }; | ||
| 562 | break; | ||
| 563 | case TSQuantifierZeroOrMore: | ||
| 564 | return TSQuantifierZeroOrMore; | ||
| 565 | case TSQuantifierOne: | ||
| 566 | switch (right) { | ||
| 567 | case TSQuantifierZero: | ||
| 568 | case TSQuantifierZeroOrOne: | ||
| 569 | return TSQuantifierZeroOrOne; | ||
| 570 | case TSQuantifierZeroOrMore: | ||
| 571 | return TSQuantifierZeroOrMore; | ||
| 572 | case TSQuantifierOne: | ||
| 573 | return TSQuantifierOne; | ||
| 574 | case TSQuantifierOneOrMore: | ||
| 575 | return TSQuantifierOneOrMore; | ||
| 576 | }; | ||
| 577 | break; | ||
| 578 | case TSQuantifierOneOrMore: | ||
| 579 | switch (right) { | ||
| 580 | case TSQuantifierZero: | ||
| 581 | case TSQuantifierZeroOrOne: | ||
| 582 | case TSQuantifierZeroOrMore: | ||
| 583 | return TSQuantifierZeroOrMore; | ||
| 584 | case TSQuantifierOne: | ||
| 585 | case TSQuantifierOneOrMore: | ||
| 586 | return TSQuantifierOneOrMore; | ||
| 587 | }; | ||
| 588 | break; | ||
| 589 | } | ||
| 590 | return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! | ||
| 591 | } | ||
| 592 | |||
// Combine quantifiers by sequencing: the result for a capture that occurs with
// quantifier `left` and then again with quantifier `right` is their "sum"
// (e.g. One + One yields OneOrMore; ZeroOrOne + Zero stays ZeroOrOne).
static TSQuantifier quantifier_add(
  TSQuantifier left,
  TSQuantifier right
) {
  switch (left)
  {
    case TSQuantifierZero:
      // Zero is the identity element for this sum.
      return right;
    case TSQuantifierZeroOrOne:
      switch (right) {
        case TSQuantifierZero:
          return TSQuantifierZeroOrOne;
        case TSQuantifierZeroOrOne:
        case TSQuantifierZeroOrMore:
          return TSQuantifierZeroOrMore;
        case TSQuantifierOne:
        case TSQuantifierOneOrMore:
          return TSQuantifierOneOrMore;
      };
      break;
    case TSQuantifierZeroOrMore:
      switch (right) {
        case TSQuantifierZero:
          return TSQuantifierZeroOrMore;
        case TSQuantifierZeroOrOne:
        case TSQuantifierZeroOrMore:
          return TSQuantifierZeroOrMore;
        case TSQuantifierOne:
        case TSQuantifierOneOrMore:
          return TSQuantifierOneOrMore;
      };
      break;
    case TSQuantifierOne:
      switch (right) {
        case TSQuantifierZero:
          return TSQuantifierOne;
        case TSQuantifierZeroOrOne:
        case TSQuantifierZeroOrMore:
        case TSQuantifierOne:
        case TSQuantifierOneOrMore:
          return TSQuantifierOneOrMore;
      };
      break;
    case TSQuantifierOneOrMore:
      // OneOrMore absorbs anything added to it.
      return TSQuantifierOneOrMore;
  }
  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
}
| 641 | |||
| 642 | // Create new capture quantifiers structure | ||
| 643 | static CaptureQuantifiers capture_quantifiers_new(void) { | ||
| 644 | return (CaptureQuantifiers) array_new(); | ||
| 645 | } | ||
| 646 | |||
| 647 | // Delete capture quantifiers structure | ||
| 648 | static void capture_quantifiers_delete( | ||
| 649 | CaptureQuantifiers *self | ||
| 650 | ) { | ||
| 651 | array_delete(self); | ||
| 652 | } | ||
| 653 | |||
| 654 | // Clear capture quantifiers structure | ||
| 655 | static void capture_quantifiers_clear( | ||
| 656 | CaptureQuantifiers *self | ||
| 657 | ) { | ||
| 658 | array_clear(self); | ||
| 659 | } | ||
| 660 | |||
| 661 | // Replace capture quantifiers with the given quantifiers | ||
| 662 | static void capture_quantifiers_replace( | ||
| 663 | CaptureQuantifiers *self, | ||
| 664 | CaptureQuantifiers *quantifiers | ||
| 665 | ) { | ||
| 666 | array_clear(self); | ||
| 667 | array_push_all(self, quantifiers); | ||
| 668 | } | ||
| 669 | |||
| 670 | // Return capture quantifier for the given capture id | ||
| 671 | static TSQuantifier capture_quantifier_for_id( | ||
| 672 | const CaptureQuantifiers *self, | ||
| 673 | uint16_t id | ||
| 674 | ) { | ||
| 675 | return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id); | ||
| 676 | } | ||
| 677 | |||
| 678 | // Add the given quantifier to the current value for id | ||
| 679 | static void capture_quantifiers_add_for_id( | ||
| 680 | CaptureQuantifiers *self, | ||
| 681 | uint16_t id, | ||
| 682 | TSQuantifier quantifier | ||
| 683 | ) { | ||
| 684 | if (self->size <= id) { | ||
| 685 | array_grow_by(self, id + 1 - self->size); | ||
| 686 | } | ||
| 687 | uint8_t *own_quantifier = array_get(self, id); | ||
| 688 | *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); | ||
| 689 | } | ||
| 690 | |||
| 691 | // Point-wise add the given quantifiers to the current values | ||
| 692 | static void capture_quantifiers_add_all( | ||
| 693 | CaptureQuantifiers *self, | ||
| 694 | CaptureQuantifiers *quantifiers | ||
| 695 | ) { | ||
| 696 | if (self->size < quantifiers->size) { | ||
| 697 | array_grow_by(self, quantifiers->size - self->size); | ||
| 698 | } | ||
| 699 | for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { | ||
| 700 | uint8_t *quantifier = array_get(quantifiers, id); | ||
| 701 | uint8_t *own_quantifier = array_get(self, id); | ||
| 702 | *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); | ||
| 703 | } | ||
| 704 | } | ||
| 705 | |||
| 706 | // Join the given quantifier with the current values | ||
| 707 | static void capture_quantifiers_mul( | ||
| 708 | CaptureQuantifiers *self, | ||
| 709 | TSQuantifier quantifier | ||
| 710 | ) { | ||
| 711 | for (uint16_t id = 0; id < (uint16_t)self->size; id++) { | ||
| 712 | uint8_t *own_quantifier = array_get(self, id); | ||
| 713 | *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); | ||
| 714 | } | ||
| 715 | } | ||
| 716 | |||
| 717 | // Point-wise join the quantifiers from a list of alternatives with the current values | ||
| 718 | static void capture_quantifiers_join_all( | ||
| 719 | CaptureQuantifiers *self, | ||
| 720 | CaptureQuantifiers *quantifiers | ||
| 721 | ) { | ||
| 722 | if (self->size < quantifiers->size) { | ||
| 723 | array_grow_by(self, quantifiers->size - self->size); | ||
| 724 | } | ||
| 725 | for (uint32_t id = 0; id < quantifiers->size; id++) { | ||
| 726 | uint8_t *quantifier = array_get(quantifiers, id); | ||
| 727 | uint8_t *own_quantifier = array_get(self, id); | ||
| 728 | *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); | ||
| 729 | } | ||
| 730 | for (uint32_t id = quantifiers->size; id < self->size; id++) { | ||
| 731 | uint8_t *own_quantifier = array_get(self, id); | ||
| 732 | *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); | ||
| 733 | } | ||
| 734 | } | ||
| 735 | |||
| 736 | /************** | ||
| 737 | * SymbolTable | ||
| 738 | **************/ | ||
| 739 | |||
| 740 | static SymbolTable symbol_table_new(void) { | ||
| 741 | return (SymbolTable) { | ||
| 742 | .characters = array_new(), | ||
| 743 | .slices = array_new(), | ||
| 744 | }; | ||
| 745 | } | ||
| 746 | |||
| 747 | static void symbol_table_delete(SymbolTable *self) { | ||
| 748 | array_delete(&self->characters); | ||
| 749 | array_delete(&self->slices); | ||
| 750 | } | ||
| 751 | |||
| 752 | static int symbol_table_id_for_name( | ||
| 753 | const SymbolTable *self, | ||
| 754 | const char *name, | ||
| 755 | uint32_t length | ||
| 756 | ) { | ||
| 757 | for (unsigned i = 0; i < self->slices.size; i++) { | ||
| 758 | Slice slice = self->slices.contents[i]; | ||
| 759 | if ( | ||
| 760 | slice.length == length && | ||
| 761 | !strncmp(&self->characters.contents[slice.offset], name, length) | ||
| 762 | ) return i; | ||
| 763 | } | ||
| 764 | return -1; | ||
| 765 | } | ||
| 766 | |||
| 767 | static const char *symbol_table_name_for_id( | ||
| 768 | const SymbolTable *self, | ||
| 769 | uint16_t id, | ||
| 770 | uint32_t *length | ||
| 771 | ) { | ||
| 772 | Slice slice = self->slices.contents[id]; | ||
| 773 | *length = slice.length; | ||
| 774 | return &self->characters.contents[slice.offset]; | ||
| 775 | } | ||
| 776 | |||
| 777 | static uint16_t symbol_table_insert_name( | ||
| 778 | SymbolTable *self, | ||
| 779 | const char *name, | ||
| 780 | uint32_t length | ||
| 781 | ) { | ||
| 782 | int id = symbol_table_id_for_name(self, name, length); | ||
| 783 | if (id >= 0) return (uint16_t)id; | ||
| 784 | Slice slice = { | ||
| 785 | .offset = self->characters.size, | ||
| 786 | .length = length, | ||
| 787 | }; | ||
| 788 | array_grow_by(&self->characters, length + 1); | ||
| 789 | memcpy(&self->characters.contents[slice.offset], name, length); | ||
| 790 | self->characters.contents[self->characters.size - 1] = 0; | ||
| 791 | array_push(&self->slices, slice); | ||
| 792 | return self->slices.size - 1; | ||
| 793 | } | ||
| 794 | |||
| 795 | /************ | ||
| 796 | * QueryStep | ||
| 797 | ************/ | ||
| 798 | |||
| 799 | static QueryStep query_step__new( | ||
| 800 | TSSymbol symbol, | ||
| 801 | uint16_t depth, | ||
| 802 | bool is_immediate | ||
| 803 | ) { | ||
| 804 | QueryStep step = { | ||
| 805 | .symbol = symbol, | ||
| 806 | .depth = depth, | ||
| 807 | .field = 0, | ||
| 808 | .alternative_index = NONE, | ||
| 809 | .negated_field_list_id = 0, | ||
| 810 | .contains_captures = false, | ||
| 811 | .is_last_child = false, | ||
| 812 | .is_named = false, | ||
| 813 | .is_pass_through = false, | ||
| 814 | .is_dead_end = false, | ||
| 815 | .root_pattern_guaranteed = false, | ||
| 816 | .is_immediate = is_immediate, | ||
| 817 | .alternative_is_immediate = false, | ||
| 818 | }; | ||
| 819 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { | ||
| 820 | step.capture_ids[i] = NONE; | ||
| 821 | } | ||
| 822 | return step; | ||
| 823 | } | ||
| 824 | |||
| 825 | static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { | ||
| 826 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { | ||
| 827 | if (self->capture_ids[i] == NONE) { | ||
| 828 | self->capture_ids[i] = capture_id; | ||
| 829 | break; | ||
| 830 | } | ||
| 831 | } | ||
| 832 | } | ||
| 833 | |||
| 834 | static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { | ||
| 835 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { | ||
| 836 | if (self->capture_ids[i] == capture_id) { | ||
| 837 | self->capture_ids[i] = NONE; | ||
| 838 | while (i + 1 < MAX_STEP_CAPTURE_COUNT) { | ||
| 839 | if (self->capture_ids[i + 1] == NONE) break; | ||
| 840 | self->capture_ids[i] = self->capture_ids[i + 1]; | ||
| 841 | self->capture_ids[i + 1] = NONE; | ||
| 842 | i++; | ||
| 843 | } | ||
| 844 | break; | ||
| 845 | } | ||
| 846 | } | ||
| 847 | } | ||
| 848 | |||
| 849 | /********************** | ||
| 850 | * StatePredecessorMap | ||
| 851 | **********************/ | ||
| 852 | |||
| 853 | static inline StatePredecessorMap state_predecessor_map_new( | ||
| 854 | const TSLanguage *language | ||
| 855 | ) { | ||
| 856 | return (StatePredecessorMap) { | ||
| 857 | .contents = ts_calloc( | ||
| 858 | (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), | ||
| 859 | sizeof(TSStateId) | ||
| 860 | ), | ||
| 861 | }; | ||
| 862 | } | ||
| 863 | |||
| 864 | static inline void state_predecessor_map_delete(StatePredecessorMap *self) { | ||
| 865 | ts_free(self->contents); | ||
| 866 | } | ||
| 867 | |||
// Record `predecessor` as a predecessor of `state`.
//
// Each state's row holds its entry count in slot 0, followed by the stored
// predecessor ids. The duplicate check only compares against the most
// recently added entry — it cheaply suppresses consecutive duplicates rather
// than guaranteeing full uniqueness within a row. Once a row holds
// MAX_STATE_PREDECESSOR_COUNT entries, further additions are silently dropped.
static inline void state_predecessor_map_add(
  StatePredecessorMap *self,
  TSStateId state,
  TSStateId predecessor
) {
  size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
  TSStateId *count = &self->contents[index];
  if (
    *count == 0 ||
    (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
  ) {
    // `contents[index + *count]` is the last stored entry; after the
    // increment it becomes the slot for the new entry.
    (*count)++;
    self->contents[index + *count] = predecessor;
  }
}
| 883 | |||
| 884 | static inline const TSStateId *state_predecessor_map_get( | ||
| 885 | const StatePredecessorMap *self, | ||
| 886 | TSStateId state, | ||
| 887 | unsigned *count | ||
| 888 | ) { | ||
| 889 | size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); | ||
| 890 | *count = self->contents[index]; | ||
| 891 | return &self->contents[index + 1]; | ||
| 892 | } | ||
| 893 | |||
| 894 | /**************** | ||
| 895 | * AnalysisState | ||
| 896 | ****************/ | ||
| 897 | |||
| 898 | static unsigned analysis_state__recursion_depth(const AnalysisState *self) { | ||
| 899 | unsigned result = 0; | ||
| 900 | for (unsigned i = 0; i < self->depth; i++) { | ||
| 901 | TSSymbol symbol = self->stack[i].parent_symbol; | ||
| 902 | for (unsigned j = 0; j < i; j++) { | ||
| 903 | if (self->stack[j].parent_symbol == symbol) { | ||
| 904 | result++; | ||
| 905 | break; | ||
| 906 | } | ||
| 907 | } | ||
| 908 | } | ||
| 909 | return result; | ||
| 910 | } | ||
| 911 | |||
// Compare two analysis states by their position within their hypothetical
// syntax trees, ignoring symbols and parse states.
//
// Positions are compared child-index by child-index from the bottom of the
// stack upward. When one stack is a prefix of the other with equal indices,
// the *deeper* state compares as earlier (less progress at the shared
// level); ties are then broken by the query step index.
static inline int analysis_state__compare_position(
  AnalysisState *const *self,
  AnalysisState *const *other
) {
  for (unsigned i = 0; i < (*self)->depth; i++) {
    // `self` is deeper than `other` with an equal prefix: self is earlier.
    if (i >= (*other)->depth) return -1;
    if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1;
    if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1;
  }
  // Equal prefixes but `other` is deeper: self is later.
  if ((*self)->depth < (*other)->depth) return 1;
  if ((*self)->step_index < (*other)->step_index) return -1;
  if ((*self)->step_index > (*other)->step_index) return 1;
  return 0;
}
| 926 | |||
// Total ordering on analysis states: first by tree position (see
// analysis_state__compare_position), then lexicographically by each stack
// entry's parent symbol, parse state, and field id. Used to keep state sets
// sorted and duplicate-free.
static inline int analysis_state__compare(
  AnalysisState *const *self,
  AnalysisState *const *other
) {
  int result = analysis_state__compare_position(self, other);
  if (result != 0) return result;
  // Positions are equal, so the two depths are equal as well; compare the
  // remaining per-entry fields.
  for (unsigned i = 0; i < (*self)->depth; i++) {
    if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1;
    if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1;
    if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1;
    if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1;
    if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1;
    if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1;
  }
  return 0;
}
| 943 | |||
| 944 | static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { | ||
| 945 | if (self->depth == 0) { | ||
| 946 | return &self->stack[0]; | ||
| 947 | } | ||
| 948 | return &self->stack[self->depth - 1]; | ||
| 949 | } | ||
| 950 | |||
| 951 | static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { | ||
| 952 | for (unsigned i = 0; i < self->depth; i++) { | ||
| 953 | if (self->stack[i].parent_symbol == symbol) return true; | ||
| 954 | } | ||
| 955 | return false; | ||
| 956 | } | ||
| 957 | |||
| 958 | /****************** | ||
| 959 | * AnalysisStateSet | ||
| 960 | ******************/ | ||
| 961 | |||
| 962 | // Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by | ||
| 963 | // cloning one from scratch. | ||
| 964 | static inline AnalysisState *analysis_state_pool__clone_or_reuse( | ||
| 965 | AnalysisStateSet *self, | ||
| 966 | AnalysisState *borrowed_item | ||
| 967 | ) { | ||
| 968 | AnalysisState *new_item; | ||
| 969 | if (self->size) { | ||
| 970 | new_item = array_pop(self); | ||
| 971 | } else { | ||
| 972 | new_item = ts_malloc(sizeof(AnalysisState)); | ||
| 973 | } | ||
| 974 | *new_item = *borrowed_item; | ||
| 975 | return new_item; | ||
| 976 | } | ||
| 977 | |||
| 978 | // Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this | ||
| 979 | // set. The set does not contain duplicates, so if the item is already present, it will not be | ||
| 980 | // inserted, and no clone will be made. | ||
| 981 | // | ||
| 982 | // The caller retains ownership of the passed-in memory. However, the clone that is created by this | ||
| 983 | // function will be managed by the state set. | ||
| 984 | static inline void analysis_state_set__insert_sorted( | ||
| 985 | AnalysisStateSet *self, | ||
| 986 | AnalysisStateSet *pool, | ||
| 987 | AnalysisState *borrowed_item | ||
| 988 | ) { | ||
| 989 | unsigned index, exists; | ||
| 990 | array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); | ||
| 991 | if (!exists) { | ||
| 992 | AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); | ||
| 993 | array_insert(self, index, new_item); | ||
| 994 | } | ||
| 995 | } | ||
| 996 | |||
| 997 | // Inserts a clone of the passed-in item at the end position of this list. | ||
| 998 | // | ||
| 999 | // IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function | ||
| 1000 | // `analysis_state__compare`) than largest item already in this set. If items are inserted in the | ||
| 1001 | // wrong order, the set will not function properly for future use. | ||
| 1002 | // | ||
| 1003 | // The caller retains ownership of the passed-in memory. However, the clone that is created by this | ||
| 1004 | // function will be managed by the state set. | ||
| 1005 | static inline void analysis_state_set__push( | ||
| 1006 | AnalysisStateSet *self, | ||
| 1007 | AnalysisStateSet *pool, | ||
| 1008 | AnalysisState *borrowed_item | ||
| 1009 | ) { | ||
| 1010 | AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); | ||
| 1011 | array_push(self, new_item); | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | // Removes all items from this set, returning it to an empty state. | ||
| 1015 | static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { | ||
| 1016 | array_push_all(pool, self); | ||
| 1017 | array_clear(self); | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | // Releases all memory that is managed with this state set, including any items currently present. | ||
| 1021 | // After calling this function, the set is no longer suitable for use. | ||
| 1022 | static inline void analysis_state_set__delete(AnalysisStateSet *self) { | ||
| 1023 | for (unsigned i = 0; i < self->size; i++) { | ||
| 1024 | ts_free(self->contents[i]); | ||
| 1025 | } | ||
| 1026 | array_delete(self); | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | /**************** | ||
| 1030 | * QueryAnalyzer | ||
| 1031 | ****************/ | ||
| 1032 | |||
| 1033 | static inline QueryAnalysis query_analysis__new(void) { | ||
| 1034 | return (QueryAnalysis) { | ||
| 1035 | .states = array_new(), | ||
| 1036 | .next_states = array_new(), | ||
| 1037 | .deeper_states = array_new(), | ||
| 1038 | .state_pool = array_new(), | ||
| 1039 | .final_step_indices = array_new(), | ||
| 1040 | .finished_parent_symbols = array_new(), | ||
| 1041 | .did_abort = false, | ||
| 1042 | }; | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | static inline void query_analysis__delete(QueryAnalysis *self) { | ||
| 1046 | analysis_state_set__delete(&self->states); | ||
| 1047 | analysis_state_set__delete(&self->next_states); | ||
| 1048 | analysis_state_set__delete(&self->deeper_states); | ||
| 1049 | analysis_state_set__delete(&self->state_pool); | ||
| 1050 | array_delete(&self->final_step_indices); | ||
| 1051 | array_delete(&self->finished_parent_symbols); | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | /*********************** | ||
| 1055 | * AnalysisSubgraphNode | ||
| 1056 | ***********************/ | ||
| 1057 | |||
| 1058 | static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { | ||
| 1059 | if (self->state < other->state) return -1; | ||
| 1060 | if (self->state > other->state) return 1; | ||
| 1061 | if (self->child_index < other->child_index) return -1; | ||
| 1062 | if (self->child_index > other->child_index) return 1; | ||
| 1063 | if (self->done < other->done) return -1; | ||
| 1064 | if (self->done > other->done) return 1; | ||
| 1065 | if (self->production_id < other->production_id) return -1; | ||
| 1066 | if (self->production_id > other->production_id) return 1; | ||
| 1067 | return 0; | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | /********* | ||
| 1071 | * Query | ||
| 1072 | *********/ | ||
| 1073 | |||
// The `pattern_map` contains a mapping from TSSymbol values to indices in the
// `steps` array. For a given syntax node, the `pattern_map` makes it possible
// to quickly find the starting steps of all of the patterns whose root matches
// that node. Each entry has two fields: a `pattern_index`, which identifies one
// of the patterns in the query, and a `step_index`, which indicates the start
// offset of that pattern's steps within the `steps` array.
//
// The entries are sorted by the patterns' root symbols, and lookups use a
// binary search. This ensures that the cost of this initial lookup step
// scales logarithmically with the number of patterns in the query.
//
// This returns `true` if the symbol is present and `false` otherwise.
// If the symbol is not present `*result` is set to the index where the
// symbol should be inserted.
static inline bool ts_query__pattern_map_search(
  const TSQuery *self,
  TSSymbol needle,
  uint32_t *result
) {
  // Entries before `wildcard_root_pattern_count` are wildcard-rooted and are
  // excluded from the search.
  uint32_t base_index = self->wildcard_root_pattern_count;
  uint32_t size = self->pattern_map.size - base_index;
  if (size == 0) {
    *result = base_index;
    return false;
  }
  // Binary search: narrow [base_index, base_index + size) down to one entry.
  // The root symbol of each entry lives on its pattern's first step.
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = base_index + half_size;
    TSSymbol mid_symbol = self->steps.contents[
      self->pattern_map.contents[mid_index].step_index
    ].symbol;
    if (needle > mid_symbol) base_index = mid_index;
    size -= half_size;
  }

  TSSymbol symbol = self->steps.contents[
    self->pattern_map.contents[base_index].step_index
  ].symbol;

  // The search can land one entry short of the needle's position; step
  // forward once if so (re-reading the symbol when still in range).
  if (needle > symbol) {
    base_index++;
    if (base_index < self->pattern_map.size) {
      symbol = self->steps.contents[
        self->pattern_map.contents[base_index].step_index
      ].symbol;
    }
  }

  *result = base_index;
  return needle == symbol;
}
| 1125 | |||
| 1126 | // Insert a new pattern's start index into the pattern map, maintaining | ||
| 1127 | // the pattern map's ordering invariant. | ||
| 1128 | static inline void ts_query__pattern_map_insert( | ||
| 1129 | TSQuery *self, | ||
| 1130 | TSSymbol symbol, | ||
| 1131 | PatternEntry new_entry | ||
| 1132 | ) { | ||
| 1133 | uint32_t index; | ||
| 1134 | ts_query__pattern_map_search(self, symbol, &index); | ||
| 1135 | |||
| 1136 | // Ensure that the entries are sorted not only by symbol, but also | ||
| 1137 | // by pattern_index. This way, states for earlier patterns will be | ||
| 1138 | // initiated first, which allows the ordering of the states array | ||
| 1139 | // to be maintained more efficiently. | ||
| 1140 | while (index < self->pattern_map.size) { | ||
| 1141 | PatternEntry *entry = &self->pattern_map.contents[index]; | ||
| 1142 | if ( | ||
| 1143 | self->steps.contents[entry->step_index].symbol == symbol && | ||
| 1144 | entry->pattern_index < new_entry.pattern_index | ||
| 1145 | ) { | ||
| 1146 | index++; | ||
| 1147 | } else { | ||
| 1148 | break; | ||
| 1149 | } | ||
| 1150 | } | ||
| 1151 | |||
| 1152 | array_insert(&self->pattern_map, index, new_entry); | ||
| 1153 | } | ||
| 1154 | |||
// Walk the subgraph for this non-terminal, tracking all of the possible
// sequences of progress within the pattern.
//
// Each `AnalysisState` in `analysis->states` represents one partial walk of a
// hypothetical syntax tree: a stack of (parent symbol, parse state, child
// index) entries paired with the index of the query step currently being
// matched. States are advanced in breadth-first passes, kept in ascending
// tree-position order, until each one either finishes its pattern (recorded
// in `finished_parent_symbols`), returns to depth zero (recorded in
// `final_step_indices`), or exceeds an analysis limit (which sets
// `did_abort`).
static void ts_query__perform_analysis(
  TSQuery *self,
  const AnalysisSubgraphArray *subgraphs,
  QueryAnalysis *analysis
) {
  unsigned recursion_depth_limit = 0;
  unsigned prev_final_step_count = 0;
  array_clear(&analysis->final_step_indices);
  array_clear(&analysis->finished_parent_symbols);

  for (unsigned iteration = 0;; iteration++) {
    // Give up (rather than looping indefinitely) after a fixed number of
    // breadth-first passes.
    if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
      analysis->did_abort = true;
      break;
    }

    #ifdef DEBUG_ANALYZE_QUERY
    printf("Iteration: %u. Final step indices:", iteration);
    for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
      printf(" %4u", analysis->final_step_indices.contents[j]);
    }
    printf("\n");
    for (unsigned j = 0; j < analysis->states.size; j++) {
      AnalysisState *state = analysis->states.contents[j];
      printf("  %3u: step: %u, stack: [", j, state->step_index);
      for (unsigned k = 0; k < state->depth; k++) {
        printf(
          " {%s, child: %u, state: %4u",
          self->language->symbol_names[state->stack[k].parent_symbol],
          state->stack[k].child_index,
          state->stack[k].parse_state
        );
        if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
        if (state->stack[k].done) printf(", DONE");
        printf("}");
      }
      printf(" ]\n");
    }
    #endif

    // If no further progress can be made within the current recursion depth limit, then
    // bump the depth limit by one, and continue to process the states that exceeded the
    // limit. But only allow this if progress has been made since the last time the depth
    // limit was increased.
    if (analysis->states.size == 0) {
      if (
        analysis->deeper_states.size > 0 &&
        analysis->final_step_indices.size > prev_final_step_count
      ) {
        #ifdef DEBUG_ANALYZE_QUERY
        printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
        #endif

        prev_final_step_count = analysis->final_step_indices.size;
        recursion_depth_limit++;
        // Swap the (empty) working set with the deferred deeper states.
        AnalysisStateSet _states = analysis->states;
        analysis->states = analysis->deeper_states;
        analysis->deeper_states = _states;
        continue;
      }

      break;
    }

    analysis_state_set__clear(&analysis->next_states, &analysis->state_pool);
    for (unsigned j = 0; j < analysis->states.size; j++) {
      AnalysisState * const state = analysis->states.contents[j];

      // For efficiency, it's important to avoid processing the same analysis state more
      // than once. To achieve this, keep the states in order of ascending position within
      // their hypothetical syntax trees. In each iteration of this loop, start by advancing
      // the states that have made the least progress. Avoid advancing states that have already
      // made more progress.
      if (analysis->next_states.size > 0) {
        int comparison = analysis_state__compare_position(
          &state,
          array_back(&analysis->next_states)
        );
        if (comparison == 0) {
          analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state);
          continue;
        } else if (comparison > 0) {
          #ifdef DEBUG_ANALYZE_QUERY
          printf("Terminate iteration at state %u\n", j);
          #endif
          // Every remaining state is even further ahead; defer them all to
          // the next pass unchanged.
          while (j < analysis->states.size) {
            analysis_state_set__push(
              &analysis->next_states,
              &analysis->state_pool,
              analysis->states.contents[j]
            );
            j++;
          }
          break;
        }
      }

      const TSStateId parse_state = analysis_state__top(state)->parse_state;
      const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
      const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
      const unsigned child_index = analysis_state__top(state)->child_index;
      const QueryStep * const step = &self->steps.contents[state->step_index];

      unsigned subgraph_index, exists;
      array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
      if (!exists) continue;
      const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index];

      // Follow every possible path in the parse table, but only visit states that
      // are part of the subgraph for the current symbol.
      LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
      while (ts_lookahead_iterator__next(&lookahead_iterator)) {
        TSSymbol sym = lookahead_iterator.symbol;

        // Compute the parse state a (non-extra) shift on `sym` would reach;
        // paths with no shift successor are skipped.
        AnalysisSubgraphNode successor = {
          .state = parse_state,
          .child_index = child_index,
        };
        if (lookahead_iterator.action_count) {
          const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
          if (action->type == TSParseActionTypeShift) {
            if (!action->shift.extra) {
              successor.state = action->shift.state;
              successor.child_index++;
            }
          } else {
            continue;
          }
        } else if (lookahead_iterator.next_state != 0) {
          successor.state = lookahead_iterator.next_state;
          successor.child_index++;
        } else {
          continue;
        }

        unsigned node_index;
        array_search_sorted_with(
          &subgraph->nodes,
          analysis_subgraph_node__compare, &successor,
          &node_index, &exists
        );
        // Visit every subgraph node sharing this (state, child_index) pair;
        // the nodes differ only in `done` flag and production id.
        while (node_index < subgraph->nodes.size) {
          AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
          if (node->state != successor.state || node->child_index != successor.child_index) break;

          // Use the subgraph to determine what alias and field will eventually be applied
          // to this child node.
          TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
          TSSymbol visible_symbol = alias
            ? alias
            : self->language->symbol_metadata[sym].visible
              ? self->language->public_symbol_map[sym]
              : 0;
          TSFieldId field_id = parent_field_id;
          if (!field_id) {
            const TSFieldMapEntry *field_map, *field_map_end;
            ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
            for (; field_map != field_map_end; field_map++) {
              if (!field_map->inherited && field_map->child_index == child_index) {
                field_id = field_map->field_id;
                break;
              }
            }
          }

          // Create a new state that has advanced past this hypothetical subtree.
          AnalysisState next_state = *state;
          AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
          next_state_top->child_index = successor.child_index;
          next_state_top->parse_state = successor.state;
          if (node->done) next_state_top->done = true;

          // Determine if this hypothetical child node would match the current step
          // of the query pattern.
          bool does_match = false;
          if (visible_symbol) {
            does_match = true;
            if (step->symbol == WILDCARD_SYMBOL) {
              if (
                step->is_named &&
                !self->language->symbol_metadata[visible_symbol].named
              ) does_match = false;
            } else if (step->symbol != visible_symbol) {
              does_match = false;
            }
            if (step->field && step->field != field_id) {
              does_match = false;
            }
            if (
              step->supertype_symbol &&
              !analysis_state__has_supertype(state, step->supertype_symbol)
            ) does_match = false;
          }

          // If this child is hidden, then descend into it and walk through its children.
          // If the top entry of the stack is at the end of its rule, then that entry can
          // be replaced. Otherwise, push a new entry onto the stack.
          else if (sym >= self->language->token_count) {
            if (!next_state_top->done) {
              if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
                #ifdef DEBUG_ANALYZE_QUERY
                printf("Exceeded depth limit for state %u\n", j);
                #endif

                analysis->did_abort = true;
                continue;
              }

              next_state.depth++;
              next_state_top = analysis_state__top(&next_state);
            }

            *next_state_top = (AnalysisStateEntry) {
              .parse_state = parse_state,
              .parent_symbol = sym,
              .child_index = 0,
              .field_id = field_id,
              .done = false,
            };

            // Defer walks that revisit the same nonterminal more often than
            // the current limit allows; they are retried later with a
            // higher recursion depth limit.
            if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
              analysis_state_set__insert_sorted(
                &analysis->deeper_states,
                &analysis->state_pool,
                &next_state
              );
              continue;
            }
          }

          // Pop from the stack when this state reached the end of its current syntax node.
          while (next_state.depth > 0 && next_state_top->done) {
            next_state.depth--;
            next_state_top = analysis_state__top(&next_state);
          }

          // If this hypothetical child did match the current step of the query pattern,
          // then advance to the next step at the current depth. This involves skipping
          // over any descendant steps of the current child.
          const QueryStep *next_step = step;
          if (does_match) {
            for (;;) {
              next_state.step_index++;
              next_step = &self->steps.contents[next_state.step_index];
              if (
                next_step->depth == PATTERN_DONE_MARKER ||
                next_step->depth <= step->depth
              ) break;
            }
          } else if (successor.state == parse_state) {
            // No match and no parse-state change: this path cannot make
            // progress, so drop it rather than re-deriving the same state.
            continue;
          }

          for (;;) {
            // Skip pass-through states. Although these states have alternatives, they are only
            // used to implement repetitions, and query analysis does not need to process
            // repetitions in order to determine whether steps are possible and definite.
            if (next_step->is_pass_through) {
              next_state.step_index++;
              next_step++;
              continue;
            }

            // If the pattern is finished or hypothetical parent node is complete, then
            // record that matching can terminate at this step of the pattern. Otherwise,
            // add this state to the list of states to process on the next iteration.
            if (!next_step->is_dead_end) {
              bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth;
              if (did_finish_pattern) {
                array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol);
              } else if (next_state.depth == 0) {
                array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index);
              } else {
                analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state);
              }
            }

            // If the state has advanced to a step with an alternative step, then add another state
            // at that alternative step. This process is simpler than the process of actually matching a
            // pattern during query execution, because for the purposes of query analysis, there is no
            // need to process repetitions.
            if (
              does_match &&
              next_step->alternative_index != NONE &&
              next_step->alternative_index > next_state.step_index
            ) {
              next_state.step_index = next_step->alternative_index;
              next_step = &self->steps.contents[next_state.step_index];
            } else {
              break;
            }
          }
        }
      }
    }

    // Make the states accumulated for the next pass the working set.
    AnalysisStateSet _states = analysis->states;
    analysis->states = analysis->next_states;
    analysis->next_states = _states;
  }
}
| 1458 | |||
// Analyze all of the patterns in the query to determine, for each step, whether
// a failure to match that step implies that the containing pattern (or the
// pattern's root) can no longer match. This information is stored in the
// `parent_pattern_guaranteed` and `root_pattern_guaranteed` flags on each step
// and is used to prune work during query execution.
//
// Returns `true` if every pattern is structurally valid for this language.
// On failure, returns `false` and writes the byte offset of the offending
// pattern into `*error_offset`.
static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
  // Record the pattern-map entries for patterns that are not rooted and whose
  // first step has a concrete (non-wildcard) symbol. These are analyzed
  // separately at the end, against the grammar's hidden repetition symbols.
  Array(uint16_t) non_rooted_pattern_start_steps = array_new();
  for (unsigned i = 0; i < self->pattern_map.size; i++) {
    PatternEntry *pattern = &self->pattern_map.contents[i];
    if (!pattern->is_rooted) {
      QueryStep *step = &self->steps.contents[pattern->step_index];
      if (step->symbol != WILDCARD_SYMBOL) {
        array_push(&non_rooted_pattern_start_steps, i);
      }
    }
  }

  // Walk forward through all of the steps in the query, computing some
  // basic information about each step. Mark all of the steps that contain
  // captures, and record the indices of all of the steps that have child steps.
  Array(uint32_t) parent_step_indices = array_new();
  for (unsigned i = 0; i < self->steps.size; i++) {
    QueryStep *step = &self->steps.contents[i];
    if (step->depth == PATTERN_DONE_MARKER) {
      step->parent_pattern_guaranteed = true;
      step->root_pattern_guaranteed = true;
      continue;
    }

    bool has_children = false;
    bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
    step->contains_captures = step->capture_ids[0] != NONE;
    // Scan this step's descendants: any step deeper than `step` (until the
    // depth drops back or the pattern ends) is a child or deeper descendant.
    for (unsigned j = i + 1; j < self->steps.size; j++) {
      QueryStep *next_step = &self->steps.contents[j];
      if (
        next_step->depth == PATTERN_DONE_MARKER ||
        next_step->depth <= step->depth
      ) break;
      if (next_step->capture_ids[0] != NONE) {
        step->contains_captures = true;
      }
      if (!is_wildcard) {
        next_step->root_pattern_guaranteed = true;
        next_step->parent_pattern_guaranteed = true;
      }
      has_children = true;
    }

    if (has_children && !is_wildcard) {
      array_push(&parent_step_indices, i);
    }
  }

  // For every parent symbol in the query, initialize an 'analysis subgraph'.
  // This subgraph lists all of the states in the parse table that are directly
  // involved in building subtrees for this symbol.
  //
  // In addition to the parent symbols in the query, construct subgraphs for all
  // of the hidden symbols in the grammar, because these might occur within
  // one of the parent nodes, such that their children appear to belong to the
  // parent.
  AnalysisSubgraphArray subgraphs = array_new();
  for (unsigned i = 0; i < parent_step_indices.size; i++) {
    uint32_t parent_step_index = parent_step_indices.contents[i];
    TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
    AnalysisSubgraph subgraph = { .symbol = parent_symbol };
    array_insert_sorted_by(&subgraphs, .symbol, subgraph);
  }
  for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) {
    if (!ts_language_symbol_metadata(self->language, sym).visible) {
      AnalysisSubgraph subgraph = { .symbol = sym };
      array_insert_sorted_by(&subgraphs, .symbol, subgraph);
    }
  }

  // Scan the parse table to find the data needed to populate these subgraphs.
  // Collect three things during this scan:
  //   1) All of the parse states where one of these symbols can start.
  //   2) All of the parse states where one of these symbols can end, along
  //      with information about the node that would be created.
  //   3) A list of predecessor states for each state.
  StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
  for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) {
    unsigned subgraph_index, exists;
    LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
    while (ts_lookahead_iterator__next(&lookahead_iterator)) {
      if (lookahead_iterator.action_count) {
        for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
          const TSParseAction *action = &lookahead_iterator.actions[i];
          if (action->type == TSParseActionTypeReduce) {
            // A reduce action marks a state where one of the subgraph's
            // symbols (or one of its aliases) can *end*.
            const TSSymbol *aliases, *aliases_end;
            ts_language_aliases_for_symbol(
              self->language,
              action->reduce.symbol,
              &aliases,
              &aliases_end
            );
            for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
              array_search_sorted_by(
                &subgraphs,
                .symbol,
                *symbol,
                &subgraph_index,
                &exists
              );
              if (exists) {
                AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
                if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
                  array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
                    .state = state,
                    .production_id = action->reduce.production_id,
                    .child_index = action->reduce.child_count,
                    .done = true,
                  }));
                }
              }
            }
          } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
            // A (non-extra) shift records a predecessor edge in the
            // state graph, used later for the backward walk.
            TSStateId next_state = action->shift.state;
            state_predecessor_map_add(&predecessor_map, next_state, state);
          }
        }
      } else if (lookahead_iterator.next_state != 0) {
        if (lookahead_iterator.next_state != state) {
          state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
        }
        if (ts_language_state_is_primary(self->language, state)) {
          const TSSymbol *aliases, *aliases_end;
          ts_language_aliases_for_symbol(
            self->language,
            lookahead_iterator.symbol,
            &aliases,
            &aliases_end
          );
          for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
            array_search_sorted_by(
              &subgraphs,
              .symbol,
              *symbol,
              &subgraph_index,
              &exists
            );
            if (exists) {
              AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
              if (
                subgraph->start_states.size == 0 ||
                *array_back(&subgraph->start_states) != state
              )
                array_push(&subgraph->start_states, state);
            }
          }
        }
      }
    }
  }

  // For each subgraph, compute the preceding states by walking backward
  // from the end states using the predecessor map.
  Array(AnalysisSubgraphNode) next_nodes = array_new();
  for (unsigned i = 0; i < subgraphs.size; i++) {
    AnalysisSubgraph *subgraph = &subgraphs.contents[i];
    if (subgraph->nodes.size == 0) {
      // No end states were found for this symbol; drop the subgraph entirely.
      array_delete(&subgraph->start_states);
      array_erase(&subgraphs, i);
      i--;
      continue;
    }
    array_assign(&next_nodes, &subgraph->nodes);
    while (next_nodes.size > 0) {
      AnalysisSubgraphNode node = array_pop(&next_nodes);
      // Only walk back while more than one child remains; a node with
      // child_index 1 is already at the production's first child.
      if (node.child_index > 1) {
        unsigned predecessor_count;
        const TSStateId *predecessors = state_predecessor_map_get(
          &predecessor_map,
          node.state,
          &predecessor_count
        );
        for (unsigned j = 0; j < predecessor_count; j++) {
          AnalysisSubgraphNode predecessor_node = {
            .state = predecessors[j],
            .child_index = node.child_index - 1,
            .production_id = node.production_id,
            .done = false,
          };
          unsigned index, exists;
          array_search_sorted_with(
            &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
            &index, &exists
          );
          if (!exists) {
            array_insert(&subgraph->nodes, index, predecessor_node);
            array_push(&next_nodes, predecessor_node);
          }
        }
      }
    }
  }

  #ifdef DEBUG_ANALYZE_QUERY
  printf("\nSubgraphs:\n");
  for (unsigned i = 0; i < subgraphs.size; i++) {
    AnalysisSubgraph *subgraph = &subgraphs.contents[i];
    printf("  %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
    for (unsigned j = 0; j < subgraph->start_states.size; j++) {
      printf(
        "    {state: %u}\n",
        subgraph->start_states.contents[j]
      );
    }
    for (unsigned j = 0; j < subgraph->nodes.size; j++) {
      AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
      printf(
        "    {state: %u, child_index: %u, production_id: %u, done: %d}\n",
        node->state, node->child_index, node->production_id, node->done
      );
    }
    printf("\n");
  }
  #endif

  // For each non-terminal pattern, determine if the pattern can successfully match,
  // and identify all of the possible children within the pattern where matching could fail.
  bool all_patterns_are_valid = true;
  QueryAnalysis analysis = query_analysis__new();
  for (unsigned i = 0; i < parent_step_indices.size; i++) {
    uint16_t parent_step_index = parent_step_indices.contents[i];
    uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
    TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
    if (parent_symbol == ts_builtin_sym_error) continue;

    // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
    // root symbol is a terminal, then return an error.
    unsigned subgraph_index, exists;
    array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
    if (!exists) {
      unsigned first_child_step_index = parent_step_index + 1;
      uint32_t j, child_exists;
      array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists);
      assert(child_exists);
      *error_offset = self->step_offsets.contents[j].byte_offset;
      all_patterns_are_valid = false;
      break;
    }

    // Initialize an analysis state at every parse state in the table where
    // this parent symbol can occur.
    AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
    analysis_state_set__clear(&analysis.states, &analysis.state_pool);
    analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
    for (unsigned j = 0; j < subgraph->start_states.size; j++) {
      TSStateId parse_state = subgraph->start_states.contents[j];
      analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
        .step_index = parent_step_index + 1,
        .stack = {
          [0] = {
            .parse_state = parse_state,
            .parent_symbol = parent_symbol,
            .child_index = 0,
            .field_id = 0,
            .done = false,
          },
        },
        .depth = 1,
        .root_symbol = parent_symbol,
      }));
    }

    #ifdef DEBUG_ANALYZE_QUERY
    printf(
      "\nWalk states for %s:\n",
      ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol)
    );
    #endif

    analysis.did_abort = false;
    ts_query__perform_analysis(self, &subgraphs, &analysis);

    // If this pattern could not be fully analyzed, then every step should
    // be considered fallible.
    if (analysis.did_abort) {
      for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
        QueryStep *step = &self->steps.contents[j];
        if (
          step->depth <= parent_depth ||
          step->depth == PATTERN_DONE_MARKER
        ) break;
        if (!step->is_dead_end) {
          step->parent_pattern_guaranteed = false;
          step->root_pattern_guaranteed = false;
        }
      }
      continue;
    }

    // If this pattern cannot match, store the pattern index so that it can be
    // returned to the caller.
    if (analysis.finished_parent_symbols.size == 0) {
      assert(analysis.final_step_indices.size > 0);
      uint16_t impossible_step_index = *array_back(&analysis.final_step_indices);
      uint32_t j, impossible_exists;
      array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists);
      // Clamp to the last recorded offset when the impossible step has no
      // exact entry in the offsets array.
      if (j >= self->step_offsets.size) j = self->step_offsets.size - 1;
      *error_offset = self->step_offsets.contents[j].byte_offset;
      all_patterns_are_valid = false;
      break;
    }

    // Mark as fallible any step where a match terminated.
    // Later, this property will be propagated to all of the step's predecessors.
    for (unsigned j = 0; j < analysis.final_step_indices.size; j++) {
      uint32_t final_step_index = analysis.final_step_indices.contents[j];
      QueryStep *step = &self->steps.contents[final_step_index];
      if (
        step->depth != PATTERN_DONE_MARKER &&
        step->depth > parent_depth &&
        !step->is_dead_end
      ) {
        step->parent_pattern_guaranteed = false;
        step->root_pattern_guaranteed = false;
      }
    }
  }

  // Mark as indefinite any step with captures that are used in predicates.
  Array(uint16_t) predicate_capture_ids = array_new();
  for (unsigned i = 0; i < self->patterns.size; i++) {
    QueryPattern *pattern = &self->patterns.contents[i];

    // Gather all of the captures that are used in predicates for this pattern.
    array_clear(&predicate_capture_ids);
    for (
      unsigned start = pattern->predicate_steps.offset,
      end = start + pattern->predicate_steps.length,
      j = start; j < end; j++
    ) {
      TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
      if (step->type == TSQueryPredicateStepTypeCapture) {
        uint16_t value_id = step->value_id;
        array_insert_sorted_by(&predicate_capture_ids, , value_id);
      }
    }

    // Find all of the steps that have these captures.
    for (
      unsigned start = pattern->steps.offset,
      end = start + pattern->steps.length,
      j = start; j < end; j++
    ) {
      QueryStep *step = &self->steps.contents[j];
      for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
        uint16_t capture_id = step->capture_ids[k];
        if (capture_id == NONE) break;
        unsigned index, exists;
        array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
        if (exists) {
          step->root_pattern_guaranteed = false;
          break;
        }
      }
    }
  }

  // Propagate fallibility. If a pattern is fallible at a given step, then it is
  // fallible at all of its preceding steps. Iterate to a fixed point.
  bool done = self->steps.size == 0;
  while (!done) {
    done = true;
    for (unsigned i = self->steps.size - 1; i > 0; i--) {
      QueryStep *step = &self->steps.contents[i];
      if (step->depth == PATTERN_DONE_MARKER) continue;

      // Determine if this step is definite or has definite alternatives.
      bool parent_pattern_guaranteed = false;
      for (;;) {
        if (step->root_pattern_guaranteed) {
          parent_pattern_guaranteed = true;
          break;
        }
        if (step->alternative_index == NONE || step->alternative_index < i) {
          break;
        }
        step = &self->steps.contents[step->alternative_index];
      }

      // If not, mark its predecessor as indefinite.
      if (!parent_pattern_guaranteed) {
        QueryStep *prev_step = &self->steps.contents[i - 1];
        if (
          !prev_step->is_dead_end &&
          prev_step->depth != PATTERN_DONE_MARKER &&
          prev_step->root_pattern_guaranteed
        ) {
          prev_step->root_pattern_guaranteed = false;
          done = false;
        }
      }
    }
  }

  #ifdef DEBUG_ANALYZE_QUERY
  printf("Steps:\n");
  for (unsigned i = 0; i < self->steps.size; i++) {
    QueryStep *step = &self->steps.contents[i];
    if (step->depth == PATTERN_DONE_MARKER) {
      printf("  %u: DONE\n", i);
    } else {
      printf(
        "  %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
        i,
        (step->symbol == WILDCARD_SYMBOL)
          ? "ANY"
          : ts_language_symbol_name(self->language, step->symbol),
        (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
        step->depth,
        step->parent_pattern_guaranteed,
        step->root_pattern_guaranteed
      );
    }
  }
  #endif

  // Determine which repetition symbols in this language have the possibility
  // of matching non-rooted patterns in this query. These repetition symbols
  // prevent certain optimizations with range restrictions.
  analysis.did_abort = false;
  for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) {
    uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i];
    PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index];

    analysis_state_set__clear(&analysis.states, &analysis.state_pool);
    analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
    for (unsigned j = 0; j < subgraphs.size; j++) {
      AnalysisSubgraph *subgraph = &subgraphs.contents[j];
      TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol);
      // Only hidden, unnamed symbols (repetitions and other auxiliary rules)
      // are considered here.
      if (metadata.visible || metadata.named) continue;

      for (uint32_t k = 0; k < subgraph->start_states.size; k++) {
        TSStateId parse_state = subgraph->start_states.contents[k];
        analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
          .step_index = pattern_entry->step_index,
          .stack = {
            [0] = {
              .parse_state = parse_state,
              .parent_symbol = subgraph->symbol,
              .child_index = 0,
              .field_id = 0,
              .done = false,
            },
          },
          .root_symbol = subgraph->symbol,
          .depth = 1,
        }));
      }
    }

    #ifdef DEBUG_ANALYZE_QUERY
    printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index);
    #endif

    ts_query__perform_analysis(
      self,
      &subgraphs,
      &analysis
    );

    if (analysis.finished_parent_symbols.size > 0) {
      self->patterns.contents[pattern_entry->pattern_index].is_non_local = true;
    }

    for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) {
      TSSymbol symbol = analysis.finished_parent_symbols.contents[k];
      array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol);
    }
  }

  #ifdef DEBUG_ANALYZE_QUERY
  if (self->repeat_symbols_with_rootless_patterns.size > 0) {
    printf("\nRepetition symbols with rootless patterns:\n");
    printf("aborted analysis: %d\n", analysis.did_abort);
    for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
      TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i];
      printf("  %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
    }
    printf("\n");
  }
  #endif

  // Cleanup
  for (unsigned i = 0; i < subgraphs.size; i++) {
    array_delete(&subgraphs.contents[i].start_states);
    array_delete(&subgraphs.contents[i].nodes);
  }
  array_delete(&subgraphs);
  query_analysis__delete(&analysis);
  array_delete(&next_nodes);
  array_delete(&non_rooted_pattern_start_steps);
  array_delete(&parent_step_indices);
  array_delete(&predicate_capture_ids);
  state_predecessor_map_delete(&predecessor_map);

  return all_patterns_are_valid;
}
| 1956 | |||
| 1957 | static void ts_query__add_negated_fields( | ||
| 1958 | TSQuery *self, | ||
| 1959 | uint16_t step_index, | ||
| 1960 | TSFieldId *field_ids, | ||
| 1961 | uint16_t field_count | ||
| 1962 | ) { | ||
| 1963 | QueryStep *step = &self->steps.contents[step_index]; | ||
| 1964 | |||
| 1965 | // The negated field array stores a list of field lists, separated by zeros. | ||
| 1966 | // Try to find the start index of an existing list that matches this new list. | ||
| 1967 | bool failed_match = false; | ||
| 1968 | unsigned match_count = 0; | ||
| 1969 | unsigned start_i = 0; | ||
| 1970 | for (unsigned i = 0; i < self->negated_fields.size; i++) { | ||
| 1971 | TSFieldId existing_field_id = self->negated_fields.contents[i]; | ||
| 1972 | |||
| 1973 | // At each zero value, terminate the match attempt. If we've exactly | ||
| 1974 | // matched the new field list, then reuse this index. Otherwise, | ||
| 1975 | // start over the matching process. | ||
| 1976 | if (existing_field_id == 0) { | ||
| 1977 | if (match_count == field_count) { | ||
| 1978 | step->negated_field_list_id = start_i; | ||
| 1979 | return; | ||
| 1980 | } else { | ||
| 1981 | start_i = i + 1; | ||
| 1982 | match_count = 0; | ||
| 1983 | failed_match = false; | ||
| 1984 | } | ||
| 1985 | } | ||
| 1986 | |||
| 1987 | // If the existing list matches our new list so far, then advance | ||
| 1988 | // to the next element of the new list. | ||
| 1989 | else if ( | ||
| 1990 | match_count < field_count && | ||
| 1991 | existing_field_id == field_ids[match_count] && | ||
| 1992 | !failed_match | ||
| 1993 | ) { | ||
| 1994 | match_count++; | ||
| 1995 | } | ||
| 1996 | |||
| 1997 | // Otherwise, this existing list has failed to match. | ||
| 1998 | else { | ||
| 1999 | match_count = 0; | ||
| 2000 | failed_match = true; | ||
| 2001 | } | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | step->negated_field_list_id = self->negated_fields.size; | ||
| 2005 | array_extend(&self->negated_fields, field_count, field_ids); | ||
| 2006 | array_push(&self->negated_fields, 0); | ||
| 2007 | } | ||
| 2008 | |||
// Parse a double-quoted string literal from the stream, writing the unescaped
// contents into `self->string_buffer`. Supports the escapes \n, \r, \t, \0;
// any other escaped character is copied through verbatim.
//
// Returns TSQueryErrorNone on success. On a syntax error (missing opening
// quote, unescaped newline, or end of input before the closing quote), the
// stream is reset to the opening quote and TSQueryErrorSyntax is returned.
static TSQueryError ts_query__parse_string_literal(
  TSQuery *self,
  Stream *stream
) {
  const char *string_start = stream->input;
  if (stream->next != '"') return TSQueryErrorSyntax;
  stream_advance(stream);
  // `prev_position` marks the start of the pending run of plain (unescaped)
  // characters; runs are flushed into the buffer in one array_extend call.
  const char *prev_position = stream->input;

  bool is_escaped = false;
  array_clear(&self->string_buffer);
  for (;;) {
    if (is_escaped) {
      // The previous character was a backslash: translate the escape.
      is_escaped = false;
      switch (stream->next) {
        case 'n':
          array_push(&self->string_buffer, '\n');
          break;
        case 'r':
          array_push(&self->string_buffer, '\r');
          break;
        case 't':
          array_push(&self->string_buffer, '\t');
          break;
        case '0':
          array_push(&self->string_buffer, '\0');
          break;
        default:
          // Unknown escape: copy the raw (possibly multi-byte) character.
          array_extend(&self->string_buffer, stream->next_size, stream->input);
          break;
      }
      // The next plain run starts after this escaped character.
      prev_position = stream->input + stream->next_size;
    } else {
      if (stream->next == '\\') {
        // Flush the plain run up to (but not including) the backslash.
        array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
        prev_position = stream->input + 1;
        is_escaped = true;
      } else if (stream->next == '"') {
        // Closing quote: flush the final plain run and consume the quote.
        array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
        stream_advance(stream);
        return TSQueryErrorNone;
      } else if (stream->next == '\n') {
        // Literal newlines are not allowed inside string literals.
        stream_reset(stream, string_start);
        return TSQueryErrorSyntax;
      }
    }
    if (!stream_advance(stream)) {
      // End of input before the closing quote.
      stream_reset(stream, string_start);
      return TSQueryErrorSyntax;
    }
  }
}
| 2061 | |||
| 2062 | // Parse a single predicate associated with a pattern, adding it to the | ||
| 2063 | // query's internal `predicate_steps` array. Predicates are arbitrary | ||
| 2064 | // S-expressions associated with a pattern which are meant to be handled at | ||
| 2065 | // a higher level of abstraction, such as the Rust/JavaScript bindings. They | ||
| 2066 | // can contain '@'-prefixed capture names, double-quoted strings, and bare | ||
| 2067 | // symbols, which also represent strings. | ||
| 2068 | static TSQueryError ts_query__parse_predicate( | ||
| 2069 | TSQuery *self, | ||
| 2070 | Stream *stream | ||
| 2071 | ) { | ||
| 2072 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; | ||
| 2073 | const char *predicate_name = stream->input; | ||
| 2074 | stream_scan_identifier(stream); | ||
| 2075 | uint32_t length = (uint32_t)(stream->input - predicate_name); | ||
| 2076 | uint16_t id = symbol_table_insert_name( | ||
| 2077 | &self->predicate_values, | ||
| 2078 | predicate_name, | ||
| 2079 | length | ||
| 2080 | ); | ||
| 2081 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2082 | .type = TSQueryPredicateStepTypeString, | ||
| 2083 | .value_id = id, | ||
| 2084 | })); | ||
| 2085 | stream_skip_whitespace(stream); | ||
| 2086 | |||
| 2087 | for (;;) { | ||
| 2088 | if (stream->next == ')') { | ||
| 2089 | stream_advance(stream); | ||
| 2090 | stream_skip_whitespace(stream); | ||
| 2091 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2092 | .type = TSQueryPredicateStepTypeDone, | ||
| 2093 | .value_id = 0, | ||
| 2094 | })); | ||
| 2095 | break; | ||
| 2096 | } | ||
| 2097 | |||
| 2098 | // Parse an '@'-prefixed capture name | ||
| 2099 | else if (stream->next == '@') { | ||
| 2100 | stream_advance(stream); | ||
| 2101 | |||
| 2102 | // Parse the capture name | ||
| 2103 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; | ||
| 2104 | const char *capture_name = stream->input; | ||
| 2105 | stream_scan_identifier(stream); | ||
| 2106 | uint32_t capture_length = (uint32_t)(stream->input - capture_name); | ||
| 2107 | |||
| 2108 | // Add the capture id to the first step of the pattern | ||
| 2109 | int capture_id = symbol_table_id_for_name( | ||
| 2110 | &self->captures, | ||
| 2111 | capture_name, | ||
| 2112 | capture_length | ||
| 2113 | ); | ||
| 2114 | if (capture_id == -1) { | ||
| 2115 | stream_reset(stream, capture_name); | ||
| 2116 | return TSQueryErrorCapture; | ||
| 2117 | } | ||
| 2118 | |||
| 2119 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2120 | .type = TSQueryPredicateStepTypeCapture, | ||
| 2121 | .value_id = capture_id, | ||
| 2122 | })); | ||
| 2123 | } | ||
| 2124 | |||
| 2125 | // Parse a string literal | ||
| 2126 | else if (stream->next == '"') { | ||
| 2127 | TSQueryError e = ts_query__parse_string_literal(self, stream); | ||
| 2128 | if (e) return e; | ||
| 2129 | uint16_t query_id = symbol_table_insert_name( | ||
| 2130 | &self->predicate_values, | ||
| 2131 | self->string_buffer.contents, | ||
| 2132 | self->string_buffer.size | ||
| 2133 | ); | ||
| 2134 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2135 | .type = TSQueryPredicateStepTypeString, | ||
| 2136 | .value_id = query_id, | ||
| 2137 | })); | ||
| 2138 | } | ||
| 2139 | |||
| 2140 | // Parse a bare symbol | ||
| 2141 | else if (stream_is_ident_start(stream)) { | ||
| 2142 | const char *symbol_start = stream->input; | ||
| 2143 | stream_scan_identifier(stream); | ||
| 2144 | uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); | ||
| 2145 | uint16_t query_id = symbol_table_insert_name( | ||
| 2146 | &self->predicate_values, | ||
| 2147 | symbol_start, | ||
| 2148 | symbol_length | ||
| 2149 | ); | ||
| 2150 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { | ||
| 2151 | .type = TSQueryPredicateStepTypeString, | ||
| 2152 | .value_id = query_id, | ||
| 2153 | })); | ||
| 2154 | } | ||
| 2155 | |||
| 2156 | else { | ||
| 2157 | return TSQueryErrorSyntax; | ||
| 2158 | } | ||
| 2159 | |||
| 2160 | stream_skip_whitespace(stream); | ||
| 2161 | } | ||
| 2162 | |||
| 2163 | return 0; | ||
| 2164 | } | ||
| 2165 | |||
// Read one S-expression pattern from the stream, and incorporate it into
// the query's internal state machine representation. For nested patterns,
// this function calls itself recursively.
//
// The caller is responsible for passing in a dedicated CaptureQuantifiers.
// These should not be shared between different calls to ts_query__parse_pattern!
//
// Returns 0 on success, a TSQueryError code on failure, or the internal
// PARENT_DONE sentinel when the stream is positioned at the closing
// delimiter of the *enclosing* pattern.
static TSQueryError ts_query__parse_pattern(
  TSQuery *self,
  Stream *stream,
  uint32_t depth,
  bool is_immediate,
  CaptureQuantifiers *capture_quantifiers
) {
  if (stream->next == 0) return TSQueryErrorSyntax;
  // A closing delimiter is not an error here: it tells the caller that its
  // own pattern is finished.
  if (stream->next == ')' || stream->next == ']') return PARENT_DONE;

  const uint32_t starting_step_index = self->steps.size;

  // Store the byte offset of each step in the query.
  if (
    self->step_offsets.size == 0 ||
    array_back(&self->step_offsets)->step_index != starting_step_index
  ) {
    array_push(&self->step_offsets, ((StepOffset) {
      .step_index = starting_step_index,
      .byte_offset = stream_offset(stream),
    }));
  }

  // An open bracket is the start of an alternation.
  if (stream->next == '[') {
    stream_advance(stream);
    stream_skip_whitespace(stream);

    // Parse each branch, and add a placeholder step in between the branches.
    Array(uint32_t) branch_step_indices = array_new();
    CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
    for (;;) {
      uint32_t start_index = self->steps.size;
      TSQueryError e = ts_query__parse_pattern(
        self,
        stream,
        depth,
        is_immediate,
        &branch_capture_quantifiers
      );

      if (e == PARENT_DONE) {
        // An empty alternation (`[]`) is a syntax error.
        if (stream->next == ']' && branch_step_indices.size > 0) {
          stream_advance(stream);
          break;
        }
        e = TSQueryErrorSyntax;
      }
      if (e) {
        capture_quantifiers_delete(&branch_capture_quantifiers);
        array_delete(&branch_step_indices);
        return e;
      }

      // The first branch's quantifiers replace the caller's; subsequent
      // branches are joined, so each capture's quantifier reflects every
      // alternative.
      if (start_index == starting_step_index) {
        capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
      } else {
        capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
      }

      array_push(&branch_step_indices, start_index);
      array_push(&self->steps, query_step__new(0, depth, false));
      capture_quantifiers_clear(&branch_capture_quantifiers);
    }
    // Remove the placeholder step pushed after the final branch.
    (void)array_pop(&self->steps);

    // For all of the branches except for the last one, add the subsequent branch as an
    // alternative, and link the end of the branch to the current end of the steps.
    for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
      uint32_t step_index = branch_step_indices.contents[i];
      uint32_t next_step_index = branch_step_indices.contents[i + 1];
      QueryStep *start_step = &self->steps.contents[step_index];
      QueryStep *end_step = &self->steps.contents[next_step_index - 1];
      start_step->alternative_index = next_step_index;
      end_step->alternative_index = self->steps.size;
      end_step->is_dead_end = true;
    }

    capture_quantifiers_delete(&branch_capture_quantifiers);
    array_delete(&branch_step_indices);
  }

  // An open parenthesis can be the start of three possible constructs:
  // * A grouped sequence
  // * A predicate
  // * A named node
  else if (stream->next == '(') {
    stream_advance(stream);
    stream_skip_whitespace(stream);

    // If this parenthesis is followed by a node, then it represents a grouped sequence.
    if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
      bool child_is_immediate = is_immediate;
      CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
      for (;;) {
        // A leading '.' anchors the next child to follow its sibling
        // immediately.
        if (stream->next == '.') {
          child_is_immediate = true;
          stream_advance(stream);
          stream_skip_whitespace(stream);
        }
        TSQueryError e = ts_query__parse_pattern(
          self,
          stream,
          depth,
          child_is_immediate,
          &child_capture_quantifiers
        );
        if (e == PARENT_DONE) {
          if (stream->next == ')') {
            stream_advance(stream);
            break;
          }
          e = TSQueryErrorSyntax;
        }
        if (e) {
          capture_quantifiers_delete(&child_capture_quantifiers);
          return e;
        }

        capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
        capture_quantifiers_clear(&child_capture_quantifiers);
        child_is_immediate = false;
      }

      capture_quantifiers_delete(&child_capture_quantifiers);
    }

    // A dot/pound character indicates the start of a predicate.
    else if (stream->next == '.' || stream->next == '#') {
      stream_advance(stream);
      return ts_query__parse_predicate(self, stream);
    }

    // Otherwise, this parenthesis is the start of a named node.
    else {
      TSSymbol symbol;

      // Parse a normal node name
      if (stream_is_ident_start(stream)) {
        const char *node_name = stream->input;
        stream_scan_identifier(stream);
        uint32_t length = (uint32_t)(stream->input - node_name);

        // Parse the wildcard symbol
        if (length == 1 && node_name[0] == '_') {
          symbol = WILDCARD_SYMBOL;
        }

        else {
          symbol = ts_language_symbol_for_name(
            self->language,
            node_name,
            length,
            true
          );
          if (!symbol) {
            // Rewind so that the reported error offset points at the name.
            stream_reset(stream, node_name);
            return TSQueryErrorNodeType;
          }
        }
      } else {
        return TSQueryErrorSyntax;
      }

      // Add a step for the node.
      array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
      QueryStep *step = array_back(&self->steps);
      // A supertype matches as a wildcard, with the supertype recorded
      // separately so matching can check node membership.
      if (ts_language_symbol_metadata(self->language, symbol).supertype) {
        step->supertype_symbol = step->symbol;
        step->symbol = WILDCARD_SYMBOL;
      }
      if (symbol == WILDCARD_SYMBOL) {
        step->is_named = true;
      }

      stream_skip_whitespace(stream);

      // A '/' narrows a supertype to one of its concrete subtypes,
      // e.g. `(expression/binary_expression)`.
      if (stream->next == '/') {
        stream_advance(stream);
        if (!stream_is_ident_start(stream)) {
          return TSQueryErrorSyntax;
        }

        const char *node_name = stream->input;
        stream_scan_identifier(stream);
        uint32_t length = (uint32_t)(stream->input - node_name);

        step->symbol = ts_language_symbol_for_name(
          self->language,
          node_name,
          length,
          true
        );
        if (!step->symbol) {
          stream_reset(stream, node_name);
          return TSQueryErrorNodeType;
        }

        stream_skip_whitespace(stream);
      }

      // Parse the child patterns
      bool child_is_immediate = false;
      uint16_t last_child_step_index = 0;
      uint16_t negated_field_count = 0;
      TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
      CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
      for (;;) {
        // Parse a negated field assertion
        if (stream->next == '!') {
          stream_advance(stream);
          stream_skip_whitespace(stream);
          if (!stream_is_ident_start(stream)) {
            capture_quantifiers_delete(&child_capture_quantifiers);
            return TSQueryErrorSyntax;
          }
          const char *field_name = stream->input;
          stream_scan_identifier(stream);
          uint32_t length = (uint32_t)(stream->input - field_name);
          stream_skip_whitespace(stream);

          TSFieldId field_id = ts_language_field_id_for_name(
            self->language,
            field_name,
            length
          );
          if (!field_id) {
            stream->input = field_name;
            capture_quantifiers_delete(&child_capture_quantifiers);
            return TSQueryErrorField;
          }

          // Record the field id, silently ignoring any beyond the fixed
          // cap. NOTE(review): despite the upstream "keep sorted" comment,
          // ids are appended in source order here — presumably
          // ts_query__add_negated_fields handles ordering; confirm there.
          if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
            negated_field_ids[negated_field_count] = field_id;
            negated_field_count++;
          }

          continue;
        }

        // Parse a sibling anchor
        if (stream->next == '.') {
          child_is_immediate = true;
          stream_advance(stream);
          stream_skip_whitespace(stream);
        }

        uint16_t step_index = self->steps.size;
        TSQueryError e = ts_query__parse_pattern(
          self,
          stream,
          depth + 1,
          child_is_immediate,
          &child_capture_quantifiers
        );
        if (e == PARENT_DONE) {
          if (stream->next == ')') {
            // A trailing anchor (`.` before `)`) marks the previous child
            // as required to be the last child; an anchor with no
            // preceding child is a syntax error.
            if (child_is_immediate) {
              if (last_child_step_index == 0) {
                capture_quantifiers_delete(&child_capture_quantifiers);
                return TSQueryErrorSyntax;
              }
              self->steps.contents[last_child_step_index].is_last_child = true;
            }

            if (negated_field_count) {
              ts_query__add_negated_fields(
                self,
                starting_step_index,
                negated_field_ids,
                negated_field_count
              );
            }

            stream_advance(stream);
            break;
          }
          e = TSQueryErrorSyntax;
        }
        if (e) {
          capture_quantifiers_delete(&child_capture_quantifiers);
          return e;
        }

        capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);

        last_child_step_index = step_index;
        child_is_immediate = false;
        capture_quantifiers_clear(&child_capture_quantifiers);
      }
      capture_quantifiers_delete(&child_capture_quantifiers);
    }
  }

  // Parse a wildcard pattern
  else if (stream->next == '_') {
    stream_advance(stream);
    stream_skip_whitespace(stream);

    // Add a step that matches any kind of node
    array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
  }

  // Parse a double-quoted anonymous leaf node expression
  else if (stream->next == '"') {
    const char *string_start = stream->input;
    TSQueryError e = ts_query__parse_string_literal(self, stream);
    if (e) return e;

    // Add a step for the node
    TSSymbol symbol = ts_language_symbol_for_name(
      self->language,
      self->string_buffer.contents,
      self->string_buffer.size,
      false
    );
    if (!symbol) {
      // +1 so the error offset points just past the opening quote.
      stream_reset(stream, string_start + 1);
      return TSQueryErrorNodeType;
    }
    array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
  }

  // Parse a field-prefixed pattern
  else if (stream_is_ident_start(stream)) {
    // Parse the field name
    const char *field_name = stream->input;
    stream_scan_identifier(stream);
    uint32_t length = (uint32_t)(stream->input - field_name);
    stream_skip_whitespace(stream);

    if (stream->next != ':') {
      stream_reset(stream, field_name);
      return TSQueryErrorSyntax;
    }
    stream_advance(stream);
    stream_skip_whitespace(stream);

    // Parse the pattern
    CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
    TSQueryError e = ts_query__parse_pattern(
      self,
      stream,
      depth,
      is_immediate,
      &field_capture_quantifiers
    );
    if (e) {
      capture_quantifiers_delete(&field_capture_quantifiers);
      // A bare `field:` with no following pattern is a syntax error.
      if (e == PARENT_DONE) e = TSQueryErrorSyntax;
      return e;
    }

    // Add the field name to the first step of the pattern
    TSFieldId field_id = ts_language_field_id_for_name(
      self->language,
      field_name,
      length
    );
    if (!field_id) {
      stream->input = field_name;
      return TSQueryErrorField;
    }

    // Propagate the field to every alternative start of the sub-pattern
    // (alternation branches share the same field constraint).
    uint32_t step_index = starting_step_index;
    QueryStep *step = &self->steps.contents[step_index];
    for (;;) {
      step->field = field_id;
      if (
        step->alternative_index != NONE &&
        step->alternative_index > step_index &&
        step->alternative_index < self->steps.size
      ) {
        step_index = step->alternative_index;
        step = &self->steps.contents[step_index];
      } else {
        break;
      }
    }

    capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
    capture_quantifiers_delete(&field_capture_quantifiers);
  }

  else {
    return TSQueryErrorSyntax;
  }

  stream_skip_whitespace(stream);

  // Parse suffix modifiers for this pattern
  TSQuantifier quantifier = TSQuantifierOne;
  for (;;) {
    // Parse the one-or-more operator.
    if (stream->next == '+') {
      quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);

      stream_advance(stream);
      stream_skip_whitespace(stream);

      // A pass-through step that loops back to the pattern's start.
      QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
      repeat_step.alternative_index = starting_step_index;
      repeat_step.is_pass_through = true;
      repeat_step.alternative_is_immediate = true;
      array_push(&self->steps, repeat_step);
    }

    // Parse the zero-or-more repetition operator.
    else if (stream->next == '*') {
      quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);

      stream_advance(stream);
      stream_skip_whitespace(stream);

      QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
      repeat_step.alternative_index = starting_step_index;
      repeat_step.is_pass_through = true;
      repeat_step.alternative_is_immediate = true;
      array_push(&self->steps, repeat_step);

      // Stop when `step->alternative_index` is `NONE` or it points to
      // `repeat_step` or beyond. Note that having just been pushed,
      // `repeat_step` occupies slot `self->steps.size - 1`.
      QueryStep *step = &self->steps.contents[starting_step_index];
      while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) {
        step = &self->steps.contents[step->alternative_index];
      }
      // Allow skipping the whole repetition.
      step->alternative_index = self->steps.size;
    }

    // Parse the optional operator.
    else if (stream->next == '?') {
      quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);

      stream_advance(stream);
      stream_skip_whitespace(stream);

      // Follow the alternative chain to its end, then allow skipping
      // the optional pattern entirely.
      QueryStep *step = &self->steps.contents[starting_step_index];
      while (step->alternative_index != NONE && step->alternative_index < self->steps.size) {
        step = &self->steps.contents[step->alternative_index];
      }
      step->alternative_index = self->steps.size;
    }

    // Parse an '@'-prefixed capture pattern
    else if (stream->next == '@') {
      stream_advance(stream);
      if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
      const char *capture_name = stream->input;
      stream_scan_identifier(stream);
      uint32_t length = (uint32_t)(stream->input - capture_name);
      stream_skip_whitespace(stream);

      // Add the capture id to the first step of the pattern
      uint16_t capture_id = symbol_table_insert_name(
        &self->captures,
        capture_name,
        length
      );

      // Add the capture quantifier
      capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);

      // Apply the capture to every alternative start of the pattern.
      uint32_t step_index = starting_step_index;
      for (;;) {
        QueryStep *step = &self->steps.contents[step_index];
        query_step__add_capture(step, capture_id);
        if (
          step->alternative_index != NONE &&
          step->alternative_index > step_index &&
          step->alternative_index < self->steps.size
        ) {
          step_index = step->alternative_index;
        } else {
          break;
        }
      }
    }

    // No more suffix modifiers
    else {
      break;
    }
  }

  // Scale every capture's quantifier by this pattern's own quantifier.
  capture_quantifiers_mul(capture_quantifiers, quantifier);

  return 0;
}
| 2662 | |||
// Create a new query for the given language from the S-expression
// patterns in `source`. On failure, returns NULL after writing the byte
// offset of the problem to `*error_offset` and the error kind to
// `*error_type`. The returned query is freed with ts_query_delete.
TSQuery *ts_query_new(
  const TSLanguage *language,
  const char *source,
  uint32_t source_len,
  uint32_t *error_offset,
  TSQueryError *error_type
) {
  // Reject a missing language or one whose ABI version is outside the
  // range this library can consume.
  if (
    !language ||
    language->version > TREE_SITTER_LANGUAGE_VERSION ||
    language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
  ) {
    *error_type = TSQueryErrorLanguage;
    return NULL;
  }

  TSQuery *self = ts_malloc(sizeof(TSQuery));
  *self = (TSQuery) {
    .steps = array_new(),
    .pattern_map = array_new(),
    .captures = symbol_table_new(),
    .capture_quantifiers = array_new(),
    .predicate_values = symbol_table_new(),
    .predicate_steps = array_new(),
    .patterns = array_new(),
    .step_offsets = array_new(),
    .string_buffer = array_new(),
    .negated_fields = array_new(),
    .repeat_symbols_with_rootless_patterns = array_new(),
    .wildcard_root_pattern_count = 0,
    .language = ts_language_copy(language),
  };

  // NOTE(review): slot 0 of `negated_fields` appears to be a sentinel so
  // that a zero id can mean "no negated fields" — confirm against
  // ts_query__add_negated_fields.
  array_push(&self->negated_fields, 0);

  // Parse all of the S-expressions in the given string.
  Stream stream = stream_new(source, source_len);
  stream_skip_whitespace(&stream);
  while (stream.input < stream.end) {
    uint32_t pattern_index = self->patterns.size;
    uint32_t start_step_index = self->steps.size;
    uint32_t start_predicate_step_index = self->predicate_steps.size;
    array_push(&self->patterns, ((QueryPattern) {
      .steps = (Slice) {.offset = start_step_index},
      .predicate_steps = (Slice) {.offset = start_predicate_step_index},
      .start_byte = stream_offset(&stream),
      .is_non_local = false,
    }));
    CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
    *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
    // Terminate the pattern's step sequence with a DONE marker.
    array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));

    QueryPattern *pattern = array_back(&self->patterns);
    pattern->steps.length = self->steps.size - start_step_index;
    pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;

    // If any pattern could not be parsed, then report the error information
    // and terminate.
    if (*error_type) {
      // PARENT_DONE leaking to the top level means an unmatched closing
      // delimiter.
      if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
      *error_offset = stream_offset(&stream);
      capture_quantifiers_delete(&capture_quantifiers);
      ts_query_delete(self);
      return NULL;
    }

    // Maintain a list of capture quantifiers for each pattern
    array_push(&self->capture_quantifiers, capture_quantifiers);

    // Maintain a map that can look up patterns for a given root symbol.
    uint16_t wildcard_root_alternative_index = NONE;
    for (;;) {
      QueryStep *step = &self->steps.contents[start_step_index];

      // If a pattern has a wildcard at its root, but it has a non-wildcard child,
      // then optimize the matching process by skipping matching the wildcard.
      // Later, during the matching process, the query cursor will check that
      // there is a parent node, and capture it if necessary.
      if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
        QueryStep *second_step = &self->steps.contents[start_step_index + 1];
        if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) {
          wildcard_root_alternative_index = step->alternative_index;
          start_step_index += 1;
          step = second_step;
        }
      }

      // Determine whether the pattern has a single root node. This affects
      // decisions about whether or not to start matching the pattern when
      // a query cursor has a range restriction or when immediately within an
      // error node.
      uint32_t start_depth = step->depth;
      bool is_rooted = start_depth == 0;
      for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
        QueryStep *child_step = &self->steps.contents[step_index];
        if (child_step->is_dead_end) break;
        // A second step at the same depth means multiple top-level nodes.
        if (child_step->depth == start_depth) {
          is_rooted = false;
          break;
        }
      }

      ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
        .step_index = start_step_index,
        .pattern_index = pattern_index,
        .is_rooted = is_rooted
      });
      if (step->symbol == WILDCARD_SYMBOL) {
        self->wildcard_root_pattern_count++;
      }

      // If there are alternatives or options at the root of the pattern,
      // then add multiple entries to the pattern map.
      if (step->alternative_index != NONE) {
        start_step_index = step->alternative_index;
      } else if (wildcard_root_alternative_index != NONE) {
        start_step_index = wildcard_root_alternative_index;
        wildcard_root_alternative_index = NONE;
      } else {
        break;
      }
    }
  }

  // Static analysis of the patterns; failures here indicate a pattern
  // that can never match the language's node structure.
  if (!ts_query__analyze_patterns(self, error_offset)) {
    *error_type = TSQueryErrorStructure;
    ts_query_delete(self);
    return NULL;
  }

  // The string buffer is only needed during parsing.
  array_delete(&self->string_buffer);
  return self;
}
| 2796 | |||
| 2797 | void ts_query_delete(TSQuery *self) { | ||
| 2798 | if (self) { | ||
| 2799 | array_delete(&self->steps); | ||
| 2800 | array_delete(&self->pattern_map); | ||
| 2801 | array_delete(&self->predicate_steps); | ||
| 2802 | array_delete(&self->patterns); | ||
| 2803 | array_delete(&self->step_offsets); | ||
| 2804 | array_delete(&self->string_buffer); | ||
| 2805 | array_delete(&self->negated_fields); | ||
| 2806 | array_delete(&self->repeat_symbols_with_rootless_patterns); | ||
| 2807 | ts_language_delete(self->language); | ||
| 2808 | symbol_table_delete(&self->captures); | ||
| 2809 | symbol_table_delete(&self->predicate_values); | ||
| 2810 | for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { | ||
| 2811 | CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); | ||
| 2812 | capture_quantifiers_delete(capture_quantifiers); | ||
| 2813 | } | ||
| 2814 | array_delete(&self->capture_quantifiers); | ||
| 2815 | ts_free(self); | ||
| 2816 | } | ||
| 2817 | } | ||
| 2818 | |||
// Number of patterns that were parsed into this query.
uint32_t ts_query_pattern_count(const TSQuery *self) {
  return self->patterns.size;
}
| 2822 | |||
// Number of distinct capture names in the query's capture symbol table.
uint32_t ts_query_capture_count(const TSQuery *self) {
  return self->captures.slices.size;
}
| 2826 | |||
// Number of distinct string literals in the query's predicate-value table.
uint32_t ts_query_string_count(const TSQuery *self) {
  return self->predicate_values.slices.size;
}
| 2830 | |||
// Resolve a capture id back to its name. Writes the name's length to
// `*length` and returns a pointer owned by the query's symbol table.
const char *ts_query_capture_name_for_id(
  const TSQuery *self,
  uint32_t index,
  uint32_t *length
) {
  return symbol_table_name_for_id(&self->captures, index, length);
}
| 2838 | |||
// Quantifier with which the given capture id appears in the given
// pattern (one, zero-or-one, zero-or-more, or one-or-more).
TSQuantifier ts_query_capture_quantifier_for_id(
  const TSQuery *self,
  uint32_t pattern_index,
  uint32_t capture_index
) {
  CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
  return capture_quantifier_for_id(capture_quantifiers, capture_index);
}
| 2847 | |||
// Resolve a predicate string id back to its text. Writes the string's
// length to `*length` and returns a pointer owned by the query's
// predicate-value table.
const char *ts_query_string_value_for_id(
  const TSQuery *self,
  uint32_t index,
  uint32_t *length
) {
  return symbol_table_name_for_id(&self->predicate_values, index, length);
}
| 2855 | |||
| 2856 | const TSQueryPredicateStep *ts_query_predicates_for_pattern( | ||
| 2857 | const TSQuery *self, | ||
| 2858 | uint32_t pattern_index, | ||
| 2859 | uint32_t *step_count | ||
| 2860 | ) { | ||
| 2861 | Slice slice = self->patterns.contents[pattern_index].predicate_steps; | ||
| 2862 | *step_count = slice.length; | ||
| 2863 | if (self->predicate_steps.contents == NULL) { | ||
| 2864 | return NULL; | ||
| 2865 | } | ||
| 2866 | return &self->predicate_steps.contents[slice.offset]; | ||
| 2867 | } | ||
| 2868 | |||
// Byte offset within the query source where the given pattern begins.
uint32_t ts_query_start_byte_for_pattern(
  const TSQuery *self,
  uint32_t pattern_index
) {
  return self->patterns.contents[pattern_index].start_byte;
}
| 2875 | |||
| 2876 | bool ts_query_is_pattern_rooted( | ||
| 2877 | const TSQuery *self, | ||
| 2878 | uint32_t pattern_index | ||
| 2879 | ) { | ||
| 2880 | for (unsigned i = 0; i < self->pattern_map.size; i++) { | ||
| 2881 | PatternEntry *entry = &self->pattern_map.contents[i]; | ||
| 2882 | if (entry->pattern_index == pattern_index) { | ||
| 2883 | if (!entry->is_rooted) return false; | ||
| 2884 | } | ||
| 2885 | } | ||
| 2886 | return true; | ||
| 2887 | } | ||
| 2888 | |||
| 2889 | bool ts_query_is_pattern_non_local( | ||
| 2890 | const TSQuery *self, | ||
| 2891 | uint32_t pattern_index | ||
| 2892 | ) { | ||
| 2893 | if (pattern_index < self->patterns.size) { | ||
| 2894 | return self->patterns.contents[pattern_index].is_non_local; | ||
| 2895 | } else { | ||
| 2896 | return false; | ||
| 2897 | } | ||
| 2898 | } | ||
| 2899 | |||
| 2900 | bool ts_query_is_pattern_guaranteed_at_step( | ||
| 2901 | const TSQuery *self, | ||
| 2902 | uint32_t byte_offset | ||
| 2903 | ) { | ||
| 2904 | uint32_t step_index = UINT32_MAX; | ||
| 2905 | for (unsigned i = 0; i < self->step_offsets.size; i++) { | ||
| 2906 | StepOffset *step_offset = &self->step_offsets.contents[i]; | ||
| 2907 | if (step_offset->byte_offset > byte_offset) break; | ||
| 2908 | step_index = step_offset->step_index; | ||
| 2909 | } | ||
| 2910 | if (step_index < self->steps.size) { | ||
| 2911 | return self->steps.contents[step_index].root_pattern_guaranteed; | ||
| 2912 | } else { | ||
| 2913 | return false; | ||
| 2914 | } | ||
| 2915 | } | ||
| 2916 | |||
| 2917 | bool ts_query__step_is_fallible( | ||
| 2918 | const TSQuery *self, | ||
| 2919 | uint16_t step_index | ||
| 2920 | ) { | ||
| 2921 | assert((uint32_t)step_index + 1 < self->steps.size); | ||
| 2922 | QueryStep *step = &self->steps.contents[step_index]; | ||
| 2923 | QueryStep *next_step = &self->steps.contents[step_index + 1]; | ||
| 2924 | return ( | ||
| 2925 | next_step->depth != PATTERN_DONE_MARKER && | ||
| 2926 | next_step->depth > step->depth && | ||
| 2927 | !next_step->parent_pattern_guaranteed | ||
| 2928 | ); | ||
| 2929 | } | ||
| 2930 | |||
| 2931 | void ts_query_disable_capture( | ||
| 2932 | TSQuery *self, | ||
| 2933 | const char *name, | ||
| 2934 | uint32_t length | ||
| 2935 | ) { | ||
| 2936 | // Remove capture information for any pattern step that previously | ||
| 2937 | // captured with the given name. | ||
| 2938 | int id = symbol_table_id_for_name(&self->captures, name, length); | ||
| 2939 | if (id != -1) { | ||
| 2940 | for (unsigned i = 0; i < self->steps.size; i++) { | ||
| 2941 | QueryStep *step = &self->steps.contents[i]; | ||
| 2942 | query_step__remove_capture(step, id); | ||
| 2943 | } | ||
| 2944 | } | ||
| 2945 | } | ||
| 2946 | |||
| 2947 | void ts_query_disable_pattern( | ||
| 2948 | TSQuery *self, | ||
| 2949 | uint32_t pattern_index | ||
| 2950 | ) { | ||
| 2951 | // Remove the given pattern from the pattern map. Its steps will still | ||
| 2952 | // be in the `steps` array, but they will never be read. | ||
| 2953 | for (unsigned i = 0; i < self->pattern_map.size; i++) { | ||
| 2954 | PatternEntry *pattern = &self->pattern_map.contents[i]; | ||
| 2955 | if (pattern->pattern_index == pattern_index) { | ||
| 2956 | array_erase(&self->pattern_map, i); | ||
| 2957 | i--; | ||
| 2958 | } | ||
| 2959 | } | ||
| 2960 | } | ||
| 2961 | |||
| 2962 | /*************** | ||
| 2963 | * QueryCursor | ||
| 2964 | ***************/ | ||
| 2965 | |||
// Allocate a query cursor with default settings: unbounded byte range,
// unbounded point range, and no start-depth limit.
TSQueryCursor *ts_query_cursor_new(void) {
  TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
  // The compound-literal assignment also zero-initializes any fields of
  // TSQueryCursor that are not listed explicitly.
  *self = (TSQueryCursor) {
    .did_exceed_match_limit = false,
    .ascending = false,
    .halted = false,
    .states = array_new(),
    .finished_states = array_new(),
    .capture_list_pool = capture_list_pool_new(),
    .start_byte = 0,
    .end_byte = UINT32_MAX,
    .start_point = {0, 0},
    .end_point = POINT_MAX,
    .max_start_depth = UINT32_MAX,
  };
  // Pre-allocate small state arrays to avoid reallocation churn at the
  // start of a query run.
  array_reserve(&self->states, 8);
  array_reserve(&self->finished_states, 8);
  return self;
}
| 2985 | |||
| 2986 | void ts_query_cursor_delete(TSQueryCursor *self) { | ||
| 2987 | array_delete(&self->states); | ||
| 2988 | array_delete(&self->finished_states); | ||
| 2989 | ts_tree_cursor_delete(&self->cursor); | ||
| 2990 | capture_list_pool_delete(&self->capture_list_pool); | ||
| 2991 | ts_free(self); | ||
| 2992 | } | ||
| 2993 | |||
| 2994 | bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { | ||
| 2995 | return self->did_exceed_match_limit; | ||
| 2996 | } | ||
| 2997 | |||
| 2998 | uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { | ||
| 2999 | return self->capture_list_pool.max_capture_list_count; | ||
| 3000 | } | ||
| 3001 | |||
// Set the maximum number of concurrently-live capture lists. When the pool
// is exhausted, matches may be abandoned and
// ts_query_cursor_did_exceed_match_limit will report true.
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
  self->capture_list_pool.max_capture_list_count = limit;
}
| 3005 | |||
| 3006 | #ifdef DEBUG_EXECUTE_QUERY | ||
| 3007 | #define LOG(...) fprintf(stderr, __VA_ARGS__) | ||
| 3008 | #else | ||
| 3009 | #define LOG(...) | ||
| 3010 | #endif | ||
| 3011 | |||
// Begin executing `query` against the syntax tree rooted at `node`.
//
// Any in-progress or finished matches from a previous execution are
// discarded, the tree cursor is rewound to `node`, and all of the cursor's
// per-execution traversal state is reset. With DEBUG_EXECUTE_QUERY enabled,
// a non-NULL `query` also has its compiled steps dumped via LOG (a no-op
// otherwise).
void ts_query_cursor_exec(
  TSQueryCursor *self,
  const TSQuery *query,
  TSNode node
) {
  if (query) {
    LOG("query steps:\n");
    for (unsigned i = 0; i < query->steps.size; i++) {
      QueryStep *step = &query->steps.contents[i];
      LOG(" %u: {", i);
      if (step->depth == PATTERN_DONE_MARKER) {
        LOG("DONE");
      } else if (step->is_dead_end) {
        LOG("dead_end");
      } else if (step->is_pass_through) {
        LOG("pass_through");
      } else if (step->symbol != WILDCARD_SYMBOL) {
        LOG("symbol: %s", query->language->symbol_names[step->symbol]);
      } else {
        LOG("symbol: *");
      }
      if (step->field) {
        LOG(", field: %s", query->language->field_names[step->field]);
      }
      if (step->alternative_index != NONE) {
        LOG(", alternative: %u", step->alternative_index);
      }
      LOG("},\n");
    }
  }

  // Reset all per-execution state: drop old states, rewind the tree
  // cursor, and return every capture list to the pool.
  array_clear(&self->states);
  array_clear(&self->finished_states);
  ts_tree_cursor_reset(&self->cursor, node);
  capture_list_pool_reset(&self->capture_list_pool);
  self->on_visible_node = true;
  self->next_state_id = 0;
  self->depth = 0;
  self->ascending = false;
  self->halted = false;
  self->query = query;
  self->did_exceed_match_limit = false;
}
| 3055 | |||
| 3056 | void ts_query_cursor_set_byte_range( | ||
| 3057 | TSQueryCursor *self, | ||
| 3058 | uint32_t start_byte, | ||
| 3059 | uint32_t end_byte | ||
| 3060 | ) { | ||
| 3061 | if (end_byte == 0) { | ||
| 3062 | end_byte = UINT32_MAX; | ||
| 3063 | } | ||
| 3064 | self->start_byte = start_byte; | ||
| 3065 | self->end_byte = end_byte; | ||
| 3066 | } | ||
| 3067 | |||
| 3068 | void ts_query_cursor_set_point_range( | ||
| 3069 | TSQueryCursor *self, | ||
| 3070 | TSPoint start_point, | ||
| 3071 | TSPoint end_point | ||
| 3072 | ) { | ||
| 3073 | if (end_point.row == 0 && end_point.column == 0) { | ||
| 3074 | end_point = POINT_MAX; | ||
| 3075 | } | ||
| 3076 | self->start_point = start_point; | ||
| 3077 | self->end_point = end_point; | ||
| 3078 | } | ||
| 3079 | |||
// Search through all of the in-progress states, and find the captured
// node that occurs earliest in the document.
//
// Out parameters (all initialized to UINT32_MAX):
// - `state_index`: index into `self->states` of the winning state.
// - `byte_offset`: start byte of that state's earliest unconsumed capture.
// - `pattern_index`: the winning state's pattern index; used to break ties
//   between captures that start at the same byte.
// - `root_pattern_guaranteed`: if non-NULL, receives the winning step's
//   `root_pattern_guaranteed` flag. If NULL, candidate states whose next
//   step has that flag set are skipped instead.
//
// Returns true if any eligible in-progress capture was found.
static bool ts_query_cursor__first_in_progress_capture(
  TSQueryCursor *self,
  uint32_t *state_index,
  uint32_t *byte_offset,
  uint32_t *pattern_index,
  bool *root_pattern_guaranteed
) {
  bool result = false;
  *state_index = UINT32_MAX;
  *byte_offset = UINT32_MAX;
  *pattern_index = UINT32_MAX;
  for (unsigned i = 0; i < self->states.size; i++) {
    QueryState *state = &self->states.contents[i];
    if (state->dead) continue;

    const CaptureList *captures = capture_list_pool_get(
      &self->capture_list_pool,
      state->capture_list_id
    );
    // This state has no unconsumed captures left.
    if (state->consumed_capture_count >= captures->size) {
      continue;
    }

    TSNode node = captures->contents[state->consumed_capture_count].node;
    // Captures ending before the cursor's range starts can never be
    // returned: consume them, then re-examine this same state (`i--`
    // cancels the loop increment).
    if (
      ts_node_end_byte(node) <= self->start_byte ||
      point_lte(ts_node_end_point(node), self->start_point)
    ) {
      state->consumed_capture_count++;
      i--;
      continue;
    }

    uint32_t node_start_byte = ts_node_start_byte(node);
    // Keep the earliest-starting capture; ties go to the lower pattern index.
    if (
      !result ||
      node_start_byte < *byte_offset ||
      (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
    ) {
      QueryStep *step = &self->query->steps.contents[state->step_index];
      if (root_pattern_guaranteed) {
        *root_pattern_guaranteed = step->root_pattern_guaranteed;
      } else if (step->root_pattern_guaranteed) {
        continue;
      }

      result = true;
      *state_index = i;
      *byte_offset = node_start_byte;
      *pattern_index = state->pattern_index;
    }
  }
  return result;
}
| 3136 | |||
| 3137 | // Determine which node is first in a depth-first traversal | ||
| 3138 | int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { | ||
| 3139 | if (left.id != right.id) { | ||
| 3140 | uint32_t left_start = ts_node_start_byte(left); | ||
| 3141 | uint32_t right_start = ts_node_start_byte(right); | ||
| 3142 | if (left_start < right_start) return -1; | ||
| 3143 | if (left_start > right_start) return 1; | ||
| 3144 | uint32_t left_node_count = ts_node_end_byte(left); | ||
| 3145 | uint32_t right_node_count = ts_node_end_byte(right); | ||
| 3146 | if (left_node_count > right_node_count) return -1; | ||
| 3147 | if (left_node_count < right_node_count) return 1; | ||
| 3148 | } | ||
| 3149 | return 0; | ||
| 3150 | } | ||
| 3151 | |||
// Determine if either state contains a superset of the other state's captures.
//
// The two capture lists are walked in lock-step (captures are ordered by
// document position). `*left_contains_right` and `*right_contains_left`
// start out true and are cleared as soon as a capture is found in one list
// but not the other.
void ts_query_cursor__compare_captures(
  TSQueryCursor *self,
  QueryState *left_state,
  QueryState *right_state,
  bool *left_contains_right,
  bool *right_contains_left
) {
  const CaptureList *left_captures = capture_list_pool_get(
    &self->capture_list_pool,
    left_state->capture_list_id
  );
  const CaptureList *right_captures = capture_list_pool_get(
    &self->capture_list_pool,
    right_state->capture_list_id
  );
  *left_contains_right = true;
  *right_contains_left = true;
  unsigned i = 0, j = 0;
  for (;;) {
    if (i < left_captures->size) {
      if (j < right_captures->size) {
        TSQueryCapture *left = &left_captures->contents[i];
        TSQueryCapture *right = &right_captures->contents[j];
        if (left->node.id == right->node.id && left->index == right->index) {
          // Identical capture in both lists: advance both.
          i++;
          j++;
        } else {
          switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
            case -1:
              // Left capture occurs earlier, so it is missing on the right.
              *right_contains_left = false;
              i++;
              break;
            case 1:
              // Right capture occurs earlier, so it is missing on the left.
              *left_contains_right = false;
              j++;
              break;
            default:
              // Same span but different capture index: each side has a
              // capture the other lacks.
              *right_contains_left = false;
              *left_contains_right = false;
              i++;
              j++;
              break;
          }
        }
      } else {
        // Right list exhausted while the left list still has captures.
        *right_contains_left = false;
        break;
      }
    } else {
      // Left list exhausted; any remaining right captures are unmatched.
      if (j < right_captures->size) {
        *left_contains_right = false;
      }
      break;
    }
  }
}
| 3209 | |||
// Create a new in-progress match state for `pattern`, anchored at the
// current cursor position. `start_depth` is the depth of the node that
// matches the pattern's root: for wildcard-rooted patterns the pattern map
// points at a step whose `depth` may be greater than zero, hence the
// subtraction.
static void ts_query_cursor__add_state(
  TSQueryCursor *self,
  const PatternEntry *pattern
) {
  QueryStep *step = &self->query->steps.contents[pattern->step_index];
  uint32_t start_depth = self->depth - step->depth;

  // Keep the states array in ascending order of start_depth and pattern_index,
  // so that it can be processed more efficiently elsewhere. Usually, there is
  // no work to do here because of two facts:
  // * States with lower start_depth are naturally added first due to the
  //   order in which nodes are visited.
  // * Earlier patterns are naturally added first because of the ordering of the
  //   pattern_map data structure that's used to initiate matches.
  //
  // This loop is only needed in cases where two conditions hold:
  // * A pattern consists of more than one sibling node, so that its states
  //   remain in progress after exiting the node that started the match.
  // * The first node in the pattern matches against multiple nodes at the
  //   same depth.
  //
  // An example of this is the pattern '((comment)* (function))'. If multiple
  // `comment` nodes appear in a row, then we may initiate a new state for this
  // pattern while another state for the same pattern is already in progress.
  // If there are multiple patterns like this in a query, then this loop will
  // need to execute in order to keep the states ordered by pattern_index.
  uint32_t index = self->states.size;
  while (index > 0) {
    QueryState *prev_state = &self->states.contents[index - 1];
    if (prev_state->start_depth < start_depth) break;
    if (prev_state->start_depth == start_depth) {
      // Avoid inserting an unnecessary duplicate state, which would be
      // immediately pruned by the longest-match criteria.
      if (
        prev_state->pattern_index == pattern->pattern_index &&
        prev_state->step_index == pattern->step_index
      ) return;
      if (prev_state->pattern_index <= pattern->pattern_index) break;
    }
    index--;
  }

  LOG(
    " start state. pattern:%u, step:%u\n",
    pattern->pattern_index,
    pattern->step_index
  );
  // The new state starts with no capture list (one is acquired lazily on
  // the first capture) and must match its first node immediately.
  array_insert(&self->states, index, ((QueryState) {
    .id = UINT32_MAX,
    .capture_list_id = NONE,
    .step_index = pattern->step_index,
    .pattern_index = pattern->pattern_index,
    .start_depth = start_depth,
    .consumed_capture_count = 0,
    .seeking_immediate_match = true,
    .has_in_progress_alternatives = false,
    .needs_parent = step->depth == 1,
    .dead = false,
  }));
}
| 3270 | |||
// Acquire a capture list for this state. If there are no capture lists left in the
// pool, this will steal the capture list from another existing state, and mark that
// other state as 'dead'.
//
// `state_index_to_preserve` names one state that must never be cannibalized
// (pass UINT32_MAX to allow stealing from any state). Returns NULL when no
// list could be acquired or stolen; callers are expected to kill the state
// in that case.
static CaptureList *ts_query_cursor__prepare_to_capture(
  TSQueryCursor *self,
  QueryState *state,
  unsigned state_index_to_preserve
) {
  if (state->capture_list_id == NONE) {
    state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);

    // If there are no capture lists left in the pool, then terminate whichever
    // state has captured the earliest node in the document, and steal its
    // capture list.
    if (state->capture_list_id == NONE) {
      self->did_exceed_match_limit = true;
      uint32_t state_index, byte_offset, pattern_index;
      if (
        ts_query_cursor__first_in_progress_capture(
          self,
          &state_index,
          &byte_offset,
          &pattern_index,
          NULL
        ) &&
        state_index != state_index_to_preserve
      ) {
        LOG(
          " abandon state. index:%u, pattern:%u, offset:%u.\n",
          state_index, pattern_index, byte_offset
        );
        // Take over the victim's list, mark it dead, and clear the list
        // for reuse by this state.
        QueryState *other_state = &self->states.contents[state_index];
        state->capture_list_id = other_state->capture_list_id;
        other_state->capture_list_id = NONE;
        other_state->dead = true;
        CaptureList *list = capture_list_pool_get_mut(
          &self->capture_list_pool,
          state->capture_list_id
        );
        array_clear(list);
        return list;
      } else {
        LOG(" ran out of capture lists");
        return NULL;
      }
    }
  }
  return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
}
| 3320 | |||
| 3321 | static void ts_query_cursor__capture( | ||
| 3322 | TSQueryCursor *self, | ||
| 3323 | QueryState *state, | ||
| 3324 | QueryStep *step, | ||
| 3325 | TSNode node | ||
| 3326 | ) { | ||
| 3327 | if (state->dead) return; | ||
| 3328 | CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); | ||
| 3329 | if (!capture_list) { | ||
| 3330 | state->dead = true; | ||
| 3331 | return; | ||
| 3332 | } | ||
| 3333 | |||
| 3334 | for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { | ||
| 3335 | uint16_t capture_id = step->capture_ids[j]; | ||
| 3336 | if (step->capture_ids[j] == NONE) break; | ||
| 3337 | array_push(capture_list, ((TSQueryCapture) { node, capture_id })); | ||
| 3338 | LOG( | ||
| 3339 | " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", | ||
| 3340 | ts_node_type(node), | ||
| 3341 | state->pattern_index, | ||
| 3342 | capture_id, | ||
| 3343 | capture_list->size | ||
| 3344 | ); | ||
| 3345 | } | ||
| 3346 | } | ||
| 3347 | |||
// Duplicate the given state and insert the newly-created state immediately after
// the given state in the `states` array. Ensures that the given state reference is
// still valid, even if the states array is reallocated. Returns NULL (and
// inserts nothing) if a capture list could not be acquired for the copy.
static QueryState *ts_query_cursor__copy_state(
  TSQueryCursor *self,
  QueryState **state_ref
) {
  const QueryState *state = *state_ref;
  uint32_t state_index = (uint32_t)(state - self->states.contents);
  QueryState copy = *state;
  copy.capture_list_id = NONE;

  // If the state has captures, copy its capture list.
  if (state->capture_list_id != NONE) {
    // This may steal a list from (and kill) some other state, but never
    // from `state_index` itself, which is explicitly preserved.
    CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
    if (!new_captures) return NULL;
    const CaptureList *old_captures = capture_list_pool_get(
      &self->capture_list_pool,
      state->capture_list_id
    );
    array_push_all(new_captures, old_captures);
  }

  // `array_insert` may reallocate `contents`, so re-derive both pointers
  // from the (possibly new) base address before returning.
  array_insert(&self->states, state_index + 1, copy);
  *state_ref = &self->states.contents[state_index];
  return &self->states.contents[state_index + 1];
}
| 3375 | |||
| 3376 | static inline bool ts_query_cursor__should_descend( | ||
| 3377 | TSQueryCursor *self, | ||
| 3378 | bool node_intersects_range | ||
| 3379 | ) { | ||
| 3380 | |||
| 3381 | if (node_intersects_range && self->depth < self->max_start_depth) { | ||
| 3382 | return true; | ||
| 3383 | } | ||
| 3384 | |||
| 3385 | // If there are in-progress matches whose remaining steps occur | ||
| 3386 | // deeper in the tree, then descend. | ||
| 3387 | for (unsigned i = 0; i < self->states.size; i++) { | ||
| 3388 | QueryState *state = &self->states.contents[i];; | ||
| 3389 | QueryStep *next_step = &self->query->steps.contents[state->step_index]; | ||
| 3390 | if ( | ||
| 3391 | next_step->depth != PATTERN_DONE_MARKER && | ||
| 3392 | state->start_depth + next_step->depth > self->depth | ||
| 3393 | ) { | ||
| 3394 | return true; | ||
| 3395 | } | ||
| 3396 | } | ||
| 3397 | |||
| 3398 | if (self->depth >= self->max_start_depth) { | ||
| 3399 | return false; | ||
| 3400 | } | ||
| 3401 | |||
| 3402 | // If the current node is hidden, then a non-rooted pattern might match | ||
| 3403 | // one if its roots inside of this node, and match another of its roots | ||
| 3404 | // as part of a sibling node, so we may need to descend. | ||
| 3405 | if (!self->on_visible_node) { | ||
| 3406 | // Descending into a repetition node outside of the range can be | ||
| 3407 | // expensive, because these nodes can have many visible children. | ||
| 3408 | // Avoid descending into repetition nodes unless we have already | ||
| 3409 | // determined that this query can match rootless patterns inside | ||
| 3410 | // of this type of repetition node. | ||
| 3411 | Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); | ||
| 3412 | if (ts_subtree_is_repetition(subtree)) { | ||
| 3413 | bool exists; | ||
| 3414 | uint32_t index; | ||
| 3415 | array_search_sorted_by( | ||
| 3416 | &self->query->repeat_symbols_with_rootless_patterns,, | ||
| 3417 | ts_subtree_symbol(subtree), | ||
| 3418 | &index, | ||
| 3419 | &exists | ||
| 3420 | ); | ||
| 3421 | return exists; | ||
| 3422 | } | ||
| 3423 | |||
| 3424 | return true; | ||
| 3425 | } | ||
| 3426 | |||
| 3427 | return false; | ||
| 3428 | } | ||
| 3429 | |||
| 3430 | // Walk the tree, processing patterns until at least one pattern finishes, | ||
| 3431 | // If one or more patterns finish, return `true` and store their states in the | ||
| 3432 | // `finished_states` array. Multiple patterns can finish on the same node. If | ||
| 3433 | // there are no more matches, return `false`. | ||
| 3434 | static inline bool ts_query_cursor__advance( | ||
| 3435 | TSQueryCursor *self, | ||
| 3436 | bool stop_on_definite_step | ||
| 3437 | ) { | ||
| 3438 | bool did_match = false; | ||
| 3439 | for (;;) { | ||
| 3440 | if (self->halted) { | ||
| 3441 | while (self->states.size > 0) { | ||
| 3442 | QueryState state = array_pop(&self->states); | ||
| 3443 | capture_list_pool_release( | ||
| 3444 | &self->capture_list_pool, | ||
| 3445 | state.capture_list_id | ||
| 3446 | ); | ||
| 3447 | } | ||
| 3448 | } | ||
| 3449 | |||
| 3450 | if (did_match || self->halted) return did_match; | ||
| 3451 | |||
| 3452 | // Exit the current node. | ||
| 3453 | if (self->ascending) { | ||
| 3454 | if (self->on_visible_node) { | ||
| 3455 | LOG( | ||
| 3456 | "leave node. depth:%u, type:%s\n", | ||
| 3457 | self->depth, | ||
| 3458 | ts_node_type(ts_tree_cursor_current_node(&self->cursor)) | ||
| 3459 | ); | ||
| 3460 | |||
| 3461 | // After leaving a node, remove any states that cannot make further progress. | ||
| 3462 | uint32_t deleted_count = 0; | ||
| 3463 | for (unsigned i = 0, n = self->states.size; i < n; i++) { | ||
| 3464 | QueryState *state = &self->states.contents[i]; | ||
| 3465 | QueryStep *step = &self->query->steps.contents[state->step_index]; | ||
| 3466 | |||
| 3467 | // If a state completed its pattern inside of this node, but was deferred from finishing | ||
| 3468 | // in order to search for longer matches, mark it as finished. | ||
| 3469 | if ( | ||
| 3470 | step->depth == PATTERN_DONE_MARKER && | ||
| 3471 | (state->start_depth > self->depth || self->depth == 0) | ||
| 3472 | ) { | ||
| 3473 | LOG(" finish pattern %u\n", state->pattern_index); | ||
| 3474 | array_push(&self->finished_states, *state); | ||
| 3475 | did_match = true; | ||
| 3476 | deleted_count++; | ||
| 3477 | } | ||
| 3478 | |||
| 3479 | // If a state needed to match something within this node, then remove that state | ||
| 3480 | // as it has failed to match. | ||
| 3481 | else if ( | ||
| 3482 | step->depth != PATTERN_DONE_MARKER && | ||
| 3483 | (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth | ||
| 3484 | ) { | ||
| 3485 | LOG( | ||
| 3486 | " failed to match. pattern:%u, step:%u\n", | ||
| 3487 | state->pattern_index, | ||
| 3488 | state->step_index | ||
| 3489 | ); | ||
| 3490 | capture_list_pool_release( | ||
| 3491 | &self->capture_list_pool, | ||
| 3492 | state->capture_list_id | ||
| 3493 | ); | ||
| 3494 | deleted_count++; | ||
| 3495 | } | ||
| 3496 | |||
| 3497 | else if (deleted_count > 0) { | ||
| 3498 | self->states.contents[i - deleted_count] = *state; | ||
| 3499 | } | ||
| 3500 | } | ||
| 3501 | self->states.size -= deleted_count; | ||
| 3502 | } | ||
| 3503 | |||
| 3504 | // Leave this node by stepping to its next sibling or to its parent. | ||
| 3505 | switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { | ||
| 3506 | case TreeCursorStepVisible: | ||
| 3507 | if (!self->on_visible_node) { | ||
| 3508 | self->depth++; | ||
| 3509 | self->on_visible_node = true; | ||
| 3510 | } | ||
| 3511 | self->ascending = false; | ||
| 3512 | break; | ||
| 3513 | case TreeCursorStepHidden: | ||
| 3514 | if (self->on_visible_node) { | ||
| 3515 | self->depth--; | ||
| 3516 | self->on_visible_node = false; | ||
| 3517 | } | ||
| 3518 | self->ascending = false; | ||
| 3519 | break; | ||
| 3520 | default: | ||
| 3521 | if (ts_tree_cursor_goto_parent(&self->cursor)) { | ||
| 3522 | self->depth--; | ||
| 3523 | } else { | ||
| 3524 | LOG("halt at root\n"); | ||
| 3525 | self->halted = true; | ||
| 3526 | } | ||
| 3527 | } | ||
| 3528 | } | ||
| 3529 | |||
| 3530 | // Enter a new node. | ||
| 3531 | else { | ||
| 3532 | // Get the properties of the current node. | ||
| 3533 | TSNode node = ts_tree_cursor_current_node(&self->cursor); | ||
| 3534 | TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); | ||
| 3535 | bool parent_precedes_range = !ts_node_is_null(parent_node) && ( | ||
| 3536 | ts_node_end_byte(parent_node) <= self->start_byte || | ||
| 3537 | point_lte(ts_node_end_point(parent_node), self->start_point) | ||
| 3538 | ); | ||
| 3539 | bool parent_follows_range = !ts_node_is_null(parent_node) && ( | ||
| 3540 | ts_node_start_byte(parent_node) >= self->end_byte || | ||
| 3541 | point_gte(ts_node_start_point(parent_node), self->end_point) | ||
| 3542 | ); | ||
| 3543 | bool node_precedes_range = parent_precedes_range || ( | ||
| 3544 | ts_node_end_byte(node) <= self->start_byte || | ||
| 3545 | point_lte(ts_node_end_point(node), self->start_point) | ||
| 3546 | ); | ||
| 3547 | bool node_follows_range = parent_follows_range || ( | ||
| 3548 | ts_node_start_byte(node) >= self->end_byte || | ||
| 3549 | point_gte(ts_node_start_point(node), self->end_point) | ||
| 3550 | ); | ||
| 3551 | bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; | ||
| 3552 | bool node_intersects_range = !node_precedes_range && !node_follows_range; | ||
| 3553 | |||
| 3554 | if (self->on_visible_node) { | ||
| 3555 | TSSymbol symbol = ts_node_symbol(node); | ||
| 3556 | bool is_named = ts_node_is_named(node); | ||
| 3557 | bool has_later_siblings; | ||
| 3558 | bool has_later_named_siblings; | ||
| 3559 | bool can_have_later_siblings_with_this_field; | ||
| 3560 | TSFieldId field_id = 0; | ||
| 3561 | TSSymbol supertypes[8] = {0}; | ||
| 3562 | unsigned supertype_count = 8; | ||
| 3563 | ts_tree_cursor_current_status( | ||
| 3564 | &self->cursor, | ||
| 3565 | &field_id, | ||
| 3566 | &has_later_siblings, | ||
| 3567 | &has_later_named_siblings, | ||
| 3568 | &can_have_later_siblings_with_this_field, | ||
| 3569 | supertypes, | ||
| 3570 | &supertype_count | ||
| 3571 | ); | ||
| 3572 | LOG( | ||
| 3573 | "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", | ||
| 3574 | self->depth, | ||
| 3575 | ts_node_type(node), | ||
| 3576 | ts_language_field_name_for_id(self->query->language, field_id), | ||
| 3577 | ts_node_start_point(node).row, | ||
| 3578 | self->states.size, | ||
| 3579 | self->finished_states.size | ||
| 3580 | ); | ||
| 3581 | |||
| 3582 | bool node_is_error = symbol == ts_builtin_sym_error; | ||
| 3583 | bool parent_is_error = | ||
| 3584 | !ts_node_is_null(parent_node) && | ||
| 3585 | ts_node_symbol(parent_node) == ts_builtin_sym_error; | ||
| 3586 | |||
| 3587 | // Add new states for any patterns whose root node is a wildcard. | ||
| 3588 | if (!node_is_error) { | ||
| 3589 | for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { | ||
| 3590 | PatternEntry *pattern = &self->query->pattern_map.contents[i]; | ||
| 3591 | |||
| 3592 | // If this node matches the first step of the pattern, then add a new | ||
| 3593 | // state at the start of this pattern. | ||
| 3594 | QueryStep *step = &self->query->steps.contents[pattern->step_index]; | ||
| 3595 | uint32_t start_depth = self->depth - step->depth; | ||
| 3596 | if ( | ||
| 3597 | (pattern->is_rooted ? | ||
| 3598 | node_intersects_range : | ||
| 3599 | (parent_intersects_range && !parent_is_error)) && | ||
| 3600 | (!step->field || field_id == step->field) && | ||
| 3601 | (!step->supertype_symbol || supertype_count > 0) && | ||
| 3602 | (start_depth <= self->max_start_depth) | ||
| 3603 | ) { | ||
| 3604 | ts_query_cursor__add_state(self, pattern); | ||
| 3605 | } | ||
| 3606 | } | ||
| 3607 | } | ||
| 3608 | |||
| 3609 | // Add new states for any patterns whose root node matches this node. | ||
| 3610 | unsigned i; | ||
| 3611 | if (ts_query__pattern_map_search(self->query, symbol, &i)) { | ||
| 3612 | PatternEntry *pattern = &self->query->pattern_map.contents[i]; | ||
| 3613 | |||
| 3614 | QueryStep *step = &self->query->steps.contents[pattern->step_index]; | ||
| 3615 | uint32_t start_depth = self->depth - step->depth; | ||
| 3616 | do { | ||
| 3617 | // If this node matches the first step of the pattern, then add a new | ||
| 3618 | // state at the start of this pattern. | ||
| 3619 | if ( | ||
| 3620 | (pattern->is_rooted ? | ||
| 3621 | node_intersects_range : | ||
| 3622 | (parent_intersects_range && !parent_is_error)) && | ||
| 3623 | (!step->field || field_id == step->field) && | ||
| 3624 | (start_depth <= self->max_start_depth) | ||
| 3625 | ) { | ||
| 3626 | ts_query_cursor__add_state(self, pattern); | ||
| 3627 | } | ||
| 3628 | |||
| 3629 | // Advance to the next pattern whose root node matches this node. | ||
| 3630 | i++; | ||
| 3631 | if (i == self->query->pattern_map.size) break; | ||
| 3632 | pattern = &self->query->pattern_map.contents[i]; | ||
| 3633 | step = &self->query->steps.contents[pattern->step_index]; | ||
| 3634 | } while (step->symbol == symbol); | ||
| 3635 | } | ||
| 3636 | |||
| 3637 | // Update all of the in-progress states with current node. | ||
| 3638 | for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { | ||
| 3639 | QueryState *state = &self->states.contents[j]; | ||
| 3640 | QueryStep *step = &self->query->steps.contents[state->step_index]; | ||
| 3641 | state->has_in_progress_alternatives = false; | ||
| 3642 | copy_count = 0; | ||
| 3643 | |||
| 3644 | // Check that the node matches all of the criteria for the next | ||
| 3645 | // step of the pattern. | ||
| 3646 | if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; | ||
| 3647 | |||
| 3648 | // Determine if this node matches this step of the pattern, and also | ||
| 3649 | // if this node can have later siblings that match this step of the | ||
| 3650 | // pattern. | ||
| 3651 | bool node_does_match = false; | ||
| 3652 | if (step->symbol == WILDCARD_SYMBOL) { | ||
| 3653 | node_does_match = !node_is_error && (is_named || !step->is_named); | ||
| 3654 | } else { | ||
| 3655 | node_does_match = symbol == step->symbol; | ||
| 3656 | } | ||
| 3657 | bool later_sibling_can_match = has_later_siblings; | ||
| 3658 | if ((step->is_immediate && is_named) || state->seeking_immediate_match) { | ||
| 3659 | later_sibling_can_match = false; | ||
| 3660 | } | ||
| 3661 | if (step->is_last_child && has_later_named_siblings) { | ||
| 3662 | node_does_match = false; | ||
| 3663 | } | ||
| 3664 | if (step->supertype_symbol) { | ||
| 3665 | bool has_supertype = false; | ||
| 3666 | for (unsigned k = 0; k < supertype_count; k++) { | ||
| 3667 | if (supertypes[k] == step->supertype_symbol) { | ||
| 3668 | has_supertype = true; | ||
| 3669 | break; | ||
| 3670 | } | ||
| 3671 | } | ||
| 3672 | if (!has_supertype) node_does_match = false; | ||
| 3673 | } | ||
| 3674 | if (step->field) { | ||
| 3675 | if (step->field == field_id) { | ||
| 3676 | if (!can_have_later_siblings_with_this_field) { | ||
| 3677 | later_sibling_can_match = false; | ||
| 3678 | } | ||
| 3679 | } else { | ||
| 3680 | node_does_match = false; | ||
| 3681 | } | ||
| 3682 | } | ||
| 3683 | |||
| 3684 | if (step->negated_field_list_id) { | ||
| 3685 | TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; | ||
| 3686 | for (;;) { | ||
| 3687 | TSFieldId negated_field_id = *negated_field_ids; | ||
| 3688 | if (negated_field_id) { | ||
| 3689 | negated_field_ids++; | ||
| 3690 | if (ts_node_child_by_field_id(node, negated_field_id).id) { | ||
| 3691 | node_does_match = false; | ||
| 3692 | break; | ||
| 3693 | } | ||
| 3694 | } else { | ||
| 3695 | break; | ||
| 3696 | } | ||
| 3697 | } | ||
| 3698 | } | ||
| 3699 | |||
| 3700 | // Remove states immediately if it is ever clear that they cannot match. | ||
| 3701 | if (!node_does_match) { | ||
| 3702 | if (!later_sibling_can_match) { | ||
| 3703 | LOG( | ||
| 3704 | " discard state. pattern:%u, step:%u\n", | ||
| 3705 | state->pattern_index, | ||
| 3706 | state->step_index | ||
| 3707 | ); | ||
| 3708 | capture_list_pool_release( | ||
| 3709 | &self->capture_list_pool, | ||
| 3710 | state->capture_list_id | ||
| 3711 | ); | ||
| 3712 | array_erase(&self->states, j); | ||
| 3713 | j--; | ||
| 3714 | } | ||
| 3715 | continue; | ||
| 3716 | } | ||
| 3717 | |||
| 3718 | // Some patterns can match their root node in multiple ways, capturing different | ||
| 3719 | // children. If this pattern step could match later children within the same | ||
| 3720 | // parent, then this query state cannot simply be updated in place. It must be | ||
| 3721 | // split into two states: one that matches this node, and one which skips over | ||
| 3722 | // this node, to preserve the possibility of matching later siblings. | ||
| 3723 | if (later_sibling_can_match && ( | ||
| 3724 | step->contains_captures || | ||
| 3725 | ts_query__step_is_fallible(self->query, state->step_index) | ||
| 3726 | )) { | ||
| 3727 | if (ts_query_cursor__copy_state(self, &state)) { | ||
| 3728 | LOG( | ||
| 3729 | " split state for capture. pattern:%u, step:%u\n", | ||
| 3730 | state->pattern_index, | ||
| 3731 | state->step_index | ||
| 3732 | ); | ||
| 3733 | copy_count++; | ||
| 3734 | } | ||
| 3735 | } | ||
| 3736 | |||
| 3737 | // If this pattern started with a wildcard, such that the pattern map | ||
| 3738 | // actually points to the *second* step of the pattern, then check | ||
| 3739 | // that the node has a parent, and capture the parent node if necessary. | ||
| 3740 | if (state->needs_parent) { | ||
| 3741 | TSNode parent = ts_tree_cursor_parent_node(&self->cursor); | ||
| 3742 | if (ts_node_is_null(parent)) { | ||
| 3743 | LOG(" missing parent node\n"); | ||
| 3744 | state->dead = true; | ||
| 3745 | } else { | ||
| 3746 | state->needs_parent = false; | ||
| 3747 | QueryStep *skipped_wildcard_step = step; | ||
| 3748 | do { | ||
| 3749 | skipped_wildcard_step--; | ||
| 3750 | } while ( | ||
| 3751 | skipped_wildcard_step->is_dead_end || | ||
| 3752 | skipped_wildcard_step->is_pass_through || | ||
| 3753 | skipped_wildcard_step->depth > 0 | ||
| 3754 | ); | ||
| 3755 | if (skipped_wildcard_step->capture_ids[0] != NONE) { | ||
| 3756 | LOG(" capture wildcard parent\n"); | ||
| 3757 | ts_query_cursor__capture( | ||
| 3758 | self, | ||
| 3759 | state, | ||
| 3760 | skipped_wildcard_step, | ||
| 3761 | parent | ||
| 3762 | ); | ||
| 3763 | } | ||
| 3764 | } | ||
| 3765 | } | ||
| 3766 | |||
| 3767 | // If the current node is captured in this pattern, add it to the capture list. | ||
| 3768 | if (step->capture_ids[0] != NONE) { | ||
| 3769 | ts_query_cursor__capture(self, state, step, node); | ||
| 3770 | } | ||
| 3771 | |||
| 3772 | if (state->dead) { | ||
| 3773 | array_erase(&self->states, j); | ||
| 3774 | j--; | ||
| 3775 | continue; | ||
| 3776 | } | ||
| 3777 | |||
| 3778 | // Advance this state to the next step of its pattern. | ||
| 3779 | state->step_index++; | ||
| 3780 | state->seeking_immediate_match = false; | ||
| 3781 | LOG( | ||
| 3782 | " advance state. pattern:%u, step:%u\n", | ||
| 3783 | state->pattern_index, | ||
| 3784 | state->step_index | ||
| 3785 | ); | ||
| 3786 | |||
| 3787 | QueryStep *next_step = &self->query->steps.contents[state->step_index]; | ||
| 3788 | if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; | ||
| 3789 | |||
| 3790 | // If this state's next step has an alternative step, then copy the state in order | ||
| 3791 | // to pursue both alternatives. The alternative step itself may have an alternative, | ||
| 3792 | // so this is an interactive process. | ||
| 3793 | unsigned end_index = j + 1; | ||
| 3794 | for (unsigned k = j; k < end_index; k++) { | ||
| 3795 | QueryState *child_state = &self->states.contents[k]; | ||
| 3796 | QueryStep *child_step = &self->query->steps.contents[child_state->step_index]; | ||
| 3797 | if (child_step->alternative_index != NONE) { | ||
| 3798 | // A "dead-end" step exists only to add a non-sequential jump into the step sequence, | ||
| 3799 | // via its alternative index. When a state reaches a dead-end step, it jumps straight | ||
| 3800 | // to the step's alternative. | ||
| 3801 | if (child_step->is_dead_end) { | ||
| 3802 | child_state->step_index = child_step->alternative_index; | ||
| 3803 | k--; | ||
| 3804 | continue; | ||
| 3805 | } | ||
| 3806 | |||
| 3807 | // A "pass-through" step exists only to add a branch into the step sequence, | ||
| 3808 | // via its alternative_index. When a state reaches a pass-through step, it splits | ||
| 3809 | // in order to process the alternative step, and then it advances to the next step. | ||
| 3810 | if (child_step->is_pass_through) { | ||
| 3811 | child_state->step_index++; | ||
| 3812 | k--; | ||
| 3813 | } | ||
| 3814 | |||
| 3815 | QueryState *copy = ts_query_cursor__copy_state(self, &child_state); | ||
| 3816 | if (copy) { | ||
| 3817 | LOG( | ||
| 3818 | " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", | ||
| 3819 | copy->pattern_index, | ||
| 3820 | copy->step_index, | ||
| 3821 | next_step->alternative_index, | ||
| 3822 | next_step->alternative_is_immediate, | ||
| 3823 | capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size | ||
| 3824 | ); | ||
| 3825 | end_index++; | ||
| 3826 | copy_count++; | ||
| 3827 | copy->step_index = child_step->alternative_index; | ||
| 3828 | if (child_step->alternative_is_immediate) { | ||
| 3829 | copy->seeking_immediate_match = true; | ||
| 3830 | } | ||
| 3831 | } | ||
| 3832 | } | ||
| 3833 | } | ||
| 3834 | } | ||
| 3835 | |||
| 3836 | for (unsigned j = 0; j < self->states.size; j++) { | ||
| 3837 | QueryState *state = &self->states.contents[j]; | ||
| 3838 | if (state->dead) { | ||
| 3839 | array_erase(&self->states, j); | ||
| 3840 | j--; | ||
| 3841 | continue; | ||
| 3842 | } | ||
| 3843 | |||
| 3844 | // Enforce the longest-match criteria. When a query pattern contains optional or | ||
| 3845 | // repeated nodes, this is necessary to avoid multiple redundant states, where | ||
| 3846 | // one state has a strict subset of another state's captures. | ||
| 3847 | bool did_remove = false; | ||
| 3848 | for (unsigned k = j + 1; k < self->states.size; k++) { | ||
| 3849 | QueryState *other_state = &self->states.contents[k]; | ||
| 3850 | |||
| 3851 | // Query states are kept in ascending order of start_depth and pattern_index. | ||
| 3852 | // Since the longest-match criteria is only used for deduping matches of the same | ||
| 3853 | // pattern and root node, we only need to perform pairwise comparisons within a | ||
| 3854 | // small slice of the states array. | ||
| 3855 | if ( | ||
| 3856 | other_state->start_depth != state->start_depth || | ||
| 3857 | other_state->pattern_index != state->pattern_index | ||
| 3858 | ) break; | ||
| 3859 | |||
| 3860 | bool left_contains_right, right_contains_left; | ||
| 3861 | ts_query_cursor__compare_captures( | ||
| 3862 | self, | ||
| 3863 | state, | ||
| 3864 | other_state, | ||
| 3865 | &left_contains_right, | ||
| 3866 | &right_contains_left | ||
| 3867 | ); | ||
| 3868 | if (left_contains_right) { | ||
| 3869 | if (state->step_index == other_state->step_index) { | ||
| 3870 | LOG( | ||
| 3871 | " drop shorter state. pattern: %u, step_index: %u\n", | ||
| 3872 | state->pattern_index, | ||
| 3873 | state->step_index | ||
| 3874 | ); | ||
| 3875 | capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); | ||
| 3876 | array_erase(&self->states, k); | ||
| 3877 | k--; | ||
| 3878 | continue; | ||
| 3879 | } | ||
| 3880 | other_state->has_in_progress_alternatives = true; | ||
| 3881 | } | ||
| 3882 | if (right_contains_left) { | ||
| 3883 | if (state->step_index == other_state->step_index) { | ||
| 3884 | LOG( | ||
| 3885 | " drop shorter state. pattern: %u, step_index: %u\n", | ||
| 3886 | state->pattern_index, | ||
| 3887 | state->step_index | ||
| 3888 | ); | ||
| 3889 | capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); | ||
| 3890 | array_erase(&self->states, j); | ||
| 3891 | j--; | ||
| 3892 | did_remove = true; | ||
| 3893 | break; | ||
| 3894 | } | ||
| 3895 | state->has_in_progress_alternatives = true; | ||
| 3896 | } | ||
| 3897 | } | ||
| 3898 | |||
| 3899 | // If the state is at the end of its pattern, remove it from the list | ||
| 3900 | // of in-progress states and add it to the list of finished states. | ||
| 3901 | if (!did_remove) { | ||
| 3902 | LOG( | ||
| 3903 | " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", | ||
| 3904 | state->pattern_index, | ||
| 3905 | state->start_depth, | ||
| 3906 | state->step_index, | ||
| 3907 | capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size | ||
| 3908 | ); | ||
| 3909 | QueryStep *next_step = &self->query->steps.contents[state->step_index]; | ||
| 3910 | if (next_step->depth == PATTERN_DONE_MARKER) { | ||
| 3911 | if (state->has_in_progress_alternatives) { | ||
| 3912 | LOG(" defer finishing pattern %u\n", state->pattern_index); | ||
| 3913 | } else { | ||
| 3914 | LOG(" finish pattern %u\n", state->pattern_index); | ||
| 3915 | array_push(&self->finished_states, *state); | ||
| 3916 | array_erase(&self->states, (uint32_t)(state - self->states.contents)); | ||
| 3917 | did_match = true; | ||
| 3918 | j--; | ||
| 3919 | } | ||
| 3920 | } | ||
| 3921 | } | ||
| 3922 | } | ||
| 3923 | } | ||
| 3924 | |||
| 3925 | if (ts_query_cursor__should_descend(self, node_intersects_range)) { | ||
| 3926 | switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { | ||
| 3927 | case TreeCursorStepVisible: | ||
| 3928 | self->depth++; | ||
| 3929 | self->on_visible_node = true; | ||
| 3930 | continue; | ||
| 3931 | case TreeCursorStepHidden: | ||
| 3932 | self->on_visible_node = false; | ||
| 3933 | continue; | ||
| 3934 | default: | ||
| 3935 | break; | ||
| 3936 | } | ||
| 3937 | } | ||
| 3938 | |||
| 3939 | self->ascending = true; | ||
| 3940 | } | ||
| 3941 | } | ||
| 3942 | } | ||
| 3943 | |||
| 3944 | bool ts_query_cursor_next_match( | ||
| 3945 | TSQueryCursor *self, | ||
| 3946 | TSQueryMatch *match | ||
| 3947 | ) { | ||
| 3948 | if (self->finished_states.size == 0) { | ||
| 3949 | if (!ts_query_cursor__advance(self, false)) { | ||
| 3950 | return false; | ||
| 3951 | } | ||
| 3952 | } | ||
| 3953 | |||
| 3954 | QueryState *state = &self->finished_states.contents[0]; | ||
| 3955 | if (state->id == UINT32_MAX) state->id = self->next_state_id++; | ||
| 3956 | match->id = state->id; | ||
| 3957 | match->pattern_index = state->pattern_index; | ||
| 3958 | const CaptureList *captures = capture_list_pool_get( | ||
| 3959 | &self->capture_list_pool, | ||
| 3960 | state->capture_list_id | ||
| 3961 | ); | ||
| 3962 | match->captures = captures->contents; | ||
| 3963 | match->capture_count = captures->size; | ||
| 3964 | capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); | ||
| 3965 | array_erase(&self->finished_states, 0); | ||
| 3966 | return true; | ||
| 3967 | } | ||
| 3968 | |||
| 3969 | void ts_query_cursor_remove_match( | ||
| 3970 | TSQueryCursor *self, | ||
| 3971 | uint32_t match_id | ||
| 3972 | ) { | ||
| 3973 | for (unsigned i = 0; i < self->finished_states.size; i++) { | ||
| 3974 | const QueryState *state = &self->finished_states.contents[i]; | ||
| 3975 | if (state->id == match_id) { | ||
| 3976 | capture_list_pool_release( | ||
| 3977 | &self->capture_list_pool, | ||
| 3978 | state->capture_list_id | ||
| 3979 | ); | ||
| 3980 | array_erase(&self->finished_states, i); | ||
| 3981 | return; | ||
| 3982 | } | ||
| 3983 | } | ||
| 3984 | |||
| 3985 | // Remove unfinished query states as well to prevent future | ||
| 3986 | // captures for a match being removed. | ||
| 3987 | for (unsigned i = 0; i < self->states.size; i++) { | ||
| 3988 | const QueryState *state = &self->states.contents[i]; | ||
| 3989 | if (state->id == match_id) { | ||
| 3990 | capture_list_pool_release( | ||
| 3991 | &self->capture_list_pool, | ||
| 3992 | state->capture_list_id | ||
| 3993 | ); | ||
| 3994 | array_erase(&self->states, i); | ||
| 3995 | return; | ||
| 3996 | } | ||
| 3997 | } | ||
| 3998 | } | ||
| 3999 | |||
// Advance the cursor to the next individual capture, in source order.
// On success, fills `match` with the owning match and sets `*capture_index`
// to the index of the capture within that match's capture list.
// Returns false when no further captures can be produced.
bool ts_query_cursor_next_capture(
  TSQueryCursor *self,
  TSQueryMatch *match,
  uint32_t *capture_index
) {
  // The goal here is to return captures in order, even though they may not
  // be discovered in order, because patterns can overlap. Search for matches
  // until there is a finished capture that is before any unfinished capture.
  for (;;) {
    // First, find the earliest capture in an unfinished match.
    uint32_t first_unfinished_capture_byte;
    uint32_t first_unfinished_pattern_index;
    uint32_t first_unfinished_state_index;
    bool first_unfinished_state_is_definite = false;
    ts_query_cursor__first_in_progress_capture(
      self,
      &first_unfinished_state_index,
      &first_unfinished_capture_byte,
      &first_unfinished_pattern_index,
      &first_unfinished_state_is_definite
    );

    // Then find the earliest capture in a finished match. It must occur
    // before the first capture in an *unfinished* match.
    QueryState *first_finished_state = NULL;
    uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
    uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
    // `i` is only incremented at the bottom of the loop; the two `continue`
    // branches above it deliberately revisit slot `i` after an erase or a
    // consumed-capture skip.
    for (unsigned i = 0; i < self->finished_states.size;) {
      QueryState *state = &self->finished_states.contents[i];
      const CaptureList *captures = capture_list_pool_get(
        &self->capture_list_pool,
        state->capture_list_id
      );

      // Remove states whose captures are all consumed.
      if (state->consumed_capture_count >= captures->size) {
        capture_list_pool_release(
          &self->capture_list_pool,
          state->capture_list_id
        );
        array_erase(&self->finished_states, i);
        continue;
      }

      TSNode node = captures->contents[state->consumed_capture_count].node;

      bool node_precedes_range = (
        ts_node_end_byte(node) <= self->start_byte ||
        point_lte(ts_node_end_point(node), self->start_point)
      );
      bool node_follows_range = (
        ts_node_start_byte(node) >= self->end_byte ||
        point_gte(ts_node_start_point(node), self->end_point)
      );
      bool node_outside_of_range = node_precedes_range || node_follows_range;

      // Skip captures that are outside of the cursor's range.
      if (node_outside_of_range) {
        state->consumed_capture_count++;
        continue;
      }

      // Track the earliest remaining capture, breaking ties on start byte
      // in favor of the lower pattern index.
      uint32_t node_start_byte = ts_node_start_byte(node);
      if (
        node_start_byte < first_finished_capture_byte ||
        (
          node_start_byte == first_finished_capture_byte &&
          state->pattern_index < first_finished_pattern_index
        )
      ) {
        first_finished_state = state;
        first_finished_capture_byte = node_start_byte;
        first_finished_pattern_index = state->pattern_index;
      }
      i++;
    }

    // If there is finished capture that is clearly before any unfinished
    // capture, then return its match, and its capture index. Internally
    // record the fact that the capture has been 'consumed'.
    QueryState *state;
    if (first_finished_state) {
      state = first_finished_state;
    } else if (first_unfinished_state_is_definite) {
      // No finished capture comes first, but the earliest unfinished state
      // is certain to complete, so its capture can be emitted early.
      state = &self->states.contents[first_unfinished_state_index];
    } else {
      state = NULL;
    }

    if (state) {
      if (state->id == UINT32_MAX) state->id = self->next_state_id++;
      match->id = state->id;
      match->pattern_index = state->pattern_index;
      const CaptureList *captures = capture_list_pool_get(
        &self->capture_list_pool,
        state->capture_list_id
      );
      match->captures = captures->contents;
      match->capture_count = captures->size;
      *capture_index = state->consumed_capture_count;
      state->consumed_capture_count++;
      return true;
    }

    // The capture-list pool is exhausted: abandon the earliest unfinished
    // state so that newer states can continue to acquire lists.
    if (capture_list_pool_is_empty(&self->capture_list_pool)) {
      LOG(
        "  abandon state. index:%u, pattern:%u, offset:%u.\n",
        first_unfinished_state_index,
        first_unfinished_pattern_index,
        first_unfinished_capture_byte
      );
      capture_list_pool_release(
        &self->capture_list_pool,
        self->states.contents[first_unfinished_state_index].capture_list_id
      );
      array_erase(&self->states, first_unfinished_state_index);
    }

    // If there are no finished matches that are ready to be returned, then
    // continue finding more matches.
    if (
      !ts_query_cursor__advance(self, true) &&
      self->finished_states.size == 0
    ) return false;
  }
}
| 4126 | |||
// Set the maximum depth (relative to the node the cursor was started on)
// at which query patterns are allowed to begin matching.
void ts_query_cursor_set_max_start_depth(
  TSQueryCursor *self,
  uint32_t max_start_depth
) {
  self->max_start_depth = max_start_depth;
}
| 4133 | |||
| 4134 | #undef LOG | ||
