.github
workflows build.yml
content help.txt ollama.txt
queries bash.scm c.scm cpp.scm css.scm dockerfile.scm go.scm html.scm javascript.scm lua.scm markdown.scm php.scm python.scm sql.scm tsx.scm typescript.scm
samples format.txt lsp.c ollama.py test.c test.cpp test.css test.dockerfile test.html test.js test.lua test.md test.php test.py test.rb test.sh test.sql test.ts test.tsx
vendor
github.com
mattn
go-runewidth .travis.yml LICENSE README.md go.test.sh runewidth.go runewidth_appengine.go runewidth_js.go runewidth_posix.go runewidth_table.go runewidth_windows.go
mitjafelicijan
go-tree-sitter
bash binding.go parser.c parser.h scanner.c
c binding.go parser.c parser.h
cpp binding.go parser.c parser.h scanner.c
css binding.go parser.c parser.h scanner.c
dockerfile binding.go parser.c parser.h scanner.c
golang binding.go parser.c parser.h
html binding.go parser.c parser.h scanner.c tag.h
javascript binding.go parser.c parser.h scanner.c
lua binding.go parser.c parser.h scanner.c
markdown
tree-sitter-markdown binding.go parser.c parser.h scanner.c
php
tree_sitter .keep alloc.h array.h parser.h
binding.go parser.c parser.h scanner.c scanner.h
python binding.go parser.c parser.h scanner.c
sql
tree_sitter .keep alloc.h array.h parser.h
binding.go parser.c scanner.c
typescript
tsx binding.go parser.c parser.h scanner.c scanner.h
typescript binding.go parser.c parser.h scanner.c scanner.h
.gitignore LICENSE Makefile README.md alloc.c alloc.h api.h array.h atomic.h bindings.c bindings.go bindings.h bits.h clock.h error_costs.h get_changed_ranges.c get_changed_ranges.h host.h iter.go language.c language.h length.h lexer.c lexer.h node.c parser.c parser.h point.h ptypes.h query.c reduce_action.h reusable_node.h stack.c stack.h subtree.c subtree.h test_grammar.go test_grammar.js test_grammar_generate.sh tree.c tree.h tree_cursor.c tree_cursor.h umachine.h unicode.h urename.h utf.h utf16.h utf8.h wasm_store.c wasm_store.h
nsf
termbox-go AUTHORS LICENSE README.md api.go api_common.go api_windows.go collect_terminfo.py escwait.go escwait_darwin.go syscalls_darwin.go syscalls_darwin_amd64.go syscalls_dragonfly.go syscalls_freebsd.go syscalls_linux.go syscalls_netbsd.go syscalls_openbsd.go syscalls_windows.go termbox.go termbox_common.go termbox_windows.go terminfo.go terminfo_builtin.go
modules.txt
.gitignore LICENSE Makefile README.md buffer.go colors.go command.go config.go editor.go embed.go ftypes.go go.mod go.sum info.go intro.go kevent.go lsp.go main.go ollama.go replace.go syntax.go theme.go treesitter.txt
vendor/github.com/mitjafelicijan/go-tree-sitter/php/scanner.h raw
  1#include "tree_sitter/array.h"
  2#include "tree_sitter/parser.h"
  3
  4#include <string.h>
  5#include <wchar.h>
  6#include <wctype.h>
  7
// External token kinds produced by this scanner. scan() uses each value both
// as an index into its `valid_symbols` argument and as the value stored in
// `lexer->result_symbol`.
// NOTE(review): the order presumably has to match the `externals` list of the
// grammar - confirm before reordering or inserting entries.
enum TokenType {
    AUTOMATIC_SEMICOLON,
    ENCAPSED_STRING_CHARS,
    ENCAPSED_STRING_CHARS_AFTER_VARIABLE,
    EXECUTION_STRING_CHARS,
    EXECUTION_STRING_CHARS_AFTER_VARIABLE,
    ENCAPSED_STRING_CHARS_HEREDOC,
    ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC,
    EOF_TOKEN,
    HEREDOC_START,
    HEREDOC_END,
    NOWDOC_STRING,
    SENTINEL_ERROR, // Unused token used to indicate error recovery mode
};
 22
// Growable array of 32-bit code points; used to hold heredoc/nowdoc
// identifiers as they are read from `lexer->lookahead`.
typedef Array(int32_t) String;
 24
 25static inline bool string_eq(String *self, String *other) {
 26    if (self->size != other->size) {
 27        return false;
 28    }
 29    if (self->size == 0) {
 30        return self->size == other->size;
 31    }
 32    return memcmp(self->contents, other->contents, self->size * sizeof(self->contents[0])) == 0;
 33}
 34
// One open heredoc/nowdoc tracked by the scanner.
typedef struct {
    // Serialized and restored with the scanner state; no code in this header
    // sets it to anything but false outside of deserialization.
    bool end_word_indentation_allowed;
    // Identifier that opened the heredoc; the closing line is matched against it.
    String word;
} Heredoc;
 39
 40#define heredoc_new()                                                                                                  \
 41    {                                                                                                                  \
 42        .end_word_indentation_allowed = false,                                                                         \
 43        .word = array_new(),                                                                                           \
 44    };
 45
// Persistent state of the external scanner.
typedef struct {
    // Cleared at the start of every scan() and on deserialize(); never read
    // anywhere in this header.
    bool has_leading_whitespace;
    // Stack of currently open heredocs/nowdocs; the innermost one is last.
    Array(Heredoc) heredocs;
} Scanner;
 50
// Result codes for content scanning. Not referenced anywhere else in this
// header. NOTE(review): possibly a leftover - confirm before removing.
typedef enum { Error, End } ScanContentResult;
 52
 53static inline void reset_heredoc(Heredoc *heredoc) {
 54    array_delete(&heredoc->word);
 55    heredoc->end_word_indentation_allowed = false;
 56}
 57
 58static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
 59
 60static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
 61
 62static unsigned serialize(Scanner *scanner, char *buffer) {
 63    unsigned size = 0;
 64
 65    buffer[size++] = (char)scanner->heredocs.size;
 66    for (unsigned j = 0; j < scanner->heredocs.size; j++) {
 67        Heredoc *heredoc = &scanner->heredocs.contents[j];
 68        unsigned word_size = heredoc->word.size * sizeof(heredoc->word.contents[0]);
 69        if (size + 5 + word_size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
 70            return 0;
 71        }
 72        buffer[size++] = (char)heredoc->end_word_indentation_allowed;
 73        memcpy(&buffer[size], &heredoc->word.size, sizeof(uint32_t));
 74        size += sizeof(uint32_t);
 75        if (heredoc->word.size > 0) {
 76            memcpy(&buffer[size], heredoc->word.contents, word_size);
 77            size += word_size;
 78        }
 79    }
 80
 81    return size;
 82}
 83
 84static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
 85    unsigned size = 0;
 86    scanner->has_leading_whitespace = false;
 87
 88    for (uint32_t i = 0; i < scanner->heredocs.size; i++) {
 89        reset_heredoc(array_get(&scanner->heredocs, i));
 90    }
 91
 92    if (length == 0) {
 93        return;
 94    }
 95
 96    uint8_t open_heredoc_count = buffer[size++];
 97    for (unsigned i = 0; i < open_heredoc_count; i++) {
 98        Heredoc *heredoc = NULL;
 99        if (i < scanner->heredocs.size) {
100            heredoc = array_get(&scanner->heredocs, i);
101        } else {
102            Heredoc new_heredoc = heredoc_new();
103            array_push(&scanner->heredocs, new_heredoc);
104            heredoc = array_back(&scanner->heredocs);
105        }
106
107        heredoc->end_word_indentation_allowed = buffer[size++];
108        memcpy(&heredoc->word.size, &buffer[size], sizeof(uint32_t));
109        size += sizeof(uint32_t);
110        unsigned word_size = heredoc->word.size * sizeof(heredoc->word.contents[0]);
111        if (word_size > 0) {
112            array_reserve(&heredoc->word, heredoc->word.size);
113            memcpy(heredoc->word.contents, &buffer[size], word_size);
114            size += word_size;
115        }
116    }
117
118    assert(size == length);
119}
120
121static inline bool scan_whitespace(TSLexer *lexer) {
122    for (;;) {
123        while (iswspace(lexer->lookahead)) {
124            advance(lexer);
125        }
126
127        if (lexer->lookahead == '/') {
128            advance(lexer);
129
130            if (lexer->lookahead == '/') {
131                advance(lexer);
132                while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
133                    advance(lexer);
134                }
135            } else {
136                return false;
137            }
138        } else {
139            return true;
140        }
141    }
142}
143
144static inline bool is_valid_name_char(TSLexer *lexer) {
145    return iswalnum(lexer->lookahead) || lexer->lookahead == '_' || lexer->lookahead >= 0x80;
146}
147
148static inline bool is_escapable_sequence(TSLexer *lexer) {
149    // Note: remember to also update the escape_sequence rule in the
150    // main grammar whenever changing this method
151    int32_t letter = lexer->lookahead;
152
153    if (letter == 'n' || letter == 'r' || letter == 't' || letter == 'v' || letter == 'e' || letter == 'f' ||
154        letter == '\\' || letter == '$' || letter == '"') {
155        return true;
156    }
157
158    // Hex
159    if (letter == 'x') {
160        advance(lexer);
161        return iswxdigit(lexer->lookahead);
162    }
163
164    // Unicode
165    if (letter == 'u') {
166        return true; // We handle the case where this is not really an escape
167                     // sequence in grammar.js - this is needed to support the
168                     // edge case "\u{$a}" in which case "\u" is to be
169                     // interpreted as characters and {$a} as a variable
170    }
171
172    // Octal
173    return iswdigit(lexer->lookahead) && lexer->lookahead >= '0' && lexer->lookahead <= '7';
174}
175
176static String scan_heredoc_word(TSLexer *lexer) {
177    String result = (String)array_new();
178
179    while (is_valid_name_char(lexer)) {
180        array_push(&result, lexer->lookahead);
181        advance(lexer);
182    }
183
184    return result;
185}
186
187static inline bool scan_nowdoc_string(Scanner *scanner, TSLexer *lexer) {
188    bool has_consumed_content = false;
189    if (scanner->heredocs.size == 0) {
190        return false;
191    }
192
193    // While PHP requires the nowdoc end tag to be the very first on a new line,
194    // there may be an arbitrary amount of whitespace before the closing token
195    while (iswspace(lexer->lookahead)) {
196        advance(lexer);
197        has_consumed_content = true;
198    }
199
200    bool end_tag_matched = false;
201    String heredoc_tag = array_back(&scanner->heredocs)->word;
202
203    for (uint32_t i = 0; i < heredoc_tag.size; i++) {
204        if (lexer->lookahead != heredoc_tag.contents[i]) {
205            break;
206        }
207        advance(lexer);
208        has_consumed_content = true;
209
210        end_tag_matched = (i == heredoc_tag.size - 1 && (iswspace(lexer->lookahead) || lexer->lookahead == ';' ||
211                                                         lexer->lookahead == ',' || lexer->lookahead == ')'));
212    }
213
214    if (end_tag_matched) {
215        // There may be an arbitrary amount of white space after the end tag
216        while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
217            advance(lexer);
218            has_consumed_content = true;
219        }
220
221        // Return to allow the end tag parsing if we've encountered an end tag
222        // at a valid position
223        if (lexer->lookahead == ';' || lexer->lookahead == ',' || lexer->lookahead == ')' || lexer->lookahead == '\n' ||
224            lexer->lookahead == '\r') {
225            // , and ) is needed to support heredoc in function arguments
226            return false;
227        }
228    }
229
230    for (bool has_content = has_consumed_content;; has_content = true) {
231        lexer->mark_end(lexer);
232
233        switch (lexer->lookahead) {
234            case '\n':
235            case '\r':
236                return has_content;
237            default:
238                if (lexer->eof(lexer)) {
239                    return false;
240                }
241                advance(lexer);
242        }
243    }
244
245    return false;
246}
247
248static bool scan_encapsed_part_string(Scanner *scanner, TSLexer *lexer, bool is_after_variable, bool is_heredoc,
249                                      bool is_execution_string) {
250    bool has_consumed_content = false;
251
252    if (is_heredoc && scanner->heredocs.size > 0) {
253        // While PHP requires the heredoc end tag to be the very first on a new
254        // line, there may be an arbitrary amount of whitespace before the
255        // closing token However, we should not consume \r or \n
256        while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
257            advance(lexer);
258            has_consumed_content = true;
259        }
260
261        String heredoc_tag = array_back(&scanner->heredocs)->word;
262
263        bool end_tag_matched = false;
264
265        for (uint32_t i = 0; i < heredoc_tag.size; i++) {
266            if (lexer->lookahead != heredoc_tag.contents[i]) {
267                break;
268            }
269            has_consumed_content = true;
270            advance(lexer);
271
272            end_tag_matched = (i == heredoc_tag.size - 1 && (iswspace(lexer->lookahead) || lexer->lookahead == ';' ||
273                                                             lexer->lookahead == ',' || lexer->lookahead == ')'));
274        }
275
276        if (end_tag_matched) {
277            // There may be an arbitrary amount of white space after the end tag
278            // However, we should not consume \r or \n
279            while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
280                advance(lexer);
281                has_consumed_content = true;
282            }
283
284            // Return to allow the end tag parsing if we've encountered an end
285            // tag at a valid position
286            if (lexer->lookahead == ';' || lexer->lookahead == ',' || lexer->lookahead == ')' ||
287                lexer->lookahead == '\n' || lexer->lookahead == '\r') {
288                // , and ) is needed to support heredoc in function arguments
289                return false;
290            }
291        }
292    }
293
294    for (bool has_content = has_consumed_content;; has_content = true) {
295        lexer->mark_end(lexer);
296
297        switch (lexer->lookahead) {
298            case '"':
299                if (!is_heredoc && !is_execution_string) {
300                    return has_content;
301                }
302                advance(lexer);
303                break;
304            case '`':
305                if (is_execution_string) {
306                    return has_content;
307                }
308                advance(lexer);
309                break;
310            case '\n':
311            case '\r':
312                if (is_heredoc) {
313                    return has_content;
314                }
315                advance(lexer);
316                break;
317            case '\\':
318                advance(lexer);
319
320                // \{ should not be interpreted as an escape sequence, but both
321                // should be consumed as normal characters
322                if (lexer->lookahead == '{') {
323                    advance(lexer);
324                    break;
325                }
326
327                if (is_execution_string && lexer->lookahead == '`') {
328                    return has_content;
329                }
330
331                if (is_heredoc && lexer->lookahead == '\\') {
332                    advance(lexer);
333                    break;
334                }
335
336                if (is_escapable_sequence(lexer)) {
337                    return has_content;
338                }
339                break;
340            case '$':
341                advance(lexer);
342
343                if ((is_valid_name_char(lexer) && !iswdigit(lexer->lookahead)) || lexer->lookahead == '{') {
344                    return has_content;
345                }
346                break;
347            case '-':
348                if (is_after_variable) {
349                    advance(lexer);
350                    if (lexer->lookahead == '>') {
351                        advance(lexer);
352                        if (is_valid_name_char(lexer)) {
353                            return has_content;
354                        }
355                        break;
356                    }
357                    break;
358                }
359            case '[':
360                if (is_after_variable) {
361                    return has_content;
362                }
363                advance(lexer);
364                break;
365            case '{':
366                advance(lexer);
367                if (lexer->lookahead == '$') {
368                    return has_content;
369                }
370                break;
371            default:
372                if (lexer->eof(lexer)) {
373                    return false;
374                }
375                advance(lexer);
376        }
377
378        is_after_variable = false;
379    }
380
381    return false;
382}
383
384static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
385    const bool is_error_recovery = valid_symbols[SENTINEL_ERROR];
386
387    if (is_error_recovery) {
388        return false;
389    }
390
391    scanner->has_leading_whitespace = false;
392
393    lexer->mark_end(lexer);
394
395    if (valid_symbols[ENCAPSED_STRING_CHARS_AFTER_VARIABLE]) {
396        lexer->result_symbol = ENCAPSED_STRING_CHARS_AFTER_VARIABLE;
397        return scan_encapsed_part_string(scanner, lexer,
398                                         /* is_after_variable */ true,
399                                         /* is_heredoc */ false,
400                                         /* is_execution_string */ false);
401    }
402
403    if (valid_symbols[ENCAPSED_STRING_CHARS]) {
404        lexer->result_symbol = ENCAPSED_STRING_CHARS;
405        return scan_encapsed_part_string(scanner, lexer,
406                                         /* is_after_variable */ false,
407                                         /* is_heredoc */ false,
408                                         /* is_execution_string */ false);
409    }
410
411    if (valid_symbols[EXECUTION_STRING_CHARS_AFTER_VARIABLE]) {
412        lexer->result_symbol = EXECUTION_STRING_CHARS_AFTER_VARIABLE;
413        return scan_encapsed_part_string(scanner, lexer,
414                                         /* is_after_variable */ true,
415                                         /* is_heredoc */ false,
416                                         /* is_execution_string */ true);
417    }
418
419    if (valid_symbols[EXECUTION_STRING_CHARS]) {
420        lexer->result_symbol = EXECUTION_STRING_CHARS;
421        return scan_encapsed_part_string(scanner, lexer,
422                                         /* is_after_variable */ false,
423                                         /* is_heredoc */ false,
424                                         /* is_execution_string */ true);
425    }
426
427    if (valid_symbols[ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC]) {
428        lexer->result_symbol = ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC;
429        return scan_encapsed_part_string(scanner, lexer,
430                                         /* is_after_variable */ true,
431                                         /* is_heredoc */ true,
432                                         /* is_execution_string */ false);
433    }
434
435    if (valid_symbols[ENCAPSED_STRING_CHARS_HEREDOC]) {
436        lexer->result_symbol = ENCAPSED_STRING_CHARS_HEREDOC;
437        return scan_encapsed_part_string(scanner, lexer,
438                                         /* is_after_variable */ false,
439                                         /* is_heredoc */ true,
440                                         /* is_execution_string */ false);
441    }
442
443    if (valid_symbols[NOWDOC_STRING]) {
444        lexer->result_symbol = NOWDOC_STRING;
445        return scan_nowdoc_string(scanner, lexer);
446    }
447
448    if (valid_symbols[HEREDOC_END]) {
449        lexer->result_symbol = HEREDOC_END;
450        if (scanner->heredocs.size == 0) {
451            return false;
452        }
453
454        Heredoc heredoc = *array_back(&scanner->heredocs);
455
456        while (iswspace(lexer->lookahead)) {
457            skip(lexer);
458        }
459
460        String word = scan_heredoc_word(lexer);
461        if (!string_eq(&word, &heredoc.word)) {
462            array_delete(&word);
463            return false;
464        }
465        array_delete(&word);
466
467        lexer->mark_end(lexer);
468        array_delete(&array_pop(&scanner->heredocs).word);
469        return true;
470    }
471
472    if (!scan_whitespace(lexer)) {
473        return false;
474    }
475
476    if (valid_symbols[EOF_TOKEN] && lexer->eof(lexer)) {
477        lexer->result_symbol = EOF_TOKEN;
478        return true;
479    }
480
481    if (valid_symbols[HEREDOC_START]) {
482        lexer->result_symbol = HEREDOC_START;
483        Heredoc heredoc = heredoc_new();
484
485        while (iswspace(lexer->lookahead)) {
486            skip(lexer);
487        }
488
489        heredoc.word = scan_heredoc_word(lexer);
490        if (heredoc.word.size == 0) {
491            array_delete(&heredoc.word);
492            return false;
493        }
494        lexer->mark_end(lexer);
495
496        array_push(&scanner->heredocs, heredoc);
497        return true;
498    }
499
500    if (valid_symbols[AUTOMATIC_SEMICOLON]) {
501        lexer->result_symbol = AUTOMATIC_SEMICOLON;
502
503        if (lexer->lookahead != '?') {
504            return false;
505        }
506
507        advance(lexer);
508
509        return lexer->lookahead == '>';
510    }
511
512    return false;
513}
514
515static inline void *external_scanner_create() {
516    Scanner *scanner = ts_calloc(1, sizeof(Scanner));
517    array_init(&scanner->heredocs);
518    return scanner;
519}
520
521static inline unsigned external_scanner_serialize(void *payload, char *buffer) {
522    Scanner *scanner = (Scanner *)payload;
523    return serialize(scanner, buffer);
524}
525
526static inline void external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
527    Scanner *scanner = (Scanner *)payload;
528    deserialize(scanner, buffer, length);
529}
530
531static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
532    Scanner *scanner = (Scanner *)payload;
533    return scan(scanner, lexer, valid_symbols);
534}
535
536static inline void external_scanner_destroy(void *payload) {
537    Scanner *scanner = (Scanner *)payload;
538    for (size_t i = 0; i < scanner->heredocs.size; i++) {
539        array_delete(&scanner->heredocs.contents[i].word);
540    }
541    array_delete(&scanner->heredocs);
542    ts_free(scanner);
543}