1 files changed, 543 insertions, 0 deletions
diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/php/scanner.h b/vendor/github.com/mitjafelicijan/go-tree-sitter/php/scanner.h
new file mode 100644
index 0000000..e16a21e
--- /dev/null
+++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/php/scanner.h
@@ -0,0 +1,543 @@
+#include "tree_sitter/array.h"
+#include "tree_sitter/parser.h"
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+enum TokenType { // external token ids: scan() indexes valid_symbols[] with them and stores them in lexer->result_symbol. NOTE(review): presumably the order must match the grammar's externals list - confirm
+    AUTOMATIC_SEMICOLON, // reported by scan() when, after whitespace and `//` comments, the input continues with `?>`
+    ENCAPSED_STRING_CHARS, // chunk from scan_encapsed_part_string(): not after a variable, not heredoc, not execution string
+    ENCAPSED_STRING_CHARS_AFTER_VARIABLE, // same scanner, called with is_after_variable = true
+    EXECUTION_STRING_CHARS, // same scanner, called with is_execution_string = true
+    EXECUTION_STRING_CHARS_AFTER_VARIABLE, // is_execution_string = true and is_after_variable = true
+    ENCAPSED_STRING_CHARS_HEREDOC, // same scanner, called with is_heredoc = true
+    ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC, // is_heredoc = true and is_after_variable = true
+    EOF_TOKEN, // reported by scan() when lexer->eof() holds after whitespace and `//` comments
+    HEREDOC_START, // identifier read by scan_heredoc_word(); scan() pushes it onto scanner->heredocs
+    HEREDOC_END, // identifier equal to the word of the last entry in scanner->heredocs; scan() pops that entry
+    NOWDOC_STRING, // chunk produced by scan_nowdoc_string()
+    SENTINEL_ERROR, // Unused token used to indicate error recovery mode
+};
+typedef Array(int32_t) String; // array of int32_t values; scan_heredoc_word() fills it from lexer->lookahead
+// Reports whether two Strings hold the same sequence of elements.
+// Strings of different length are never equal; two empty Strings always are.
+static inline bool string_eq(String *self, String *other) {
+    if (self->size == other->size) {
+        // Equal lengths: an empty pair needs no byte comparison at all.
+        if (self->size == 0) {
+            return true;
+        }
+        const size_t byte_count = self->size * sizeof(self->contents[0]);
+        return memcmp(self->contents, other->contents, byte_count) == 0;
+    }
+    return false;
+}
+typedef struct { // one open heredoc/nowdoc tracked on scanner->heredocs
+    bool end_word_indentation_allowed; // set to false by heredoc_new()/reset_heredoc(); otherwise only round-tripped by serialize()/deserialize()
+    String word; // identifier captured for HEREDOC_START and compared against when scanning HEREDOC_END
+} Heredoc;
+// Brace initializer for an empty Heredoc: indentation flag cleared and `word`
+// set to array_new(). The expansion deliberately carries no trailing
+// semicolon, so `Heredoc h = heredoc_new();` expands to exactly one
+// declaration instead of a declaration followed by a stray empty statement.
+#define heredoc_new()                                                                                                  \
+    {                                                                                                                  \
+        .end_word_indentation_allowed = false,                                                                         \
+        .word = array_new(),                                                                                           \
+    }
+typedef struct { // scanner state; the external_scanner_* functions receive it as the void *payload
+    bool has_leading_whitespace; // cleared in deserialize() and scan(); not read anywhere in the visible code
+    Array(Heredoc) heredocs; // open heredocs; the scan functions only consult the last element (array_back)
+} Scanner;
+typedef enum { Error, End } ScanContentResult; // NOTE(review): not referenced anywhere in the visible code - confirm before removing
+// Puts `heredoc` back into its empty state: the indentation flag is cleared
+// and `word` is handed to array_delete().
+static inline void reset_heredoc(Heredoc *heredoc) {
+    heredoc->end_word_indentation_allowed = false;
+    array_delete(&heredoc->word);
+}
+// Moves the lexer to the next character, calling lexer->advance() with its
+// second (skip) argument set to false.
+static inline void advance(TSLexer *lexer) {
+    const bool skip_character = false;
+    lexer->advance(lexer, skip_character);
+}
+// Moves the lexer to the next character, calling lexer->advance() with its
+// second (skip) argument set to true.
+static inline void skip(TSLexer *lexer) {
+    const bool skip_character = true;
+    lexer->advance(lexer, skip_character);
+}
+// Writes the open-heredoc stack into `buffer` and returns the number of bytes
+// used. Layout: one byte holding the heredoc count, then for every heredoc a
+// flag byte (end_word_indentation_allowed), the uint32_t element count of its
+// word, and the word's elements. Returns 0 as soon as the next entry would
+// reach TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
+static unsigned serialize(Scanner *scanner, char *buffer) {
+    unsigned written = 0;
+    buffer[written] = (char)scanner->heredocs.size;
+    written += 1;
+    for (unsigned index = 0; index < scanner->heredocs.size; index++) {
+        Heredoc *entry = &scanner->heredocs.contents[index];
+        unsigned payload_bytes = entry->word.size * sizeof(entry->word.contents[0]);
+        // 5 = one flag byte + the four bytes of the element count
+        if (written + 5 + payload_bytes >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
+            return 0;
+        }
+        buffer[written] = (char)entry->end_word_indentation_allowed;
+        written += 1;
+        memcpy(buffer + written, &entry->word.size, sizeof(uint32_t));
+        written += sizeof(uint32_t);
+        if (entry->word.size == 0) {
+            continue;
+        }
+        memcpy(buffer + written, entry->word.contents, payload_bytes);
+        written += payload_bytes;
+    }
+    return written;
+}
+// Rebuilds the scanner state from `buffer`, which holds `length` bytes in the
+// layout produced by serialize(): one count byte, then per heredoc a flag
+// byte, a uint32_t element count and the word's elements. A `length` of 0
+// restores the state "no open heredocs".
+static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
+    unsigned size = 0;
+    scanner->has_leading_whitespace = false;
+    // Hand the words owned by the previous state to reset_heredoc() ...
+    for (uint32_t i = 0; i < scanner->heredocs.size; i++) {
+        reset_heredoc(array_get(&scanner->heredocs, i));
+    }
+    // ... and drop the entries themselves. Without this, restoring a state
+    // with fewer heredocs (or an empty buffer) left stale, empty entries on
+    // the stack, so heredocs.size and array_back() no longer described the
+    // restored state.
+    scanner->heredocs.size = 0;
+    if (length == 0) {
+        return;
+    }
+    uint8_t open_heredoc_count = buffer[size++];
+    for (unsigned i = 0; i < open_heredoc_count; i++) {
+        Heredoc new_heredoc = heredoc_new();
+        array_push(&scanner->heredocs, new_heredoc);
+        Heredoc *heredoc = array_back(&scanner->heredocs);
+        heredoc->end_word_indentation_allowed = buffer[size++];
+        // The element count is copied bytewise, mirroring how serialize() wrote it.
+        uint32_t word_length = 0;
+        memcpy(&word_length, &buffer[size], sizeof(uint32_t));
+        size += sizeof(uint32_t);
+        unsigned word_size = word_length * sizeof(heredoc->word.contents[0]);
+        if (word_size > 0) {
+            array_reserve(&heredoc->word, word_length);
+            memcpy(heredoc->word.contents, &buffer[size], word_size);
+            // Publish the length only once the storage behind it exists.
+            heredoc->word.size = word_length;
+            size += word_size;
+        }
+    }
+    // Every byte of the buffer must have been consumed.
+    assert(size == length);
+}
+// Advances past any mix of whitespace and `//` line comments.
+// Returns true when it stops in front of something that is not a '/', and
+// false when it has consumed a single '/' that does not start a `//` comment.
+static inline bool scan_whitespace(TSLexer *lexer) {
+    for (;;) {
+        for (; iswspace(lexer->lookahead); advance(lexer)) {
+        }
+        if (lexer->lookahead != '/') {
+            return true;
+        }
+        advance(lexer);
+        if (lexer->lookahead != '/') {
+            return false;
+        }
+        advance(lexer);
+        // Consume the comment body up to, but not including, the newline
+        // (a lookahead of 0 also ends the comment).
+        while (!(lexer->lookahead == 0 || lexer->lookahead == '\n')) {
+            advance(lexer);
+        }
+    }
+}
+// True when the lookahead character may appear in a name: an alphanumeric
+// character, an underscore, or any code point from 0x80 upwards.
+static inline bool is_valid_name_char(TSLexer *lexer) {
+    if (iswalnum(lexer->lookahead)) {
+        return true;
+    }
+    if (lexer->lookahead == '_') {
+        return true;
+    }
+    return lexer->lookahead >= 0x80;
+}
+// Decides whether the lookahead character (the one following a backslash at
+// the call site) starts an escape sequence. For 'x' the lexer is advanced by
+// one character before the hex-digit check.
+// Note: remember to also update the escape_sequence rule in the
+// main grammar whenever changing this method
+static inline bool is_escapable_sequence(TSLexer *lexer) {
+    switch (lexer->lookahead) {
+        case 'n':
+        case 'r':
+        case 't':
+        case 'v':
+        case 'e':
+        case 'f':
+        case '\\':
+        case '$':
+        case '"':
+            return true;
+        case 'x':
+            // Hex: the character after the 'x' must be a hex digit
+            advance(lexer);
+            return iswxdigit(lexer->lookahead);
+        case 'u':
+            // Unicode: we handle the case where this is not really an escape
+            // sequence in grammar.js - this is needed to support the
+            // edge case "\u{$a}" in which case "\u" is to be
+            // interpreted as characters and {$a} as a variable
+            return true;
+        default:
+            // Octal
+            return iswdigit(lexer->lookahead) && lexer->lookahead >= '0' && lexer->lookahead <= '7';
+    }
+}
+// Collects consecutive name characters from the lexer into a new String and
+// returns it; the result is empty when the lookahead is not a name character.
+// The caller takes over the returned String (the call sites in scan() either
+// array_delete() it or store it in a Heredoc).
+static String scan_heredoc_word(TSLexer *lexer) {
+    String word = (String)array_new();
+    for (; is_valid_name_char(lexer); advance(lexer)) {
+        array_push(&word, lexer->lookahead);
+    }
+    return word;
+}
+// Scans one chunk of nowdoc text for the last heredoc on scanner->heredocs.
+// Returns false when there is no open heredoc, when the closing word is found
+// at a valid position (so the end tag can be scanned separately), or when the
+// input ends; otherwise consumes up to the next '\n' / '\r' and returns
+// whether anything was consumed.
+static inline bool scan_nowdoc_string(Scanner *scanner, TSLexer *lexer) {
+    if (scanner->heredocs.size == 0) {
+        return false;
+    }
+    bool consumed_any = false;
+    // While PHP requires the nowdoc end tag to be the very first on a new line,
+    // there may be an arbitrary amount of whitespace before the closing token
+    for (; iswspace(lexer->lookahead); advance(lexer)) {
+        consumed_any = true;
+    }
+    // Consume as much of the closing word as the input matches.
+    const String *tag = &array_back(&scanner->heredocs)->word;
+    uint32_t matched = 0;
+    while (matched < tag->size && lexer->lookahead == tag->contents[matched]) {
+        advance(lexer);
+        consumed_any = true;
+        matched++;
+    }
+    // The word only counts as an end tag when it matched completely and is
+    // followed by whitespace, ';', ',' or ')'.
+    bool end_tag_matched = false;
+    if (tag->size > 0 && matched == tag->size) {
+        end_tag_matched = iswspace(lexer->lookahead) || lexer->lookahead == ';' || lexer->lookahead == ',' ||
+                          lexer->lookahead == ')';
+    }
+    if (end_tag_matched) {
+        // There may be an arbitrary amount of white space after the end tag
+        while (lexer->lookahead != '\r' && lexer->lookahead != '\n' && iswspace(lexer->lookahead)) {
+            advance(lexer);
+            consumed_any = true;
+        }
+        // Return to allow the end tag parsing if we've encountered an end tag
+        // at a valid position
+        switch (lexer->lookahead) {
+            case ';':
+            case ',': // , and ) is needed to support heredoc in function arguments
+            case ')':
+            case '\n':
+            case '\r':
+                return false;
+            default:
+                break;
+        }
+    }
+    // Consume the rest of the line; the token ends right before the line break.
+    bool has_content = consumed_any;
+    for (;;) {
+        lexer->mark_end(lexer);
+        if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
+            return has_content;
+        }
+        if (lexer->eof(lexer)) {
+            return false;
+        }
+        advance(lexer);
+        has_content = true;
+    }
+}
+// Scans one chunk of literal text inside an interpolated string.
+//
+//   is_after_variable   - the chunk directly follows an interpolated variable,
+//                         so a leading `[` or `->name` ends the chunk; the flag
+//                         is dropped after the first loop iteration.
+//   is_heredoc          - heredoc body: a line break ends the chunk, `"` is
+//                         ordinary text, and the closing word is looked for.
+//   is_execution_string - backtick string: a backtick ends the chunk and `"`
+//                         is ordinary text.
+//
+// The token end is marked at the top of every loop iteration, so whatever made
+// the function return is not part of the chunk. Returns true when the marked
+// chunk is non-empty; returns false at end of input or when a heredoc closing
+// word was found at a valid position.
+static bool scan_encapsed_part_string(Scanner *scanner, TSLexer *lexer, bool is_after_variable, bool is_heredoc,
+                                      bool is_execution_string) {
+    bool has_consumed_content = false;
+    if (is_heredoc && scanner->heredocs.size > 0) {
+        // While PHP requires the heredoc end tag to be the very first on a new
+        // line, there may be an arbitrary amount of whitespace before the
+        // closing token. However, we should not consume \r or \n
+        while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
+            advance(lexer);
+            has_consumed_content = true;
+        }
+        String heredoc_tag = array_back(&scanner->heredocs)->word;
+        bool end_tag_matched = false;
+        // Consume as much of the closing word as matches; it only counts as an
+        // end tag when matched completely and followed by a delimiter.
+        for (uint32_t i = 0; i < heredoc_tag.size; i++) {
+            if (lexer->lookahead != heredoc_tag.contents[i]) {
+                break;
+            }
+            has_consumed_content = true;
+            advance(lexer);
+            end_tag_matched = (i == heredoc_tag.size - 1 && (iswspace(lexer->lookahead) || lexer->lookahead == ';' ||
+                                                             lexer->lookahead == ',' || lexer->lookahead == ')'));
+        }
+        if (end_tag_matched) {
+            // There may be an arbitrary amount of white space after the end tag
+            // However, we should not consume \r or \n
+            while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
+                advance(lexer);
+                has_consumed_content = true;
+            }
+            // Return to allow the end tag parsing if we've encountered an end
+            // tag at a valid position
+            if (lexer->lookahead == ';' || lexer->lookahead == ',' || lexer->lookahead == ')' ||
+                lexer->lookahead == '\n' || lexer->lookahead == '\r') {
+                // , and ) is needed to support heredoc in function arguments
+                return false;
+            }
+        }
+    }
+    for (bool has_content = has_consumed_content;; has_content = true) {
+        lexer->mark_end(lexer);
+        switch (lexer->lookahead) {
+            case '"':
+                // Closes a plain double-quoted string; ordinary text otherwise
+                if (!is_heredoc && !is_execution_string) {
+                    return has_content;
+                }
+                advance(lexer);
+                break;
+            case '`':
+                // Closes an execution string; ordinary text otherwise
+                if (is_execution_string) {
+                    return has_content;
+                }
+                advance(lexer);
+                break;
+            case '\n':
+            case '\r':
+                // Heredoc chunks stop at every line break
+                if (is_heredoc) {
+                    return has_content;
+                }
+                advance(lexer);
+                break;
+            case '\\':
+                advance(lexer);
+                // \{ should not be interpreted as an escape sequence, but both
+                // should be consumed as normal characters
+                if (lexer->lookahead == '{') {
+                    advance(lexer);
+                    break;
+                }
+                if (is_execution_string && lexer->lookahead == '`') {
+                    return has_content;
+                }
+                if (is_heredoc && lexer->lookahead == '\\') {
+                    advance(lexer);
+                    break;
+                }
+                if (is_escapable_sequence(lexer)) {
+                    return has_content;
+                }
+                break;
+            case '$':
+                advance(lexer);
+                // `$name` (not starting with a digit) or `${` starts an interpolation
+                if ((is_valid_name_char(lexer) && !iswdigit(lexer->lookahead)) || lexer->lookahead == '{') {
+                    return has_content;
+                }
+                break;
+            case '-':
+                // `->name` right after a variable is a member access. In every
+                // other situation the '-' is ordinary text. This case used to
+                // fall through into `case '['` to consume the '-'; it is now
+                // self-contained with the same behaviour.
+                advance(lexer);
+                if (is_after_variable && lexer->lookahead == '>') {
+                    advance(lexer);
+                    if (is_valid_name_char(lexer)) {
+                        return has_content;
+                    }
+                }
+                break;
+            case '[':
+                // `[` right after a variable starts a subscript
+                if (is_after_variable) {
+                    return has_content;
+                }
+                advance(lexer);
+                break;
+            case '{':
+                advance(lexer);
+                // `{$` starts a complex interpolation
+                if (lexer->lookahead == '$') {
+                    return has_content;
+                }
+                break;
+            default:
+                if (lexer->eof(lexer)) {
+                    return false;
+                }
+                advance(lexer);
+        }
+        is_after_variable = false;
+    }
+    return false;
+}
+// Main scanning routine. Looks at `valid_symbols` in a fixed priority order,
+// stores the chosen token in lexer->result_symbol and returns whether that
+// token was recognised. Nothing is scanned while SENTINEL_ERROR is valid.
+static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
+    if (valid_symbols[SENTINEL_ERROR]) {
+        return false;
+    }
+    scanner->has_leading_whitespace = false;
+    lexer->mark_end(lexer);
+    // String chunks, listed in the order in which they are tried. The first
+    // valid entry decides the outcome.
+    static const struct {
+        enum TokenType symbol;
+        bool is_after_variable;
+        bool is_heredoc;
+        bool is_execution_string;
+    } string_chunks[] = {
+        {ENCAPSED_STRING_CHARS_AFTER_VARIABLE, true, false, false},
+        {ENCAPSED_STRING_CHARS, false, false, false},
+        {EXECUTION_STRING_CHARS_AFTER_VARIABLE, true, false, true},
+        {EXECUTION_STRING_CHARS, false, false, true},
+        {ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC, true, true, false},
+        {ENCAPSED_STRING_CHARS_HEREDOC, false, true, false},
+    };
+    for (size_t k = 0; k < sizeof(string_chunks) / sizeof(string_chunks[0]); k++) {
+        if (!valid_symbols[string_chunks[k].symbol]) {
+            continue;
+        }
+        lexer->result_symbol = string_chunks[k].symbol;
+        return scan_encapsed_part_string(scanner, lexer, string_chunks[k].is_after_variable,
+                                         string_chunks[k].is_heredoc, string_chunks[k].is_execution_string);
+    }
+    if (valid_symbols[NOWDOC_STRING]) {
+        lexer->result_symbol = NOWDOC_STRING;
+        return scan_nowdoc_string(scanner, lexer);
+    }
+    if (valid_symbols[HEREDOC_END]) {
+        lexer->result_symbol = HEREDOC_END;
+        if (scanner->heredocs.size == 0) {
+            return false;
+        }
+        for (; iswspace(lexer->lookahead); skip(lexer)) {
+        }
+        // The scanned word must equal the word of the last open heredoc.
+        String candidate = scan_heredoc_word(lexer);
+        const bool closes_heredoc = string_eq(&candidate, &array_back(&scanner->heredocs)->word);
+        array_delete(&candidate);
+        if (!closes_heredoc) {
+            return false;
+        }
+        lexer->mark_end(lexer);
+        // Pop the heredoc and dispose of the word it owned.
+        array_delete(&array_pop(&scanner->heredocs).word);
+        return true;
+    }
+    if (!scan_whitespace(lexer)) {
+        return false;
+    }
+    if (valid_symbols[EOF_TOKEN] && lexer->eof(lexer)) {
+        lexer->result_symbol = EOF_TOKEN;
+        return true;
+    }
+    if (valid_symbols[HEREDOC_START]) {
+        lexer->result_symbol = HEREDOC_START;
+        for (; iswspace(lexer->lookahead); skip(lexer)) {
+        }
+        String opening_word = scan_heredoc_word(lexer);
+        if (opening_word.size == 0) {
+            array_delete(&opening_word);
+            return false;
+        }
+        lexer->mark_end(lexer);
+        // The stack entry takes over the scanned word.
+        Heredoc opened = heredoc_new();
+        opened.word = opening_word;
+        array_push(&scanner->heredocs, opened);
+        return true;
+    }
+    if (valid_symbols[AUTOMATIC_SEMICOLON]) {
+        lexer->result_symbol = AUTOMATIC_SEMICOLON;
+        // Only `?>` qualifies.
+        if (lexer->lookahead == '?') {
+            advance(lexer);
+            return lexer->lookahead == '>';
+        }
+        return false;
+    }
+    return false;
+}
+// Allocates a zero-filled Scanner with ts_calloc(), initialises its heredoc
+// array and returns it as an opaque pointer. The matching release function is
+// external_scanner_destroy().
+// Declared with (void): before C23 an empty parameter list is a declaration
+// without a prototype.
+static inline void *external_scanner_create(void) {
+    Scanner *scanner = ts_calloc(1, sizeof(Scanner));
+    array_init(&scanner->heredocs);
+    return scanner;
+}
+// Thin adapter: treats the opaque `payload` as a Scanner and forwards to
+// serialize(), returning its byte count.
+static inline unsigned external_scanner_serialize(void *payload, char *buffer) {
+    return serialize((Scanner *)payload, buffer);
+}
+// Thin adapter: treats the opaque `payload` as a Scanner and forwards the
+// buffer and its length to deserialize().
+static inline void external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
+    deserialize((Scanner *)payload, buffer, length);
+}
+// Thin adapter: treats the opaque `payload` as a Scanner and forwards to
+// scan(), returning its result.
+static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+    return scan((Scanner *)payload, lexer, valid_symbols);
+}
+// Tears down a Scanner obtained from external_scanner_create(): every
+// heredoc word is passed to array_delete(), then the heredoc array itself,
+// and finally the Scanner is handed to ts_free().
+static inline void external_scanner_destroy(void *payload) {
+    Scanner *self = (Scanner *)payload;
+    size_t index = 0;
+    while (index < self->heredocs.size) {
+        Heredoc *entry = &self->heredocs.contents[index];
+        array_delete(&entry->word);
+        index++;
+    }
+    array_delete(&self->heredocs);
+    ts_free(self);
+}

diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/php/scanner.h b/vendor/github.com/mitjafelicijan/go-tree-sitter/php/scanner.h new file mode 100644 index 0000000..e16a21e --- /dev/null +++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/php/scanner.h
@@ -0,0 +1,543 @@
	1	#include "tree_sitter/array.h"
	2	#include "tree_sitter/parser.h"
	3
	4	#include <string.h>
	5	#include <wchar.h>
	6	#include <wctype.h>
	7
	8	enum TokenType {
	9	AUTOMATIC_SEMICOLON,
	10	ENCAPSED_STRING_CHARS,
	11	ENCAPSED_STRING_CHARS_AFTER_VARIABLE,
	12	EXECUTION_STRING_CHARS,
	13	EXECUTION_STRING_CHARS_AFTER_VARIABLE,
	14	ENCAPSED_STRING_CHARS_HEREDOC,
	15	ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC,
	16	EOF_TOKEN,
	17	HEREDOC_START,
	18	HEREDOC_END,
	19	NOWDOC_STRING,
	20	SENTINEL_ERROR, // Unused token used to indicate error recovery mode
	21	};
	22
	23	typedef Array(int32_t) String;
	24
	25	static inline bool string_eq(String self, String other) {
	26	if (self->size != other->size) {
	27	return false;
	28	}
	29	if (self->size == 0) {
	30	return self->size == other->size;
	31	}
	32	return memcmp(self->contents, other->contents, self->size * sizeof(self->contents[0])) == 0;
	33	}
	34
	35	typedef struct {
	36	bool end_word_indentation_allowed;
	37	String word;
	38	} Heredoc;
	39
	40	#define heredoc_new() \
	41	{ \
	42	.end_word_indentation_allowed = false, \
	43	.word = array_new(), \
	44	};
	45
	46	typedef struct {
	47	bool has_leading_whitespace;
	48	Array(Heredoc) heredocs;
	49	} Scanner;
	50
	51	typedef enum { Error, End } ScanContentResult;
	52
	53	static inline void reset_heredoc(Heredoc *heredoc) {
	54	array_delete(&heredoc->word);
	55	heredoc->end_word_indentation_allowed = false;
	56	}
	57
	58	static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
	59
	60	static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
	61
	62	static unsigned serialize(Scanner scanner, char buffer) {
	63	unsigned size = 0;
	64
	65	buffer[size++] = (char)scanner->heredocs.size;
	66	for (unsigned j = 0; j < scanner->heredocs.size; j++) {
	67	Heredoc *heredoc = &scanner->heredocs.contents[j];
	68	unsigned word_size = heredoc->word.size * sizeof(heredoc->word.contents[0]);
	69	if (size + 5 + word_size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
	70	return 0;
	71	}
	72	buffer[size++] = (char)heredoc->end_word_indentation_allowed;
	73	memcpy(&buffer[size], &heredoc->word.size, sizeof(uint32_t));
	74	size += sizeof(uint32_t);
	75	if (heredoc->word.size > 0) {
	76	memcpy(&buffer[size], heredoc->word.contents, word_size);
	77	size += word_size;
	78	}
	79	}
	80
	81	return size;
	82	}
	83
	84	static void deserialize(Scanner scanner, const char buffer, unsigned length) {
	85	unsigned size = 0;
	86	scanner->has_leading_whitespace = false;
	87
	88	for (uint32_t i = 0; i < scanner->heredocs.size; i++) {
	89	reset_heredoc(array_get(&scanner->heredocs, i));
	90	}
	91
	92	if (length == 0) {
	93	return;
	94	}
	95
	96	uint8_t open_heredoc_count = buffer[size++];
	97	for (unsigned i = 0; i < open_heredoc_count; i++) {
	98	Heredoc *heredoc = NULL;
	99	if (i < scanner->heredocs.size) {
	100	heredoc = array_get(&scanner->heredocs, i);
	101	} else {
	102	Heredoc new_heredoc = heredoc_new();
	103	array_push(&scanner->heredocs, new_heredoc);
	104	heredoc = array_back(&scanner->heredocs);
	105	}
	106
	107	heredoc->end_word_indentation_allowed = buffer[size++];
	108	memcpy(&heredoc->word.size, &buffer[size], sizeof(uint32_t));
	109	size += sizeof(uint32_t);
	110	unsigned word_size = heredoc->word.size * sizeof(heredoc->word.contents[0]);
	111	if (word_size > 0) {
	112	array_reserve(&heredoc->word, heredoc->word.size);
	113	memcpy(heredoc->word.contents, &buffer[size], word_size);
	114	size += word_size;
	115	}
	116	}
	117
	118	assert(size == length);
	119	}
	120
	121	static inline bool scan_whitespace(TSLexer *lexer) {
	122	for (;;) {
	123	while (iswspace(lexer->lookahead)) {
	124	advance(lexer);
	125	}
	126
	127	if (lexer->lookahead == '/') {
	128	advance(lexer);
	129
	130	if (lexer->lookahead == '/') {
	131	advance(lexer);
	132	while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
	133	advance(lexer);
	134	}
	135	} else {
	136	return false;
	137	}
	138	} else {
	139	return true;
	140	}
	141	}
	142	}
	143
	144	static inline bool is_valid_name_char(TSLexer *lexer) {
	145	return iswalnum(lexer->lookahead) \|\| lexer->lookahead == '_' \|\| lexer->lookahead >= 0x80;
	146	}
	147
	148	static inline bool is_escapable_sequence(TSLexer *lexer) {
	149	// Note: remember to also update the escape_sequence rule in the
	150	// main grammar whenever changing this method
	151	int32_t letter = lexer->lookahead;
	152
	153	if (letter == 'n' \|\| letter == 'r' \|\| letter == 't' \|\| letter == 'v' \|\| letter == 'e' \|\| letter == 'f' \|\|
	154	letter == '\\' \|\| letter == '$' \|\| letter == '"') {
	155	return true;
	156	}
	157
	158	// Hex
	159	if (letter == 'x') {
	160	advance(lexer);
	161	return iswxdigit(lexer->lookahead);
	162	}
	163
	164	// Unicode
	165	if (letter == 'u') {
	166	return true; // We handle the case where this is not really an escape
	167	// sequence in grammar.js - this is needed to support the
	168	// edge case "\u{$a}" in which case "\u" is to be
	169	// interpreted as characters and {$a} as a variable
	170	}
	171
	172	// Octal
	173	return iswdigit(lexer->lookahead) && lexer->lookahead >= '0' && lexer->lookahead <= '7';
	174	}
	175
	176	static String scan_heredoc_word(TSLexer *lexer) {
	177	String result = (String)array_new();
	178
	179	while (is_valid_name_char(lexer)) {
	180	array_push(&result, lexer->lookahead);
	181	advance(lexer);
	182	}
	183
	184	return result;
	185	}
	186
	187	static inline bool scan_nowdoc_string(Scanner scanner, TSLexer lexer) {
	188	bool has_consumed_content = false;
	189	if (scanner->heredocs.size == 0) {
	190	return false;
	191	}
	192
	193	// While PHP requires the nowdoc end tag to be the very first on a new line,
	194	// there may be an arbitrary amount of whitespace before the closing token
	195	while (iswspace(lexer->lookahead)) {
	196	advance(lexer);
	197	has_consumed_content = true;
	198	}
	199
	200	bool end_tag_matched = false;
	201	String heredoc_tag = array_back(&scanner->heredocs)->word;
	202
	203	for (uint32_t i = 0; i < heredoc_tag.size; i++) {
	204	if (lexer->lookahead != heredoc_tag.contents[i]) {
	205	break;
	206	}
	207	advance(lexer);
	208	has_consumed_content = true;
	209
	210	end_tag_matched = (i == heredoc_tag.size - 1 && (iswspace(lexer->lookahead) \|\| lexer->lookahead == ';' \|\|
	211	lexer->lookahead == ',' \|\| lexer->lookahead == ')'));
	212	}
	213
	214	if (end_tag_matched) {
	215	// There may be an arbitrary amount of white space after the end tag
	216	while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
	217	advance(lexer);
	218	has_consumed_content = true;
	219	}
	220
	221	// Return to allow the end tag parsing if we've encountered an end tag
	222	// at a valid position
	223	if (lexer->lookahead == ';' \|\| lexer->lookahead == ',' \|\| lexer->lookahead == ')' \|\| lexer->lookahead == '\n' \|\|
	224	lexer->lookahead == '\r') {
	225	// , and ) is needed to support heredoc in function arguments
	226	return false;
	227	}
	228	}
	229
	230	for (bool has_content = has_consumed_content;; has_content = true) {
	231	lexer->mark_end(lexer);
	232
	233	switch (lexer->lookahead) {
	234	case '\n':
	235	case '\r':
	236	return has_content;
	237	default:
	238	if (lexer->eof(lexer)) {
	239	return false;
	240	}
	241	advance(lexer);
	242	}
	243	}
	244
	245	return false;
	246	}
	247
	248	static bool scan_encapsed_part_string(Scanner scanner, TSLexer lexer, bool is_after_variable, bool is_heredoc,
	249	bool is_execution_string) {
	250	bool has_consumed_content = false;
	251
	252	if (is_heredoc && scanner->heredocs.size > 0) {
	253	// While PHP requires the heredoc end tag to be the very first on a new
	254	// line, there may be an arbitrary amount of whitespace before the
	255	// closing token However, we should not consume \r or \n
	256	while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
	257	advance(lexer);
	258	has_consumed_content = true;
	259	}
	260
	261	String heredoc_tag = array_back(&scanner->heredocs)->word;
	262
	263	bool end_tag_matched = false;
	264
	265	for (uint32_t i = 0; i < heredoc_tag.size; i++) {
	266	if (lexer->lookahead != heredoc_tag.contents[i]) {
	267	break;
	268	}
	269	has_consumed_content = true;
	270	advance(lexer);
	271
	272	end_tag_matched = (i == heredoc_tag.size - 1 && (iswspace(lexer->lookahead) \|\| lexer->lookahead == ';' \|\|
	273	lexer->lookahead == ',' \|\| lexer->lookahead == ')'));
	274	}
	275
	276	if (end_tag_matched) {
	277	// There may be an arbitrary amount of white space after the end tag
	278	// However, we should not consume \r or \n
	279	while (iswspace(lexer->lookahead) && lexer->lookahead != '\r' && lexer->lookahead != '\n') {
	280	advance(lexer);
	281	has_consumed_content = true;
	282	}
	283
	284	// Return to allow the end tag parsing if we've encountered an end
	285	// tag at a valid position
	286	if (lexer->lookahead == ';' \|\| lexer->lookahead == ',' \|\| lexer->lookahead == ')' \|\|
	287	lexer->lookahead == '\n' \|\| lexer->lookahead == '\r') {
	288	// , and ) is needed to support heredoc in function arguments
	289	return false;
	290	}
	291	}
	292	}
	293
	294	for (bool has_content = has_consumed_content;; has_content = true) {
	295	lexer->mark_end(lexer);
	296
	297	switch (lexer->lookahead) {
	298	case '"':
	299	if (!is_heredoc && !is_execution_string) {
	300	return has_content;
	301	}
	302	advance(lexer);
	303	break;
	304	case '`':
	305	if (is_execution_string) {
	306	return has_content;
	307	}
	308	advance(lexer);
	309	break;
	310	case '\n':
	311	case '\r':
	312	if (is_heredoc) {
	313	return has_content;
	314	}
	315	advance(lexer);
	316	break;
	317	case '\\':
	318	advance(lexer);
	319
	320	// \{ should not be interpreted as an escape sequence, but both
	321	// should be consumed as normal characters
	322	if (lexer->lookahead == '{') {
	323	advance(lexer);
	324	break;
	325	}
	326
	327	if (is_execution_string && lexer->lookahead == '`') {
	328	return has_content;
	329	}
	330
	331	if (is_heredoc && lexer->lookahead == '\\') {
	332	advance(lexer);
	333	break;
	334	}
	335
	336	if (is_escapable_sequence(lexer)) {
	337	return has_content;
	338	}
	339	break;
	340	case '$':
	341	advance(lexer);
	342
	343	if ((is_valid_name_char(lexer) && !iswdigit(lexer->lookahead)) \|\| lexer->lookahead == '{') {
	344	return has_content;
	345	}
	346	break;
	347	case '-':
	348	if (is_after_variable) {
	349	advance(lexer);
	350	if (lexer->lookahead == '>') {
	351	advance(lexer);
	352	if (is_valid_name_char(lexer)) {
	353	return has_content;
	354	}
	355	break;
	356	}
	357	break;
	358	}
	359	case '[':
	360	if (is_after_variable) {
	361	return has_content;
	362	}
	363	advance(lexer);
	364	break;
	365	case '{':
	366	advance(lexer);
	367	if (lexer->lookahead == '$') {
	368	return has_content;
	369	}
	370	break;
	371	default:
	372	if (lexer->eof(lexer)) {
	373	return false;
	374	}
	375	advance(lexer);
	376	}
	377
	378	is_after_variable = false;
	379	}
	380
	381	return false;
	382	}
	383
	384	static bool scan(Scanner scanner, TSLexer lexer, const bool *valid_symbols) {
	385	const bool is_error_recovery = valid_symbols[SENTINEL_ERROR];
	386
	387	if (is_error_recovery) {
	388	return false;
	389	}
	390
	391	scanner->has_leading_whitespace = false;
	392
	393	lexer->mark_end(lexer);
	394
	395	if (valid_symbols[ENCAPSED_STRING_CHARS_AFTER_VARIABLE]) {
	396	lexer->result_symbol = ENCAPSED_STRING_CHARS_AFTER_VARIABLE;
	397	return scan_encapsed_part_string(scanner, lexer,
	398	/* is_after_variable */ true,
	399	/* is_heredoc */ false,
	400	/* is_execution_string */ false);
	401	}
	402
	403	if (valid_symbols[ENCAPSED_STRING_CHARS]) {
	404	lexer->result_symbol = ENCAPSED_STRING_CHARS;
	405	return scan_encapsed_part_string(scanner, lexer,
	406	/* is_after_variable */ false,
	407	/* is_heredoc */ false,
	408	/* is_execution_string */ false);
	409	}
	410
	411	if (valid_symbols[EXECUTION_STRING_CHARS_AFTER_VARIABLE]) {
	412	lexer->result_symbol = EXECUTION_STRING_CHARS_AFTER_VARIABLE;
	413	return scan_encapsed_part_string(scanner, lexer,
	414	/* is_after_variable */ true,
	415	/* is_heredoc */ false,
	416	/* is_execution_string */ true);
	417	}
	418
	419	if (valid_symbols[EXECUTION_STRING_CHARS]) {
	420	lexer->result_symbol = EXECUTION_STRING_CHARS;
	421	return scan_encapsed_part_string(scanner, lexer,
	422	/* is_after_variable */ false,
	423	/* is_heredoc */ false,
	424	/* is_execution_string */ true);
	425	}
	426
	427	if (valid_symbols[ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC]) {
	428	lexer->result_symbol = ENCAPSED_STRING_CHARS_AFTER_VARIABLE_HEREDOC;
	429	return scan_encapsed_part_string(scanner, lexer,
	430	/* is_after_variable */ true,
	431	/* is_heredoc */ true,
	432	/* is_execution_string */ false);
	433	}
	434
	435	if (valid_symbols[ENCAPSED_STRING_CHARS_HEREDOC]) {
	436	lexer->result_symbol = ENCAPSED_STRING_CHARS_HEREDOC;
	437	return scan_encapsed_part_string(scanner, lexer,
	438	/* is_after_variable */ false,
	439	/* is_heredoc */ true,
	440	/* is_execution_string */ false);
	441	}
	442
	443	if (valid_symbols[NOWDOC_STRING]) {
	444	lexer->result_symbol = NOWDOC_STRING;
	445	return scan_nowdoc_string(scanner, lexer);
	446	}
	447
	448	if (valid_symbols[HEREDOC_END]) {
	449	lexer->result_symbol = HEREDOC_END;
	450	if (scanner->heredocs.size == 0) {
	451	return false;
	452	}
	453
	454	Heredoc heredoc = *array_back(&scanner->heredocs);
	455
	456	while (iswspace(lexer->lookahead)) {
	457	skip(lexer);
	458	}
	459
	460	String word = scan_heredoc_word(lexer);
	461	if (!string_eq(&word, &heredoc.word)) {
	462	array_delete(&word);
	463	return false;
	464	}
	465	array_delete(&word);
	466
	467	lexer->mark_end(lexer);
	468	array_delete(&array_pop(&scanner->heredocs).word);
	469	return true;
	470	}
	471
	472	if (!scan_whitespace(lexer)) {
	473	return false;
	474	}
	475
	476	if (valid_symbols[EOF_TOKEN] && lexer->eof(lexer)) {
	477	lexer->result_symbol = EOF_TOKEN;
	478	return true;
	479	}
	480
	481	if (valid_symbols[HEREDOC_START]) {
	482	lexer->result_symbol = HEREDOC_START;
	483	Heredoc heredoc = heredoc_new();
	484
	485	while (iswspace(lexer->lookahead)) {
	486	skip(lexer);
	487	}
	488
	489	heredoc.word = scan_heredoc_word(lexer);
	490	if (heredoc.word.size == 0) {
	491	array_delete(&heredoc.word);
	492	return false;
	493	}
	494	lexer->mark_end(lexer);
	495
	496	array_push(&scanner->heredocs, heredoc);
	497	return true;
	498	}
	499
	500	if (valid_symbols[AUTOMATIC_SEMICOLON]) {
	501	lexer->result_symbol = AUTOMATIC_SEMICOLON;
	502
	503	if (lexer->lookahead != '?') {
	504	return false;
	505	}
	506
	507	advance(lexer);
	508
	509	return lexer->lookahead == '>';
	510	}
	511
	512	return false;
	513	}
	514
	515	static inline void *external_scanner_create() {
	516	Scanner *scanner = ts_calloc(1, sizeof(Scanner));
	517	array_init(&scanner->heredocs);
	518	return scanner;
	519	}
	520
	521	static inline unsigned external_scanner_serialize(void payload, char buffer) {
	522	Scanner scanner = (Scanner )payload;
	523	return serialize(scanner, buffer);
	524	}
	525
	526	static inline void external_scanner_deserialize(void payload, const char buffer, unsigned length) {
	527	Scanner scanner = (Scanner )payload;
	528	deserialize(scanner, buffer, length);
	529	}
	530
	531	static inline bool external_scanner_scan(void payload, TSLexer lexer, const bool *valid_symbols) {
	532	Scanner scanner = (Scanner )payload;
	533	return scan(scanner, lexer, valid_symbols);
	534	}
	535
	536	static inline void external_scanner_destroy(void *payload) {
	537	Scanner scanner = (Scanner )payload;
	538	for (size_t i = 0; i < scanner->heredocs.size; i++) {
	539	array_delete(&scanner->heredocs.contents[i].word);
	540	}
	541	array_delete(&scanner->heredocs);
	542	ts_free(scanner);
	543	}