1#include "parser.h"
2#include <wctype.h>
3#include <stdio.h>
4
/*
 * Token kinds produced by this external scanner.
 *
 * The numeric value of each enumerator is used both as an index into the
 * `valid_symbols` array handed to the scan function and as the value stored
 * in `lexer->result_symbol`.
 * NOTE(review): the order presumably has to mirror the `externals` list of
 * the grammar -- confirm against grammar.js before reordering.
 */
enum TokenType {
  /* Pieces of a long-bracket comment. */
  BLOCK_COMMENT_START = 0,
  BLOCK_COMMENT_CONTENT = 1,
  BLOCK_COMMENT_END = 2,

  /* Pieces of a string (quoted or long-bracket). */
  STRING_START = 3,
  STRING_CONTENT = 4,
  STRING_END = 5,
};
14
15static inline void consume(TSLexer *lexer) { lexer->advance(lexer, false); }
16static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
17
18static inline bool consume_char(char c, TSLexer *lexer) {
19 if (lexer->lookahead != c) {
20 return false;
21 }
22
23 consume(lexer);
24 return true;
25}
26
27static inline uint8_t consume_and_count_char(char c, TSLexer *lexer) {
28 uint8_t count = 0;
29 while (lexer->lookahead == c) {
30 ++count;
31 consume(lexer);
32 }
33 return count;
34}
35
36static inline void skip_whitespaces(TSLexer *lexer) {
37 while (iswspace(lexer->lookahead)) {
38 skip(lexer);
39 }
40}
41
/*
 * Create the scanner payload.
 *
 * This scanner keeps its state in file-scope variables rather than in a
 * heap-allocated payload, so there is nothing to allocate: always returns
 * NULL.
 */
void *tree_sitter_lua_external_scanner_create(void) { return NULL; }
/* Release the scanner payload. Nothing is done with `payload`. */
void tree_sitter_lua_external_scanner_destroy(void *payload) {
  (void)payload;
}
44
/*
 * Scanner state shared by all scan helpers in this file.
 *
 * Both variables have internal linkage so that their generic names cannot
 * clash with symbols from other objects linked into the same binary.
 *
 * ending_char: the quote character ('"' or '\'') recorded when a quoted
 *              string is opened; 0 otherwise, which the helpers treat as
 *              "long-bracket mode".
 * level_count: number of '=' characters seen between the two '[' of the most
 *              recently scanned long-bracket opener.
 */
static char ending_char = 0;
static uint8_t level_count = 0;

/* Put the scanner state back to its initial values. */
static inline void reset_state(void) {
  ending_char = 0;
  level_count = 0;
}
52
53unsigned tree_sitter_lua_external_scanner_serialize(void *payload, char *buffer) {
54 buffer[0] = ending_char;
55 buffer[1] = level_count;
56 return 2;
57}
58
59void tree_sitter_lua_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
60 if (length == 0) return;
61 ending_char = buffer[0];
62 if (length == 1) return;
63 level_count = buffer[1];
64}
65
66static bool scan_block_start(TSLexer *lexer) {
67 if (consume_char('[', lexer)) {
68 uint8_t level = consume_and_count_char('=', lexer);
69
70 if (consume_char('[', lexer)) {
71 level_count = level;
72 return true;
73 }
74 }
75
76 return false;
77}
78
79static bool scan_block_end(TSLexer *lexer) {
80 if (consume_char(']', lexer)) {
81 uint8_t level = consume_and_count_char('=', lexer);
82
83 if (level_count == level && consume_char(']', lexer)) {
84 return true;
85 }
86 }
87
88 return false;
89}
90
91static bool scan_block_content(TSLexer *lexer) {
92 while (lexer->lookahead != 0) {
93 if (lexer->lookahead == ']') {
94 lexer->mark_end(lexer);
95
96 if (scan_block_end(lexer)) {
97 return true;
98 }
99 } else {
100 consume(lexer);
101 }
102 }
103
104 return false;
105}
106
107static bool scan_comment_start(TSLexer *lexer) {
108 if (consume_char('-', lexer) && consume_char('-', lexer)) {
109 lexer->mark_end(lexer);
110
111 if (scan_block_start(lexer)) {
112 lexer->mark_end(lexer);
113 lexer->result_symbol = BLOCK_COMMENT_START;
114 return true;
115 }
116 }
117
118 return false;
119}
120
121static bool scan_comment_content(TSLexer *lexer) {
122 if (ending_char == 0) { // block comment
123 if (scan_block_content(lexer)) {
124 lexer->result_symbol = BLOCK_COMMENT_CONTENT;
125 return true;
126 }
127
128 return false;
129 }
130
131 while (lexer->lookahead != 0) {
132 if (lexer->lookahead == ending_char) {
133 reset_state();
134 lexer->result_symbol = BLOCK_COMMENT_CONTENT;
135 return true;
136 }
137
138 consume(lexer);
139 }
140
141 return false;
142}
143
144static bool scan_string_start(TSLexer *lexer) {
145 if (lexer->lookahead == '"' || lexer->lookahead == '\'') {
146 ending_char = lexer->lookahead;
147 consume(lexer);
148 return true;
149 }
150
151 if (scan_block_start(lexer)) {
152 return true;
153 }
154
155 return false;
156}
157
158static bool scan_string_end(TSLexer *lexer) {
159 if (ending_char == 0) { // block string
160 return scan_block_end(lexer);
161 }
162
163 if (consume_char(ending_char, lexer)) {
164 return true;
165 }
166
167 return false;
168}
169
170static bool scan_string_content(TSLexer *lexer) {
171 if (ending_char == 0) { // block string
172 return scan_block_content(lexer);
173 }
174
175 while (lexer->lookahead != '\n' && lexer->lookahead != 0 && lexer->lookahead != ending_char) {
176 if (consume_char('\\', lexer) && consume_char('z', lexer)) {
177 while (iswspace(lexer->lookahead)) {
178 consume(lexer);
179 }
180 continue;
181 };
182
183 if (lexer->lookahead == 0) {
184 return true;
185 }
186
187 consume(lexer);
188 }
189
190 return true;
191}
192
193bool tree_sitter_lua_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
194 if (valid_symbols[STRING_END] && scan_string_end(lexer)) {
195 reset_state();
196 lexer->result_symbol = STRING_END;
197 return true;
198 }
199
200 if (valid_symbols[STRING_CONTENT] && scan_string_content(lexer)) {
201 lexer->result_symbol = STRING_CONTENT;
202 return true;
203 }
204
205 if (valid_symbols[BLOCK_COMMENT_END] && ending_char == 0 && scan_block_end(lexer)) {
206 reset_state();
207 lexer->result_symbol = BLOCK_COMMENT_END;
208 return true;
209 }
210
211 if (valid_symbols[BLOCK_COMMENT_CONTENT] && scan_comment_content(lexer)) {
212 return true;
213 }
214
215 skip_whitespaces(lexer);
216
217 if (valid_symbols[STRING_START] && scan_string_start(lexer)) {
218 lexer->result_symbol = STRING_START;
219 return true;
220 }
221
222 if (valid_symbols[BLOCK_COMMENT_START]) {
223 if (scan_comment_start(lexer)) {
224 return true;
225 }
226 }
227
228 return false;
229}