1#include <stdio.h>
2#include "tree_sitter/parser.h"
3#include <wctype.h>
4
/*
 * Tokens this external scanner can produce.
 *
 * Each enumerator is used both as an index into the `valid_symbols` array
 * passed to the scan entry point and as the value stored in
 * `lexer->result_symbol` when the corresponding token is recognised.
 *
 * NOTE(review): the order presumably has to match the `externals` list of the
 * grammar definition -- confirm against grammar.js before reordering.
 */
enum TokenType {
 /* "--" followed by an opening long bracket, content, closing long bracket. */
 BLOCK_COMMENT_START,
 BLOCK_COMMENT_CONTENT,
 BLOCK_COMMENT_END,

 /* Opening long bracket, content, closing long bracket of a block string. */
 BLOCK_STRING_START,
 BLOCK_STRING_CONTENT,
 BLOCK_STRING_END,
};
14
15static inline void consume(TSLexer *lexer) { lexer->advance(lexer, false); }
16
17static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
18
19static inline bool consume_char(char c, TSLexer *lexer) {
20 if (lexer->lookahead != c) {
21 return false;
22 }
23
24 consume(lexer);
25 return true;
26}
27
28static inline uint8_t consume_and_count_char(char c, TSLexer *lexer) {
29 uint8_t count = 0;
30 while (lexer->lookahead == c) {
31 ++count;
32 consume(lexer);
33 }
34 return count;
35}
36
37static inline void skip_whitespaces(TSLexer *lexer) {
38 while (iswspace(lexer->lookahead)) {
39 skip(lexer);
40 }
41}
42
/*
 * Persistent scanner state; it is written out as two bytes by the serialize
 * callback and read back by the deserialize callback.
 */
typedef struct {
 /*
  * When 0, comment content is scanned as a block comment that ends at a
  * matching closing long bracket; otherwise comment content runs up to the
  * first occurrence of this character.
  */
 char ending_char;
 /*
  * Number of '=' characters counted between the two '[' of the opening long
  * bracket; a closing long bracket only matches with the same count.
  */
 uint8_t level_count;
} Scanner;
47
48static inline void reset_state(Scanner *scanner) {
49 scanner->ending_char = 0;
50 scanner->level_count = 0;
51}
52
53void *tree_sitter_lua_external_scanner_create() {
54 Scanner *scanner = calloc(1, sizeof(Scanner));
55 return scanner;
56}
57
/*
 * Release the scanner state previously returned by the create callback.
 * `payload` is handed straight to free(), so a NULL payload is a no-op.
 */
void tree_sitter_lua_external_scanner_destroy(void *payload) {
    free(payload);
}
62
63unsigned tree_sitter_lua_external_scanner_serialize(void *payload, char *buffer) {
64 Scanner *scanner = (Scanner *)payload;
65 buffer[0] = scanner->ending_char;
66 buffer[1] = (char)scanner->level_count;
67 return 2;
68}
69
70void tree_sitter_lua_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
71 Scanner *scanner = (Scanner *)payload;
72 if (length == 0) return;
73 scanner->ending_char = buffer[0];
74 if (length == 1) return;
75 scanner->level_count = buffer[1];
76}
77
78static bool scan_block_start(Scanner *scanner, TSLexer *lexer) {
79 if (consume_char('[', lexer)) {
80 uint8_t level = consume_and_count_char('=', lexer);
81
82 if (consume_char('[', lexer)) {
83 scanner->level_count = level;
84 return true;
85 }
86 }
87
88 return false;
89}
90
91static bool scan_block_end(Scanner *scanner, TSLexer *lexer) {
92 if (consume_char(']', lexer)) {
93 uint8_t level = consume_and_count_char('=', lexer);
94
95 if (scanner->level_count == level && consume_char(']', lexer)) {
96 return true;
97 }
98 }
99
100 return false;
101}
102
103static bool scan_block_content(Scanner *scanner, TSLexer *lexer) {
104 while (lexer->lookahead != 0) {
105 if (lexer->lookahead == ']') {
106 lexer->mark_end(lexer);
107
108 if (scan_block_end(scanner, lexer)) {
109 return true;
110 }
111 } else {
112 consume(lexer);
113 }
114 }
115
116 return false;
117}
118
119static bool scan_comment_start(Scanner *scanner, TSLexer *lexer) {
120 if (consume_char('-', lexer) && consume_char('-', lexer)) {
121 lexer->mark_end(lexer);
122
123 if (scan_block_start(scanner, lexer)) {
124 lexer->mark_end(lexer);
125 lexer->result_symbol = BLOCK_COMMENT_START;
126 return true;
127 }
128 }
129
130 return false;
131}
132
133static bool scan_comment_content(Scanner *scanner, TSLexer *lexer) {
134 if (scanner->ending_char == 0) { // block comment
135 if (scan_block_content(scanner, lexer)) {
136 lexer->result_symbol = BLOCK_COMMENT_CONTENT;
137 return true;
138 }
139
140 return false;
141 }
142
143 while (lexer->lookahead != 0) {
144 if (lexer->lookahead == scanner->ending_char) {
145 reset_state(scanner);
146 lexer->result_symbol = BLOCK_COMMENT_CONTENT;
147 return true;
148 }
149
150 consume(lexer);
151 }
152
153 return false;
154}
155
156bool tree_sitter_lua_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
157 Scanner *scanner = (Scanner *)payload;
158
159 if (valid_symbols[BLOCK_STRING_END] && scan_block_end(scanner, lexer)) {
160 reset_state(scanner);
161 lexer->result_symbol = BLOCK_STRING_END;
162 return true;
163 }
164
165 if (valid_symbols[BLOCK_STRING_CONTENT] && scan_block_content(scanner, lexer)) {
166 lexer->result_symbol = BLOCK_STRING_CONTENT;
167 return true;
168 }
169
170 if (valid_symbols[BLOCK_COMMENT_END] && scanner->ending_char == 0 && scan_block_end(scanner, lexer)) {
171 reset_state(scanner);
172 lexer->result_symbol = BLOCK_COMMENT_END;
173 return true;
174 }
175
176 if (valid_symbols[BLOCK_COMMENT_CONTENT] && scan_comment_content(scanner, lexer)) {
177 return true;
178 }
179
180 skip_whitespaces(lexer);
181
182 if (valid_symbols[BLOCK_STRING_START] && scan_block_start(scanner, lexer)) {
183 lexer->result_symbol = BLOCK_STRING_START;
184 return true;
185 }
186
187 if (valid_symbols[BLOCK_COMMENT_START]) {
188 if (scan_comment_start(scanner, lexer)) {
189 return true;
190 }
191 }
192
193 return false;
194}