1#include "tree_sitter/parser.h"
2
3#include <ctype.h>
4#include <stdio.h>
5#include <string.h>
6#include <wctype.h>
7
// Token kinds handled by the external scanner. Each value is used as an index
// into `valid_symbols` and/or stored in `lexer->result_symbol` by the scan
// function.
// NOTE(review): the order presumably has to match the `externals` list of the
// grammar definition -- confirm against the grammar before reordering.
enum {
    NEWLINE = 0,       // '\n' that ends a line
    BACKSLASH = 1,     // '\' directly followed by '\n'
    NL_COMMA = 2,      // ',' followed by a newline (rejected when '}' comes next)
    FLOAT = 3,         // floating point literal
    BLOCK_COMMENT = 4, // nestable /* ... */ comment
    BRACKET = 5,       // only consulted through valid_symbols, never emitted here
    QUOTE = 6,         // not referenced by the scanner code in this file
};
17
18static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
19
20static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
21
// Create the scanner payload. This scanner keeps no state, so there is nothing
// to allocate and NULL is returned.
// Declared with an explicit `(void)` parameter list so the definition is a real
// prototype (an empty `()` list is an old-style declaration before C23).
void *tree_sitter_odin_external_scanner_create(void) {
    return NULL;
}
23
// Serialize the scanner state into `buffer`. There is no state, so nothing is
// written and the reported length is 0.
unsigned tree_sitter_odin_external_scanner_serialize(void *payload, char *buffer) {
    (void)payload;
    (void)buffer;
    return 0u;
}
25
// Restore scanner state from `buffer`. There is no state to restore, so all
// arguments are ignored.
void tree_sitter_odin_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    (void)payload;
    (void)buffer;
    (void)length;
}
27
28bool tree_sitter_odin_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
29 if (valid_symbols[FLOAT]) {
30 while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
31 skip(lexer);
32 }
33
34 if (!valid_symbols[NEWLINE]) { // skip newlines too
35 while (iswspace(lexer->lookahead)) {
36 skip(lexer);
37 }
38 }
39
40 // basically, -? [0-9]+ \. [0-9]*, BUT a second . after isnt allowed
41 // cuz it could be ..< operator,
42 // it can have an i at the end for imaginary numbers
43 // and exponents, [eE][+-]?[0-9]+, imaginary comes after
44
45 // needs ONE of these two to be float
46 bool found_decimal = false;
47 bool found_exponent = false;
48 bool found_number_before_decimal = false;
49 bool found_number_after_decimal = false;
50 bool found_number_after_expontent = false;
51 for (int i = 0;; i++) {
52 switch (lexer->lookahead) {
53 case '.':
54 if ((found_decimal || found_exponent) &&
55 (found_number_after_decimal || found_number_before_decimal)) {
56 lexer->result_symbol = FLOAT;
57 lexer->mark_end(lexer);
58 return true;
59 } else {
60 lexer->mark_end(lexer);
61 found_decimal = true;
62 advance(lexer);
63 if (lexer->lookahead == '.') {
64 advance(lexer);
65 goto newline;
66 }
67 lexer->mark_end(lexer);
68 if (!isdigit(lexer->lookahead) && (found_number_after_decimal || found_number_before_decimal)) {
69 lexer->result_symbol = FLOAT;
70 return true;
71 }
72 }
73 break;
74 case 'i':
75 case 'j':
76 case 'k':
77 if (!found_number_after_decimal) {
78 goto newline;
79 }
80 if ((found_decimal || found_exponent) &&
81 (found_number_after_decimal || found_number_before_decimal)) {
82 advance(lexer);
83 lexer->result_symbol = FLOAT;
84 lexer->mark_end(lexer);
85 return true;
86 }
87 goto newline;
88 case 'e':
89 case 'E':
90 if ((found_exponent) && (found_number_after_decimal || found_number_before_decimal)) {
91 lexer->result_symbol = FLOAT;
92 lexer->mark_end(lexer);
93 return true;
94 } else if (found_number_before_decimal || found_number_after_decimal) {
95 found_exponent = true;
96 advance(lexer);
97 } else {
98 goto newline;
99 }
100 break;
101 case '+':
102 case '-':
103 if (i == 0 || (found_exponent && !found_number_after_expontent)) {
104 advance(lexer);
105 } else {
106 goto newline;
107 }
108 break;
109 default:
110 if (lexer->lookahead <= 255 && isdigit(lexer->lookahead)) {
111 advance(lexer);
112 if (found_decimal) {
113 found_number_after_decimal = true;
114 } else {
115 found_number_before_decimal = true;
116 }
117 if (found_exponent && !found_number_after_expontent) {
118 found_number_after_expontent = true;
119 }
120 } else {
121 if ((found_decimal || found_exponent) &&
122 (found_number_after_decimal || found_number_before_decimal)) {
123 lexer->result_symbol = FLOAT;
124 lexer->mark_end(lexer);
125 return true;
126 }
127 if (found_number_before_decimal) {
128 return false; // number needs to match
129 }
130 goto newline;
131 }
132 }
133 }
134 }
135
136 if (valid_symbols[NL_COMMA]) {
137 while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
138 skip(lexer);
139 }
140
141 if (lexer->lookahead == ',') {
142 advance(lexer);
143 lexer->result_symbol = NL_COMMA;
144 lexer->mark_end(lexer);
145 while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
146 advance(lexer);
147 }
148
149 if (lexer->lookahead == '\n') {
150 while (iswspace(lexer->lookahead)) {
151 advance(lexer);
152 }
153 return lexer->lookahead != '}';
154 }
155 }
156 }
157
158newline:
159 if (valid_symbols[NEWLINE]) {
160 while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
161 skip(lexer);
162 }
163
164 if (lexer->lookahead == '\n') {
165 advance(lexer);
166 lexer->result_symbol = NEWLINE;
167 lexer->mark_end(lexer);
168
169 uint32_t nl_count = 0;
170
171 while (iswspace(lexer->lookahead)) {
172 if (lexer->lookahead == '\n') {
173 nl_count++;
174 }
175 skip(lexer);
176 }
177
178 const char *where = "where";
179 const char *_else = "else";
180 const char *bracket = "{";
181
182 char next_word[6] = {0};
183
184 // check for where and _else
185
186 for (int i = 0; i < 5; i++) {
187 if (iswspace(lexer->lookahead)) {
188 break;
189 }
190 next_word[i] = (char)lexer->lookahead;
191 advance(lexer);
192 }
193
194 if (strcmp(next_word, where) == 0 || strcmp(next_word, _else) == 0) {
195 if (!iswspace(lexer->lookahead)) {
196 return true;
197 }
198 goto backslash;
199 }
200
201 if (strcmp(next_word, bracket) == 0 && nl_count == 0 && valid_symbols[BRACKET]) {
202 return false;
203 }
204
205 return true;
206 }
207 // if (lexer->lookahead == ';') {
208 // advance(lexer);
209 // lexer->result_symbol = SEPARATOR;
210 // lexer->mark_end(lexer);
211 // while (iswspace(lexer->lookahead)) {
212 // advance(lexer);
213 // }
214 // return true;
215 // }
216 }
217
218backslash:
219 if (valid_symbols[BACKSLASH] && lexer->lookahead == '\\') {
220 advance(lexer);
221 if (lexer->lookahead == '\n') {
222 advance(lexer);
223 while (iswspace(lexer->lookahead)) {
224 advance(lexer);
225 }
226 lexer->result_symbol = BACKSLASH;
227 return true;
228 }
229 }
230
231 while (iswspace(lexer->lookahead)) {
232 skip(lexer);
233 }
234
235 if (valid_symbols[BLOCK_COMMENT] && lexer->lookahead == '/') {
236 advance(lexer);
237 if (lexer->lookahead != '*') {
238 return false;
239 }
240 advance(lexer);
241
242 if (lexer->lookahead == '"') {
243 return false;
244 }
245
246 bool after_star = false;
247 unsigned nesting_depth = 1;
248 for (;;) {
249 switch (lexer->lookahead) {
250 case '\0':
251 return false;
252 case '*':
253 advance(lexer);
254 after_star = true;
255 break;
256 case '/':
257 if (after_star) {
258 advance(lexer);
259 after_star = false;
260 nesting_depth--;
261 if (nesting_depth == 0) {
262 lexer->result_symbol = BLOCK_COMMENT;
263 return true;
264 }
265 } else {
266 advance(lexer);
267 after_star = false;
268 if (lexer->lookahead == '*') {
269 nesting_depth++;
270 advance(lexer);
271 }
272 }
273 break;
274 default:
275 advance(lexer);
276 after_star = false;
277 break;
278 }
279 }
280 }
281
282 return false;
283}
284
// Release the scanner payload. The body is empty, so the payload is left
// untouched.
void tree_sitter_odin_external_scanner_destroy(void *payload) {
    (void)payload;
}