1#include "parser.h"
  2#include <wctype.h>
  3#include <stdio.h>
  4
  5enum TokenType {
  6  BLOCK_COMMENT_START,
  7  BLOCK_COMMENT_CONTENT,
  8  BLOCK_COMMENT_END,
  9
 10  STRING_START,
 11  STRING_CONTENT,
 12  STRING_END,
 13};
 14
 15static inline void consume(TSLexer *lexer) { lexer->advance(lexer, false); }
 16static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
 17
 18static inline bool consume_char(char c, TSLexer *lexer) {
 19  if (lexer->lookahead != c) {
 20    return false;
 21  }
 22
 23  consume(lexer);
 24  return true;
 25}
 26
 27static inline uint8_t consume_and_count_char(char c, TSLexer *lexer) {
 28  uint8_t count = 0;
 29  while (lexer->lookahead == c) {
 30    ++count;
 31    consume(lexer);
 32  }
 33  return count;
 34}
 35
 36static inline void skip_whitespaces(TSLexer *lexer) {
 37  while (iswspace(lexer->lookahead)) {
 38    skip(lexer);
 39  }
 40}
 41
 42void *tree_sitter_lua_external_scanner_create() { return NULL; }
 43void tree_sitter_lua_external_scanner_destroy(void *payload) {}
 44
 45char ending_char = 0;
 46uint8_t level_count = 0;
 47
 48static inline void reset_state() {
 49  ending_char = 0;
 50  level_count = 0;
 51}
 52
 53unsigned tree_sitter_lua_external_scanner_serialize(void *payload, char *buffer) {
 54  buffer[0] = ending_char;
 55  buffer[1] = level_count;
 56  return 2;
 57}
 58
 59void tree_sitter_lua_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
 60  if (length == 0) return;
 61  ending_char = buffer[0];
 62  if (length == 1) return;
 63  level_count = buffer[1];
 64}
 65
 66static bool scan_block_start(TSLexer *lexer) {
 67  if (consume_char('[', lexer)) {
 68    uint8_t level = consume_and_count_char('=', lexer);
 69
 70    if (consume_char('[', lexer)) {
 71      level_count = level;
 72      return true;
 73    }
 74  }
 75
 76  return false;
 77}
 78
 79static bool scan_block_end(TSLexer *lexer) {
 80  if (consume_char(']', lexer)) {
 81    uint8_t level = consume_and_count_char('=', lexer);
 82
 83    if (level_count == level && consume_char(']', lexer)) {
 84      return true;
 85    }
 86  }
 87
 88  return false;
 89}
 90
 91static bool scan_block_content(TSLexer *lexer) {
 92  while (lexer->lookahead != 0) {
 93    if (lexer->lookahead == ']') {
 94      lexer->mark_end(lexer);
 95
 96      if (scan_block_end(lexer)) {
 97        return true;
 98      }
 99    } else {
100      consume(lexer);
101    }
102  }
103
104  return false;
105}
106
107static bool scan_comment_start(TSLexer *lexer) {
108  if (consume_char('-', lexer) && consume_char('-', lexer)) {
109    lexer->mark_end(lexer);
110
111    if (scan_block_start(lexer)) {
112      lexer->mark_end(lexer);
113      lexer->result_symbol = BLOCK_COMMENT_START;
114      return true;
115    }
116  }
117
118  return false;
119}
120
121static bool scan_comment_content(TSLexer *lexer) {
122  if (ending_char == 0) { // block comment
123    if (scan_block_content(lexer)) {
124      lexer->result_symbol = BLOCK_COMMENT_CONTENT;
125      return true;
126    }
127
128    return false;
129  }
130
131  while (lexer->lookahead != 0) {
132    if (lexer->lookahead == ending_char) {
133      reset_state();
134      lexer->result_symbol = BLOCK_COMMENT_CONTENT;
135      return true;
136    }
137
138    consume(lexer);
139  }
140
141  return false;
142}
143
144static bool scan_string_start(TSLexer *lexer) {
145  if (lexer->lookahead == '"' || lexer->lookahead == '\'') {
146    ending_char = lexer->lookahead;
147    consume(lexer);
148    return true;
149  }
150
151  if (scan_block_start(lexer)) {
152    return true;
153  }
154
155  return false;
156}
157
158static bool scan_string_end(TSLexer *lexer) {
159  if (ending_char == 0) { // block string
160    return scan_block_end(lexer);
161  }
162
163  if (consume_char(ending_char, lexer)) {
164    return true;
165  }
166
167  return false;
168}
169
170static bool scan_string_content(TSLexer *lexer) {
171  if (ending_char == 0) { // block string
172    return scan_block_content(lexer);
173  }
174
175  while (lexer->lookahead != '\n' && lexer->lookahead != 0 && lexer->lookahead != ending_char) {
176    if (consume_char('\\', lexer) && consume_char('z', lexer)) {
177      while (iswspace(lexer->lookahead)) {
178        consume(lexer);
179      }
180      continue;
181    };
182
183    if (lexer->lookahead == 0) {
184      return true;
185    }
186
187    consume(lexer);
188  }
189
190  return true;
191}
192
193bool tree_sitter_lua_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
194  if (valid_symbols[STRING_END] && scan_string_end(lexer)) {
195    reset_state();
196    lexer->result_symbol = STRING_END;
197    return true;
198  }
199
200  if (valid_symbols[STRING_CONTENT] && scan_string_content(lexer)) {
201    lexer->result_symbol = STRING_CONTENT;
202    return true;
203  }
204
205  if (valid_symbols[BLOCK_COMMENT_END] && ending_char == 0 && scan_block_end(lexer)) {
206    reset_state();
207    lexer->result_symbol = BLOCK_COMMENT_END;
208    return true;
209  }
210
211  if (valid_symbols[BLOCK_COMMENT_CONTENT] && scan_comment_content(lexer)) {
212    return true;
213  }
214
215  skip_whitespaces(lexer);
216
217  if (valid_symbols[STRING_START] && scan_string_start(lexer)) {
218    lexer->result_symbol = STRING_START;
219    return true;
220  }
221
222  if (valid_symbols[BLOCK_COMMENT_START]) {
223    if (scan_comment_start(lexer)) {
224      return true;
225    }
226  }
227
228  return false;
229}