1#include <stdio.h>
  2#include "tree_sitter/parser.h"
  3#include <wctype.h>
  4
  5enum TokenType {
  6  BLOCK_COMMENT_START,
  7  BLOCK_COMMENT_CONTENT,
  8  BLOCK_COMMENT_END,
  9
 10  BLOCK_STRING_START,
 11  BLOCK_STRING_CONTENT,
 12  BLOCK_STRING_END,
 13};
 14
 15static inline void consume(TSLexer *lexer) { lexer->advance(lexer, false); }
 16
 17static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
 18
 19static inline bool consume_char(char c, TSLexer *lexer) {
 20  if (lexer->lookahead != c) {
 21    return false;
 22  }
 23
 24  consume(lexer);
 25  return true;
 26}
 27
 28static inline uint8_t consume_and_count_char(char c, TSLexer *lexer) {
 29  uint8_t count = 0;
 30  while (lexer->lookahead == c) {
 31    ++count;
 32    consume(lexer);
 33  }
 34  return count;
 35}
 36
 37static inline void skip_whitespaces(TSLexer *lexer) {
 38  while (iswspace(lexer->lookahead)) {
 39    skip(lexer);
 40  }
 41}
 42
 43typedef struct {
 44  char ending_char;
 45  uint8_t level_count;
 46} Scanner;
 47
 48static inline void reset_state(Scanner *scanner) {
 49  scanner->ending_char = 0;
 50  scanner->level_count = 0;
 51}
 52
 53void *tree_sitter_lua_external_scanner_create() {
 54  Scanner *scanner = calloc(1, sizeof(Scanner));
 55  return scanner;
 56}
 57
 58void tree_sitter_lua_external_scanner_destroy(void *payload) {
 59  Scanner *scanner = (Scanner *)payload;
 60  free(scanner);
 61}
 62
 63unsigned tree_sitter_lua_external_scanner_serialize(void *payload, char *buffer) {
 64  Scanner *scanner = (Scanner *)payload;
 65  buffer[0] = scanner->ending_char;
 66  buffer[1] = (char)scanner->level_count;
 67  return 2;
 68}
 69
 70void tree_sitter_lua_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
 71  Scanner *scanner = (Scanner *)payload;
 72  if (length == 0) return;
 73  scanner->ending_char = buffer[0];
 74  if (length == 1) return;
 75  scanner->level_count = buffer[1];
 76}
 77
 78static bool scan_block_start(Scanner *scanner, TSLexer *lexer) {
 79  if (consume_char('[', lexer)) {
 80    uint8_t level = consume_and_count_char('=', lexer);
 81
 82    if (consume_char('[', lexer)) {
 83      scanner->level_count = level;
 84      return true;
 85    }
 86  }
 87
 88  return false;
 89}
 90
 91static bool scan_block_end(Scanner *scanner, TSLexer *lexer) {
 92  if (consume_char(']', lexer)) {
 93    uint8_t level = consume_and_count_char('=', lexer);
 94
 95    if (scanner->level_count == level && consume_char(']', lexer)) {
 96      return true;
 97    }
 98  }
 99
100  return false;
101}
102
103static bool scan_block_content(Scanner *scanner, TSLexer *lexer) {
104  while (lexer->lookahead != 0) {
105    if (lexer->lookahead == ']') {
106      lexer->mark_end(lexer);
107
108      if (scan_block_end(scanner, lexer)) {
109        return true;
110      }
111    } else {
112      consume(lexer);
113    }
114  }
115
116  return false;
117}
118
119static bool scan_comment_start(Scanner *scanner, TSLexer *lexer) {
120  if (consume_char('-', lexer) && consume_char('-', lexer)) {
121    lexer->mark_end(lexer);
122
123    if (scan_block_start(scanner, lexer)) {
124      lexer->mark_end(lexer);
125      lexer->result_symbol = BLOCK_COMMENT_START;
126      return true;
127    }
128  }
129
130  return false;
131}
132
133static bool scan_comment_content(Scanner *scanner, TSLexer *lexer) {
134  if (scanner->ending_char == 0) { // block comment
135    if (scan_block_content(scanner, lexer)) {
136      lexer->result_symbol = BLOCK_COMMENT_CONTENT;
137      return true;
138    }
139
140    return false;
141  }
142
143  while (lexer->lookahead != 0) {
144    if (lexer->lookahead == scanner->ending_char) {
145      reset_state(scanner);
146      lexer->result_symbol = BLOCK_COMMENT_CONTENT;
147      return true;
148    }
149
150    consume(lexer);
151  }
152
153  return false;
154}
155
156bool tree_sitter_lua_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
157  Scanner *scanner = (Scanner *)payload;
158
159  if (valid_symbols[BLOCK_STRING_END] && scan_block_end(scanner, lexer)) {
160    reset_state(scanner);
161    lexer->result_symbol = BLOCK_STRING_END;
162    return true;
163  }
164
165  if (valid_symbols[BLOCK_STRING_CONTENT] && scan_block_content(scanner, lexer)) {
166    lexer->result_symbol = BLOCK_STRING_CONTENT;
167    return true;
168  }
169
170  if (valid_symbols[BLOCK_COMMENT_END] && scanner->ending_char == 0 && scan_block_end(scanner, lexer)) {
171    reset_state(scanner);
172    lexer->result_symbol = BLOCK_COMMENT_END;
173    return true;
174  }
175
176  if (valid_symbols[BLOCK_COMMENT_CONTENT] && scan_comment_content(scanner, lexer)) {
177    return true;
178  }
179
180  skip_whitespaces(lexer);
181
182  if (valid_symbols[BLOCK_STRING_START] && scan_block_start(scanner, lexer)) {
183    lexer->result_symbol = BLOCK_STRING_START;
184    return true;
185  }
186
187  if (valid_symbols[BLOCK_COMMENT_START]) {
188    if (scan_comment_start(scanner, lexer)) {
189      return true;
190    }
191  }
192
193  return false;
194}