1 files changed, 419 insertions, 0 deletions
diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/lexer.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/lexer.c
new file mode 100644
index 0000000..d108c04
--- /dev/null
+++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/lexer.c
@@ -0,0 +1,419 @@
+#include <stdio.h>
+#include "./lexer.h"
+#include "./subtree.h"
+#include "./length.h"
+#include "./unicode.h"
+#define LOG(message, character)              \
+  if (self->logger.log) {                    \
+    snprintf(                                \
+      self->debug_buffer,                    \
+      TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
+      32 <= character && character < 127 ?   \
+        message " character:'%c'" :          \
+        message " character:%d",             \
+      character                              \
+    );                                       \
+    self->logger.log(                        \
+      self->logger.payload,                  \
+      TSLogTypeLex,                          \
+      self->debug_buffer                     \
+    );                                       \
+  }
+static const int32_t BYTE_ORDER_MARK = 0xFEFF;
+static const TSRange DEFAULT_RANGE = {
+  .start_point = {
+    .row = 0,
+    .column = 0,
+  },
+  .end_point = {
+    .row = UINT32_MAX,
+    .column = UINT32_MAX,
+  },
+  .start_byte = 0,
+  .end_byte = UINT32_MAX
+};
+// Check if the lexer has reached EOF. This state is stored
+// by setting the lexer's `current_included_range_index` such that
+// it has consumed all of its available ranges.
+static bool ts_lexer__eof(const TSLexer *_self) {
+  Lexer *self = (Lexer *)_self;
+  return self->current_included_range_index == self->included_range_count;
+}
+// Clear the currently stored chunk of source code, because the lexer's
+// position has changed.
+static void ts_lexer__clear_chunk(Lexer *self) {
+  self->chunk = NULL;
+  self->chunk_size = 0;
+  self->chunk_start = 0;
+}
+// Call the lexer's input callback to obtain a new chunk of source code
+// for the current position.
+static void ts_lexer__get_chunk(Lexer *self) {
+  self->chunk_start = self->current_position.bytes;
+  self->chunk = self->input.read(
+    self->input.payload,
+    self->current_position.bytes,
+    self->current_position.extent,
+    &self->chunk_size
+  );
+  if (!self->chunk_size) {
+    self->current_included_range_index = self->included_range_count;
+    self->chunk = NULL;
+  }
+}
+// Decode the next unicode character in the current chunk of source code.
+// This assumes that the lexer has already retrieved a chunk of source
+// code that spans the current position.
+static void ts_lexer__get_lookahead(Lexer *self) {
+  uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
+  uint32_t size = self->chunk_size - position_in_chunk;
+  if (size == 0) {
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+    return;
+  }
+  const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
+  UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
+    ? ts_decode_utf8
+    : ts_decode_utf16;
+  self->lookahead_size = decode(chunk, size, &self->data.lookahead);
+  // If this chunk ended in the middle of a multi-byte character,
+  // try again with a fresh chunk.
+  if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
+    ts_lexer__get_chunk(self);
+    chunk = (const uint8_t *)self->chunk;
+    size = self->chunk_size;
+    self->lookahead_size = decode(chunk, size, &self->data.lookahead);
+  }
+  if (self->data.lookahead == TS_DECODE_ERROR) {
+    self->lookahead_size = 1;
+  }
+}
+static void ts_lexer_goto(Lexer *self, Length position) {
+  self->current_position = position;
+  // Move to the first valid position at or after the given position.
+  bool found_included_range = false;
+  for (unsigned i = 0; i < self->included_range_count; i++) {
+    TSRange *included_range = &self->included_ranges[i];
+    if (
+      included_range->end_byte > self->current_position.bytes &&
+      included_range->end_byte > included_range->start_byte
+    ) {
+      if (included_range->start_byte >= self->current_position.bytes) {
+        self->current_position = (Length) {
+          .bytes = included_range->start_byte,
+          .extent = included_range->start_point,
+        };
+      }
+      self->current_included_range_index = i;
+      found_included_range = true;
+      break;
+    }
+  }
+  if (found_included_range) {
+    // If the current position is outside of the current chunk of text,
+    // then clear out the current chunk of text.
+    if (self->chunk && (
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
+    )) {
+      ts_lexer__clear_chunk(self);
+    }
+    self->lookahead_size = 0;
+    self->data.lookahead = '\0';
+  }
+  // If the given position is beyond any of included ranges, move to the EOF
+  // state - past the end of the included ranges.
+  else {
+    self->current_included_range_index = self->included_range_count;
+    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
+    self->current_position = (Length) {
+      .bytes = last_included_range->end_byte,
+      .extent = last_included_range->end_point,
+    };
+    ts_lexer__clear_chunk(self);
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+  }
+}
+// Intended to be called only from functions that control logging.
+static void ts_lexer__do_advance(Lexer *self, bool skip) {
+  if (self->lookahead_size) {
+    self->current_position.bytes += self->lookahead_size;
+    if (self->data.lookahead == '\n') {
+      self->current_position.extent.row++;
+      self->current_position.extent.column = 0;
+    } else {
+      self->current_position.extent.column += self->lookahead_size;
+    }
+  }
+  const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
+  while (
+    self->current_position.bytes >= current_range->end_byte ||
+    current_range->end_byte == current_range->start_byte
+  ) {
+    if (self->current_included_range_index < self->included_range_count) {
+      self->current_included_range_index++;
+    }
+    if (self->current_included_range_index < self->included_range_count) {
+      current_range++;
+      self->current_position = (Length) {
+        current_range->start_byte,
+        current_range->start_point,
+      };
+    } else {
+      current_range = NULL;
+      break;
+    }
+  }
+  if (skip) self->token_start_position = self->current_position;
+  if (current_range) {
+    if (
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
+    ) {
+      ts_lexer__get_chunk(self);
+    }
+    ts_lexer__get_lookahead(self);
+  } else {
+    ts_lexer__clear_chunk(self);
+    self->data.lookahead = '\0';
+    self->lookahead_size = 1;
+  }
+}
+// Advance to the next character in the source code, retrieving a new
+// chunk of source code if needed.
+static void ts_lexer__advance(TSLexer *_self, bool skip) {
+  Lexer *self = (Lexer *)_self;
+  if (!self->chunk) return;
+  if (skip) {
+    LOG("skip", self->data.lookahead)
+  } else {
+    LOG("consume", self->data.lookahead)
+  }
+  ts_lexer__do_advance(self, skip);
+}
+// Mark that a token match has completed. This can be called multiple
+// times if a longer match is found later.
+static void ts_lexer__mark_end(TSLexer *_self) {
+  Lexer *self = (Lexer *)_self;
+  if (!ts_lexer__eof(&self->data)) {
+    // If the lexer is right at the beginning of included range,
+    // then the token should be considered to end at the *end* of the
+    // previous included range, rather than here.
+    TSRange *current_included_range = &self->included_ranges[
+      self->current_included_range_index
+    ];
+    if (
+      self->current_included_range_index > 0 &&
+      self->current_position.bytes == current_included_range->start_byte
+    ) {
+      TSRange *previous_included_range = current_included_range - 1;
+      self->token_end_position = (Length) {
+        previous_included_range->end_byte,
+        previous_included_range->end_point,
+      };
+      return;
+    }
+  }
+  self->token_end_position = self->current_position;
+}
+static uint32_t ts_lexer__get_column(TSLexer *_self) {
+  Lexer *self = (Lexer *)_self;
+  uint32_t goal_byte = self->current_position.bytes;
+  self->did_get_column = true;
+  self->current_position.bytes -= self->current_position.extent.column;
+  self->current_position.extent.column = 0;
+  if (self->current_position.bytes < self->chunk_start) {
+    ts_lexer__get_chunk(self);
+  }
+  uint32_t result = 0;
+  if (!ts_lexer__eof(_self)) {
+    ts_lexer__get_lookahead(self);
+    while (self->current_position.bytes < goal_byte && self->chunk) {
+      result++;
+      ts_lexer__do_advance(self, false);
+      if (ts_lexer__eof(_self)) break;
+    }
+  }
+  return result;
+}
+// Is the lexer at a boundary between two disjoint included ranges of
+// source code? This is exposed as an API because some languages' external
+// scanners need to perform custom actions at these boundaries.
+static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
+  const Lexer *self = (const Lexer *)_self;
+  if (self->current_included_range_index < self->included_range_count) {
+    TSRange *current_range = &self->included_ranges[self->current_included_range_index];
+    return self->current_position.bytes == current_range->start_byte;
+  } else {
+    return false;
+  }
+}
+void ts_lexer_init(Lexer *self) {
+  *self = (Lexer) {
+    .data = {
+      // The lexer's methods are stored as struct fields so that generated
+      // parsers can call them without needing to be linked against this
+      // library.
+      .advance = ts_lexer__advance,
+      .mark_end = ts_lexer__mark_end,
+      .get_column = ts_lexer__get_column,
+      .is_at_included_range_start = ts_lexer__is_at_included_range_start,
+      .eof = ts_lexer__eof,
+      .lookahead = 0,
+      .result_symbol = 0,
+    },
+    .chunk = NULL,
+    .chunk_size = 0,
+    .chunk_start = 0,
+    .current_position = {0, {0, 0}},
+    .logger = {
+      .payload = NULL,
+      .log = NULL
+    },
+    .included_ranges = NULL,
+    .included_range_count = 0,
+    .current_included_range_index = 0,
+  };
+  ts_lexer_set_included_ranges(self, NULL, 0);
+}
+void ts_lexer_delete(Lexer *self) {
+  ts_free(self->included_ranges);
+}
+void ts_lexer_set_input(Lexer *self, TSInput input) {
+  self->input = input;
+  ts_lexer__clear_chunk(self);
+  ts_lexer_goto(self, self->current_position);
+}
+// Move the lexer to the given position. This doesn't do any work
+// if the parser is already at the given position.
+void ts_lexer_reset(Lexer *self, Length position) {
+  if (position.bytes != self->current_position.bytes) {
+    ts_lexer_goto(self, position);
+  }
+}
+void ts_lexer_start(Lexer *self) {
+  self->token_start_position = self->current_position;
+  self->token_end_position = LENGTH_UNDEFINED;
+  self->data.result_symbol = 0;
+  self->did_get_column = false;
+  if (!ts_lexer__eof(&self->data)) {
+    if (!self->chunk_size) ts_lexer__get_chunk(self);
+    if (!self->lookahead_size) ts_lexer__get_lookahead(self);
+    if (
+      self->current_position.bytes == 0 &&
+      self->data.lookahead == BYTE_ORDER_MARK
+    ) ts_lexer__advance(&self->data, true);
+  }
+}
+void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
+  if (length_is_undefined(self->token_end_position)) {
+    ts_lexer__mark_end(&self->data);
+  }
+  // If the token ended at an included range boundary, then its end position
+  // will have been reset to the end of the preceding range. Reset the start
+  // position to match.
+  if (self->token_end_position.bytes < self->token_start_position.bytes) {
+    self->token_start_position = self->token_end_position;
+  }
+  uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
+  // In order to determine that a byte sequence is invalid UTF8 or UTF16,
+  // the character decoding algorithm may have looked at the following byte.
+  // Therefore, the next byte *after* the current (invalid) character
+  // affects the interpretation of the current character.
+  if (self->data.lookahead == TS_DECODE_ERROR) {
+    current_lookahead_end_byte++;
+  }
+  if (current_lookahead_end_byte > *lookahead_end_byte) {
+    *lookahead_end_byte = current_lookahead_end_byte;
+  }
+}
+void ts_lexer_advance_to_end(Lexer *self) {
+  while (self->chunk) {
+    ts_lexer__advance(&self->data, false);
+  }
+}
+void ts_lexer_mark_end(Lexer *self) {
+  ts_lexer__mark_end(&self->data);
+}
+bool ts_lexer_set_included_ranges(
+  Lexer *self,
+  const TSRange *ranges,
+  uint32_t count
+) {
+  if (count == 0 || !ranges) {
+    ranges = &DEFAULT_RANGE;
+    count = 1;
+  } else {
+    uint32_t previous_byte = 0;
+    for (unsigned i = 0; i < count; i++) {
+      const TSRange *range = &ranges[i];
+      if (
+        range->start_byte < previous_byte ||
+        range->end_byte < range->start_byte
+      ) return false;
+      previous_byte = range->end_byte;
+    }
+  }
+  size_t size = count * sizeof(TSRange);
+  self->included_ranges = ts_realloc(self->included_ranges, size);
+  memcpy(self->included_ranges, ranges, size);
+  self->included_range_count = count;
+  ts_lexer_goto(self, self->current_position);
+  return true;
+}
+TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
+  *count = self->included_range_count;
+  return self->included_ranges;
+}
+#undef LOG

diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/lexer.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/lexer.c new file mode 100644 index 0000000..d108c04 --- /dev/null +++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/lexer.c
@@ -0,0 +1,419 @@
	1	#include <stdio.h>
	2	#include "./lexer.h"
	3	#include "./subtree.h"
	4	#include "./length.h"
	5	#include "./unicode.h"
	6
	7	#define LOG(message, character) \
	8	if (self->logger.log) { \
	9	snprintf( \
	10	self->debug_buffer, \
	11	TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
	12	32 <= character && character < 127 ? \
	13	message " character:'%c'" : \
	14	message " character:%d", \
	15	character \
	16	); \
	17	self->logger.log( \
	18	self->logger.payload, \
	19	TSLogTypeLex, \
	20	self->debug_buffer \
	21	); \
	22	}
	23
	24	static const int32_t BYTE_ORDER_MARK = 0xFEFF;
	25
	26	static const TSRange DEFAULT_RANGE = {
	27	.start_point = {
	28	.row = 0,
	29	.column = 0,
	30	},
	31	.end_point = {
	32	.row = UINT32_MAX,
	33	.column = UINT32_MAX,
	34	},
	35	.start_byte = 0,
	36	.end_byte = UINT32_MAX
	37	};
	38
	39	// Check if the lexer has reached EOF. This state is stored
	40	// by setting the lexer's `current_included_range_index` such that
	41	// it has consumed all of its available ranges.
	42	static bool ts_lexer__eof(const TSLexer *_self) {
	43	Lexer self = (Lexer )_self;
	44	return self->current_included_range_index == self->included_range_count;
	45	}
	46
	47	// Clear the currently stored chunk of source code, because the lexer's
	48	// position has changed.
	49	static void ts_lexer__clear_chunk(Lexer *self) {
	50	self->chunk = NULL;
	51	self->chunk_size = 0;
	52	self->chunk_start = 0;
	53	}
	54
	55	// Call the lexer's input callback to obtain a new chunk of source code
	56	// for the current position.
	57	static void ts_lexer__get_chunk(Lexer *self) {
	58	self->chunk_start = self->current_position.bytes;
	59	self->chunk = self->input.read(
	60	self->input.payload,
	61	self->current_position.bytes,
	62	self->current_position.extent,
	63	&self->chunk_size
	64	);
	65	if (!self->chunk_size) {
	66	self->current_included_range_index = self->included_range_count;
	67	self->chunk = NULL;
	68	}
	69	}
	70
	71	// Decode the next unicode character in the current chunk of source code.
	72	// This assumes that the lexer has already retrieved a chunk of source
	73	// code that spans the current position.
	74	static void ts_lexer__get_lookahead(Lexer *self) {
	75	uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
	76	uint32_t size = self->chunk_size - position_in_chunk;
	77
	78	if (size == 0) {
	79	self->lookahead_size = 1;
	80	self->data.lookahead = '\0';
	81	return;
	82	}
	83
	84	const uint8_t chunk = (const uint8_t )self->chunk + position_in_chunk;
	85	UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
	86	? ts_decode_utf8
	87	: ts_decode_utf16;
	88
	89	self->lookahead_size = decode(chunk, size, &self->data.lookahead);
	90
	91	// If this chunk ended in the middle of a multi-byte character,
	92	// try again with a fresh chunk.
	93	if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
	94	ts_lexer__get_chunk(self);
	95	chunk = (const uint8_t *)self->chunk;
	96	size = self->chunk_size;
	97	self->lookahead_size = decode(chunk, size, &self->data.lookahead);
	98	}
	99
	100	if (self->data.lookahead == TS_DECODE_ERROR) {
	101	self->lookahead_size = 1;
	102	}
	103	}
	104
	105	static void ts_lexer_goto(Lexer *self, Length position) {
	106	self->current_position = position;
	107
	108	// Move to the first valid position at or after the given position.
	109	bool found_included_range = false;
	110	for (unsigned i = 0; i < self->included_range_count; i++) {
	111	TSRange *included_range = &self->included_ranges[i];
	112	if (
	113	included_range->end_byte > self->current_position.bytes &&
	114	included_range->end_byte > included_range->start_byte
	115	) {
	116	if (included_range->start_byte >= self->current_position.bytes) {
	117	self->current_position = (Length) {
	118	.bytes = included_range->start_byte,
	119	.extent = included_range->start_point,
	120	};
	121	}
	122
	123	self->current_included_range_index = i;
	124	found_included_range = true;
	125	break;
	126	}
	127	}
	128
	129	if (found_included_range) {
	130	// If the current position is outside of the current chunk of text,
	131	// then clear out the current chunk of text.
	132	if (self->chunk && (
	133	self->current_position.bytes < self->chunk_start \|\|
	134	self->current_position.bytes >= self->chunk_start + self->chunk_size
	135	)) {
	136	ts_lexer__clear_chunk(self);
	137	}
	138
	139	self->lookahead_size = 0;
	140	self->data.lookahead = '\0';
	141	}
	142
	143	// If the given position is beyond any of included ranges, move to the EOF
	144	// state - past the end of the included ranges.
	145	else {
	146	self->current_included_range_index = self->included_range_count;
	147	TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
	148	self->current_position = (Length) {
	149	.bytes = last_included_range->end_byte,
	150	.extent = last_included_range->end_point,
	151	};
	152	ts_lexer__clear_chunk(self);
	153	self->lookahead_size = 1;
	154	self->data.lookahead = '\0';
	155	}
	156	}
	157
	158	// Intended to be called only from functions that control logging.
	159	static void ts_lexer__do_advance(Lexer *self, bool skip) {
	160	if (self->lookahead_size) {
	161	self->current_position.bytes += self->lookahead_size;
	162	if (self->data.lookahead == '\n') {
	163	self->current_position.extent.row++;
	164	self->current_position.extent.column = 0;
	165	} else {
	166	self->current_position.extent.column += self->lookahead_size;
	167	}
	168	}
	169
	170	const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
	171	while (
	172	self->current_position.bytes >= current_range->end_byte \|\|
	173	current_range->end_byte == current_range->start_byte
	174	) {
	175	if (self->current_included_range_index < self->included_range_count) {
	176	self->current_included_range_index++;
	177	}
	178	if (self->current_included_range_index < self->included_range_count) {
	179	current_range++;
	180	self->current_position = (Length) {
	181	current_range->start_byte,
	182	current_range->start_point,
	183	};
	184	} else {
	185	current_range = NULL;
	186	break;
	187	}
	188	}
	189
	190	if (skip) self->token_start_position = self->current_position;
	191
	192	if (current_range) {
	193	if (
	194	self->current_position.bytes < self->chunk_start \|\|
	195	self->current_position.bytes >= self->chunk_start + self->chunk_size
	196	) {
	197	ts_lexer__get_chunk(self);
	198	}
	199	ts_lexer__get_lookahead(self);
	200	} else {
	201	ts_lexer__clear_chunk(self);
	202	self->data.lookahead = '\0';
	203	self->lookahead_size = 1;
	204	}
	205	}
	206
	207	// Advance to the next character in the source code, retrieving a new
	208	// chunk of source code if needed.
	209	static void ts_lexer__advance(TSLexer *_self, bool skip) {
	210	Lexer self = (Lexer )_self;
	211	if (!self->chunk) return;
	212
	213	if (skip) {
	214	LOG("skip", self->data.lookahead)
	215	} else {
	216	LOG("consume", self->data.lookahead)
	217	}
	218
	219	ts_lexer__do_advance(self, skip);
	220	}
	221
	222	// Mark that a token match has completed. This can be called multiple
	223	// times if a longer match is found later.
	224	static void ts_lexer__mark_end(TSLexer *_self) {
	225	Lexer self = (Lexer )_self;
	226	if (!ts_lexer__eof(&self->data)) {
	227	// If the lexer is right at the beginning of included range,
	228	// then the token should be considered to end at the end of the
	229	// previous included range, rather than here.
	230	TSRange *current_included_range = &self->included_ranges[
	231	self->current_included_range_index
	232	];
	233	if (
	234	self->current_included_range_index > 0 &&
	235	self->current_position.bytes == current_included_range->start_byte
	236	) {
	237	TSRange *previous_included_range = current_included_range - 1;
	238	self->token_end_position = (Length) {
	239	previous_included_range->end_byte,
	240	previous_included_range->end_point,
	241	};
	242	return;
	243	}
	244	}
	245	self->token_end_position = self->current_position;
	246	}
	247
	248	static uint32_t ts_lexer__get_column(TSLexer *_self) {
	249	Lexer self = (Lexer )_self;
	250
	251	uint32_t goal_byte = self->current_position.bytes;
	252
	253	self->did_get_column = true;
	254	self->current_position.bytes -= self->current_position.extent.column;
	255	self->current_position.extent.column = 0;
	256
	257	if (self->current_position.bytes < self->chunk_start) {
	258	ts_lexer__get_chunk(self);
	259	}
	260
	261	uint32_t result = 0;
	262	if (!ts_lexer__eof(_self)) {
	263	ts_lexer__get_lookahead(self);
	264	while (self->current_position.bytes < goal_byte && self->chunk) {
	265	result++;
	266	ts_lexer__do_advance(self, false);
	267	if (ts_lexer__eof(_self)) break;
	268	}
	269	}
	270
	271	return result;
	272	}
	273
	274	// Is the lexer at a boundary between two disjoint included ranges of
	275	// source code? This is exposed as an API because some languages' external
	276	// scanners need to perform custom actions at these boundaries.
	277	static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
	278	const Lexer self = (const Lexer )_self;
	279	if (self->current_included_range_index < self->included_range_count) {
	280	TSRange *current_range = &self->included_ranges[self->current_included_range_index];
	281	return self->current_position.bytes == current_range->start_byte;
	282	} else {
	283	return false;
	284	}
	285	}
	286
	287	void ts_lexer_init(Lexer *self) {
	288	*self = (Lexer) {
	289	.data = {
	290	// The lexer's methods are stored as struct fields so that generated
	291	// parsers can call them without needing to be linked against this
	292	// library.
	293	.advance = ts_lexer__advance,
	294	.mark_end = ts_lexer__mark_end,
	295	.get_column = ts_lexer__get_column,
	296	.is_at_included_range_start = ts_lexer__is_at_included_range_start,
	297	.eof = ts_lexer__eof,
	298	.lookahead = 0,
	299	.result_symbol = 0,
	300	},
	301	.chunk = NULL,
	302	.chunk_size = 0,
	303	.chunk_start = 0,
	304	.current_position = {0, {0, 0}},
	305	.logger = {
	306	.payload = NULL,
	307	.log = NULL
	308	},
	309	.included_ranges = NULL,
	310	.included_range_count = 0,
	311	.current_included_range_index = 0,
	312	};
	313	ts_lexer_set_included_ranges(self, NULL, 0);
	314	}
	315
	316	void ts_lexer_delete(Lexer *self) {
	317	ts_free(self->included_ranges);
	318	}
	319
	320	void ts_lexer_set_input(Lexer *self, TSInput input) {
	321	self->input = input;
	322	ts_lexer__clear_chunk(self);
	323	ts_lexer_goto(self, self->current_position);
	324	}
	325
	326	// Move the lexer to the given position. This doesn't do any work
	327	// if the parser is already at the given position.
	328	void ts_lexer_reset(Lexer *self, Length position) {
	329	if (position.bytes != self->current_position.bytes) {
	330	ts_lexer_goto(self, position);
	331	}
	332	}
	333
	334	void ts_lexer_start(Lexer *self) {
	335	self->token_start_position = self->current_position;
	336	self->token_end_position = LENGTH_UNDEFINED;
	337	self->data.result_symbol = 0;
	338	self->did_get_column = false;
	339	if (!ts_lexer__eof(&self->data)) {
	340	if (!self->chunk_size) ts_lexer__get_chunk(self);
	341	if (!self->lookahead_size) ts_lexer__get_lookahead(self);
	342	if (
	343	self->current_position.bytes == 0 &&
	344	self->data.lookahead == BYTE_ORDER_MARK
	345	) ts_lexer__advance(&self->data, true);
	346	}
	347	}
	348
	349	void ts_lexer_finish(Lexer self, uint32_t lookahead_end_byte) {
	350	if (length_is_undefined(self->token_end_position)) {
	351	ts_lexer__mark_end(&self->data);
	352	}
	353
	354	// If the token ended at an included range boundary, then its end position
	355	// will have been reset to the end of the preceding range. Reset the start
	356	// position to match.
	357	if (self->token_end_position.bytes < self->token_start_position.bytes) {
	358	self->token_start_position = self->token_end_position;
	359	}
	360
	361	uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
	362
	363	// In order to determine that a byte sequence is invalid UTF8 or UTF16,
	364	// the character decoding algorithm may have looked at the following byte.
	365	// Therefore, the next byte after the current (invalid) character
	366	// affects the interpretation of the current character.
	367	if (self->data.lookahead == TS_DECODE_ERROR) {
	368	current_lookahead_end_byte++;
	369	}
	370
	371	if (current_lookahead_end_byte > *lookahead_end_byte) {
	372	*lookahead_end_byte = current_lookahead_end_byte;
	373	}
	374	}
	375
	376	void ts_lexer_advance_to_end(Lexer *self) {
	377	while (self->chunk) {
	378	ts_lexer__advance(&self->data, false);
	379	}
	380	}
	381
	382	void ts_lexer_mark_end(Lexer *self) {
	383	ts_lexer__mark_end(&self->data);
	384	}
	385
	386	bool ts_lexer_set_included_ranges(
	387	Lexer *self,
	388	const TSRange *ranges,
	389	uint32_t count
	390	) {
	391	if (count == 0 \|\| !ranges) {
	392	ranges = &DEFAULT_RANGE;
	393	count = 1;
	394	} else {
	395	uint32_t previous_byte = 0;
	396	for (unsigned i = 0; i < count; i++) {
	397	const TSRange *range = &ranges[i];
	398	if (
	399	range->start_byte < previous_byte \|\|
	400	range->end_byte < range->start_byte
	401	) return false;
	402	previous_byte = range->end_byte;
	403	}
	404	}
	405
	406	size_t size = count * sizeof(TSRange);
	407	self->included_ranges = ts_realloc(self->included_ranges, size);
	408	memcpy(self->included_ranges, ranges, size);
	409	self->included_range_count = count;
	410	ts_lexer_goto(self, self->current_position);
	411	return true;
	412	}
	413
	414	TSRange ts_lexer_included_ranges(const Lexer self, uint32_t *count) {
	415	*count = self->included_range_count;
	416	return self->included_ranges;
	417	}
	418
	419	#undef LOG