From 5a8dbc6347b3541e84fe669b22c17ad3b715e258 Mon Sep 17 00:00:00 2001
From: Mitja Felicijan <mitja.felicijan@gmail.com>
Date: Wed, 21 Jan 2026 20:22:09 +0100
Subject: Engage!

---
 .../markdown/tree-sitter-markdown/scanner.c        | 1597 ++++++++++++++++++++
 1 file changed, 1597 insertions(+)
 create mode 100644 vendor/github.com/mitjafelicijan/go-tree-sitter/markdown/tree-sitter-markdown/scanner.c

(limited to 'vendor/github.com/mitjafelicijan/go-tree-sitter/markdown/tree-sitter-markdown/scanner.c')

diff --git a/vendor/github.com/mitjafelicijan/go-tree-sitter/markdown/tree-sitter-markdown/scanner.c b/vendor/github.com/mitjafelicijan/go-tree-sitter/markdown/tree-sitter-markdown/scanner.c
new file mode 100644
index 0000000..748fe17
--- /dev/null
+++ b/vendor/github.com/mitjafelicijan/go-tree-sitter/markdown/tree-sitter-markdown/scanner.c
@@ -0,0 +1,1597 @@
+#include "parser.h"
+#include <assert.h>
+#include <ctype.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+// Token types this scanner reports through `lexer->result_symbol`.
+// For explanation of the tokens see grammar.js
+//
+// The numeric order matters inside this file: `paragraph_interrupt_symbols`
+// lists one entry per enumerator in this same order, and `parse_atx_heading`
+// computes its result as `ATX_H1_MARKER + (level - 1)`, so the six ATX markers
+// must stay contiguous and ascending.
+// NOTE(review): presumably the order must also match the `externals` list in
+// grammar.js -- confirm before reordering.
+typedef enum {
+    LINE_ENDING,
+    SOFT_LINE_ENDING,
+    BLOCK_CLOSE,
+    BLOCK_CONTINUATION,
+    BLOCK_QUOTE_START,
+    INDENTED_CHUNK_START,
+    ATX_H1_MARKER,
+    ATX_H2_MARKER,
+    ATX_H3_MARKER,
+    ATX_H4_MARKER,
+    ATX_H5_MARKER,
+    ATX_H6_MARKER,
+    SETEXT_H1_UNDERLINE,
+    SETEXT_H2_UNDERLINE,
+    THEMATIC_BREAK,
+    LIST_MARKER_MINUS,
+    LIST_MARKER_PLUS,
+    LIST_MARKER_STAR,
+    LIST_MARKER_PARENTHESIS,
+    LIST_MARKER_DOT,
+    LIST_MARKER_MINUS_DONT_INTERRUPT,
+    LIST_MARKER_PLUS_DONT_INTERRUPT,
+    LIST_MARKER_STAR_DONT_INTERRUPT,
+    LIST_MARKER_PARENTHESIS_DONT_INTERRUPT,
+    LIST_MARKER_DOT_DONT_INTERRUPT,
+    FENCED_CODE_BLOCK_START_BACKTICK,
+    FENCED_CODE_BLOCK_START_TILDE,
+    BLANK_LINE_START,
+    FENCED_CODE_BLOCK_END_BACKTICK,
+    FENCED_CODE_BLOCK_END_TILDE,
+    HTML_BLOCK_1_START,
+    HTML_BLOCK_1_END,
+    HTML_BLOCK_2_START,
+    HTML_BLOCK_3_START,
+    HTML_BLOCK_4_START,
+    HTML_BLOCK_5_START,
+    HTML_BLOCK_6_START,
+    HTML_BLOCK_7_START,
+    CLOSE_BLOCK,
+    NO_INDENTED_CHUNK,
+    ERROR,
+    TRIGGER_ERROR,
+    TOKEN_EOF,
+    MINUS_METADATA,
+    PLUS_METADATA,
+    PIPE_TABLE_START,
+    PIPE_TABLE_LINE_ENDING,
+} TokenType;
+
+// Description of a block on the block stack.
+//
+// LIST_ITEM is a list item with minimal indentation (content begins at indent
+// level 2) while LIST_ITEM_MAX_INDENTATION represents a list item with maximal
+// indentation without being considered an indented code block. The list item
+// variants must stay contiguous and in ascending order:
+// `list_item_indentation` derives the required indentation as
+// `block - LIST_ITEM + 2`, and the list marker parsers build a variant as
+// `LIST_ITEM + <offset>`.
+//
+// ANONYMOUS represents any block whose close is not handled by the external
+// scanner.
+typedef enum {
+    BLOCK_QUOTE,
+    INDENTED_CODE_BLOCK,
+    LIST_ITEM,
+    LIST_ITEM_1_INDENTATION,
+    LIST_ITEM_2_INDENTATION,
+    LIST_ITEM_3_INDENTATION,
+    LIST_ITEM_4_INDENTATION,
+    LIST_ITEM_5_INDENTATION,
+    LIST_ITEM_6_INDENTATION,
+    LIST_ITEM_7_INDENTATION,
+    LIST_ITEM_8_INDENTATION,
+    LIST_ITEM_9_INDENTATION,
+    LIST_ITEM_10_INDENTATION,
+    LIST_ITEM_11_INDENTATION,
+    LIST_ITEM_12_INDENTATION,
+    LIST_ITEM_13_INDENTATION,
+    LIST_ITEM_14_INDENTATION,
+    LIST_ITEM_MAX_INDENTATION,
+    FENCED_CODE_BLOCK,
+    ANONYMOUS,
+} Block;
+
+// Determines if a character is punctuation as defined by the markdown spec,
+// i.e. a printable, non-space ASCII character that is neither a digit nor a
+// letter.
+static bool is_punctuation(char chr) {
+    // Everything outside '!'..'~' (controls, space, DEL, non-ASCII) is
+    // rejected right away.
+    if (chr < '!' || chr > '~') {
+        return false;
+    }
+    const bool digit = chr >= '0' && chr <= '9';
+    const bool upper = chr >= 'A' && chr <= 'Z';
+    const bool lower = chr >= 'a' && chr <= 'z';
+    return !(digit || upper || lower);
+}
+
+// Minimum indentation that the lines of a list item need in order to belong
+// to it. `LIST_ITEM` maps to 2 and every following list item variant adds one
+// more column. Should only be called with the list item variants of `Block`.
+static uint8_t list_item_indentation(Block block) {
+    uint8_t columns = (uint8_t)(block - LIST_ITEM);
+    columns = (uint8_t)(columns + 2);
+    return columns;
+}
+
+// Tag name tables used for HTML block detection. All names are lowercase.
+// NOTE(review): the "RULE_1" / "RULE_7" suffixes presumably refer to HTML block
+// start conditions of the markdown spec; the lookup code is outside this chunk
+// -- confirm which condition each table serves.
+#define NUM_HTML_TAG_NAMES_RULE_1 3
+
+static const char *const HTML_TAG_NAMES_RULE_1[NUM_HTML_TAG_NAMES_RULE_1] = {
+    "pre", "script", "style"};
+
+// Number of entries in HTML_TAG_NAMES_RULE_7; update it together with the
+// initializer list below.
+#define NUM_HTML_TAG_NAMES_RULE_7 62
+
+// The names are kept in alphabetical order.
+static const char *const HTML_TAG_NAMES_RULE_7[NUM_HTML_TAG_NAMES_RULE_7] = {
+    "address",  "article",    "aside",  "base",     "basefont", "blockquote",
+    "body",     "caption",    "center", "col",      "colgroup", "dd",
+    "details",  "dialog",     "dir",    "div",      "dl",       "dt",
+    "fieldset", "figcaption", "figure", "footer",   "form",     "frame",
+    "frameset", "h1",         "h2",     "h3",       "h4",       "h5",
+    "h6",       "head",       "header", "hr",       "html",     "iframe",
+    "legend",   "li",         "link",   "main",     "menu",     "menuitem",
+    "nav",      "noframes",   "ol",     "optgroup", "option",   "p",
+    "param",    "section",    "source", "summary",  "table",    "tbody",
+    "td",       "tfoot",      "th",     "thead",    "title",    "tr",
+    "track",    "ul"};
+
+// One flag per `TokenType`, listed in enumerator order (see the trailing
+// comment on each entry). For explanation of the tokens see grammar.js
+// NOTE(review): judging by the name, `true` marks tokens that are allowed to
+// interrupt a paragraph; the code that consumes this table is outside this
+// chunk -- confirm.
+static const bool paragraph_interrupt_symbols[] = {
+    false, // LINE_ENDING,
+    false, // SOFT_LINE_ENDING,
+    false, // BLOCK_CLOSE,
+    false, // BLOCK_CONTINUATION,
+    true,  // BLOCK_QUOTE_START,
+    false, // INDENTED_CHUNK_START,
+    true,  // ATX_H1_MARKER,
+    true,  // ATX_H2_MARKER,
+    true,  // ATX_H3_MARKER,
+    true,  // ATX_H4_MARKER,
+    true,  // ATX_H5_MARKER,
+    true,  // ATX_H6_MARKER,
+    true,  // SETEXT_H1_UNDERLINE,
+    true,  // SETEXT_H2_UNDERLINE,
+    true,  // THEMATIC_BREAK,
+    true,  // LIST_MARKER_MINUS,
+    true,  // LIST_MARKER_PLUS,
+    true,  // LIST_MARKER_STAR,
+    true,  // LIST_MARKER_PARENTHESIS,
+    true,  // LIST_MARKER_DOT,
+    false, // LIST_MARKER_MINUS_DONT_INTERRUPT,
+    false, // LIST_MARKER_PLUS_DONT_INTERRUPT,
+    false, // LIST_MARKER_STAR_DONT_INTERRUPT,
+    false, // LIST_MARKER_PARENTHESIS_DONT_INTERRUPT,
+    false, // LIST_MARKER_DOT_DONT_INTERRUPT,
+    true,  // FENCED_CODE_BLOCK_START_BACKTICK,
+    true,  // FENCED_CODE_BLOCK_START_TILDE,
+    true,  // BLANK_LINE_START,
+    false, // FENCED_CODE_BLOCK_END_BACKTICK,
+    false, // FENCED_CODE_BLOCK_END_TILDE,
+    true,  // HTML_BLOCK_1_START,
+    false, // HTML_BLOCK_1_END,
+    true,  // HTML_BLOCK_2_START,
+    true,  // HTML_BLOCK_3_START,
+    true,  // HTML_BLOCK_4_START,
+    true,  // HTML_BLOCK_5_START,
+    true,  // HTML_BLOCK_6_START,
+    false, // HTML_BLOCK_7_START,
+    false, // CLOSE_BLOCK,
+    false, // NO_INDENTED_CHUNK,
+    false, // ERROR,
+    false, // TRIGGER_ERROR,
+    false, // TOKEN_EOF,
+    false, // MINUS_METADATA,
+    false, // PLUS_METADATA,
+    true,  // PIPE_TABLE_START,
+    false, // PIPE_TABLE_LINE_ENDING,
+};
+
+// State bitflags used with `Scanner.state`
+// Each constant is a single-bit mask. Bits 2 and 3 are not assigned by any of
+// the flags defined here.
+
+// Currently matching (at the beginning of a line)
+static const uint8_t STATE_MATCHING = 0x1 << 0;
+// Last line break was inside a paragraph
+static const uint8_t STATE_WAS_SOFT_LINE_BREAK = 0x1 << 1;
+// Block should be closed after next line break
+static const uint8_t STATE_CLOSE_BLOCK = 0x1 << 4;
+
+// Rounds `x` up to the next power of two (values that already are a power of
+// two are returned unchanged). Works by copying the highest set bit of
+// `x - 1` into every lower bit and adding one. The shifts stop at 16, so the
+// result is only an exact power of two while `x - 1` fits in 32 bits.
+// An input of 0 wraps around and yields 0.
+static size_t roundup_32(size_t x) {
+    size_t smeared = x - 1;
+    for (unsigned shift = 1; shift <= 16; shift <<= 1) {
+        smeared |= smeared >> shift;
+    }
+    return smeared + 1;
+}
+
+// Complete state of the external scanner. `serialize` writes the five
+// `uint8_t` fields followed by the contents of `open_blocks`; `simulate` is
+// not serialized.
+typedef struct {
+    // A stack of open blocks in the current parse state
+    struct {
+        // Number of blocks currently on the stack
+        size_t size;
+        // Number of blocks `items` has room for
+        size_t capacity;
+        // Backing array, grown with realloc; index 0 is the bottom of the stack
+        Block *items;
+    } open_blocks;
+
+    // Parser state flags
+    uint8_t state;
+    // Number of blocks that have been matched so far. Only changes during
+    // matching and is reset after every line ending
+    uint8_t matched;
+    // Consumed but "unused" indentation. Sometimes a tab needs to be "split" to
+    // be used in multiple tokens.
+    uint8_t indentation;
+    // The current column (kept modulo 4 by `advance`). Used to decide how many
+    // spaces a tab should equal
+    uint8_t column;
+    // The delimiter length of the currently open fenced code block
+    uint8_t fenced_code_block_delimiter_length;
+
+    // While set, `mark_end` does not call into the lexer and the block parsers
+    // do not push onto `open_blocks`.
+    bool simulate;
+} Scanner;
+
+// Pushes `b` onto the stack of open blocks, growing the backing array when it
+// is full. The capacity starts at 8 and doubles on every further growth.
+// An allocation failure terminates the process.
+static void push_block(Scanner *s, Block b) {
+    if (s->open_blocks.size == s->open_blocks.capacity) {
+        size_t new_capacity =
+            s->open_blocks.capacity ? s->open_blocks.capacity << 1 : 8;
+        void *tmp = realloc(s->open_blocks.items, sizeof(Block) * new_capacity);
+        // Checked explicitly instead of via assert(): assert() compiles to
+        // nothing under NDEBUG, which would turn a failed allocation into a
+        // NULL dereference on the store below.
+        if (tmp == NULL) {
+            abort();
+        }
+        // Commit the new capacity only once the allocation has succeeded.
+        s->open_blocks.items = tmp;
+        s->open_blocks.capacity = new_capacity;
+    }
+
+    s->open_blocks.items[s->open_blocks.size++] = b;
+}
+
+// Removes the most recently pushed block from the stack and returns it.
+// No emptiness check is made here; the stack must hold at least one block.
+static inline Block pop_block(Scanner *s) {
+    s->open_blocks.size -= 1;
+    return s->open_blocks.items[s->open_blocks.size];
+}
+
+// Write the whole state of a Scanner to a byte buffer
+static unsigned serialize(Scanner *s, char *buffer) {
+    unsigned size = 0;
+    buffer[size++] = (char)s->state;
+    buffer[size++] = (char)s->matched;
+    buffer[size++] = (char)s->indentation;
+    buffer[size++] = (char)s->column;
+    buffer[size++] = (char)s->fenced_code_block_delimiter_length;
+    size_t blocks_count = s->open_blocks.size;
+    if (blocks_count > 0) {
+        memcpy(&buffer[size], s->open_blocks.items,
+               blocks_count * sizeof(Block));
+        size += blocks_count * sizeof(Block);
+    }
+    return size;
+}
+
+// Read the whole state of a Scanner from a byte buffer.
+// `serialize` and `deserialize` should be fully symmetric.
+//
+// All serialized fields are reset first. A buffer shorter than the five header
+// bytes (including `length == 0`) therefore leaves the scanner in its reset
+// state instead of reading past the end of `buffer`. Behind the header only
+// whole `Block` values are restored; stray trailing bytes are ignored.
+// `simulate` is not touched. An allocation failure terminates the process.
+static void deserialize(Scanner *s, const char *buffer, unsigned length) {
+    // Number of fixed header bytes written by `serialize`.
+    const size_t header_size = 5;
+
+    // The capacity is zeroed as well, but `items` is left alone so that the
+    // realloc below can reuse an existing allocation.
+    s->open_blocks.size = 0;
+    s->open_blocks.capacity = 0;
+    s->state = 0;
+    s->matched = 0;
+    s->indentation = 0;
+    s->column = 0;
+    s->fenced_code_block_delimiter_length = 0;
+    if (length < header_size) {
+        return;
+    }
+
+    size_t size = 0;
+    s->state = (uint8_t)buffer[size++];
+    s->matched = (uint8_t)buffer[size++];
+    s->indentation = (uint8_t)buffer[size++];
+    s->column = (uint8_t)buffer[size++];
+    s->fenced_code_block_delimiter_length = (uint8_t)buffer[size++];
+
+    size_t blocks_count = (length - size) / sizeof(Block);
+    if (blocks_count == 0) {
+        return;
+    }
+
+    // ensure open blocks has enough room
+    size_t capacity = roundup_32(blocks_count);
+    void *tmp = realloc(s->open_blocks.items, sizeof(Block) * capacity);
+    // Checked explicitly: assert() would disappear under NDEBUG.
+    if (tmp == NULL) {
+        abort();
+    }
+    s->open_blocks.items = tmp;
+    s->open_blocks.capacity = capacity;
+
+    // Copy exactly the bytes of the whole blocks, never more than was
+    // allocated above.
+    memcpy(s->open_blocks.items, &buffer[size], blocks_count * sizeof(Block));
+    s->open_blocks.size = blocks_count;
+}
+
+// Forwards to the lexer's `mark_end` callback, except while the scanner is
+// simulating, in which case nothing happens.
+static void mark_end(Scanner *s, TSLexer *lexer) {
+    if (s->simulate) {
+        return;
+    }
+    lexer->mark_end(lexer);
+}
+
+// Convenience function to emit the error token. This is done to stop invalid
+// parse branches. Specifically:
+// 1. When encountering a newline after a line break that ended a paragraph,
+//    and no new block has been opened.
+// 2. When encountering a new block after a soft line break.
+// 3. When a `$._trigger_error` token is valid, which is used to stop parse
+//    branches through normal tree-sitter grammar rules.
+//
+// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in
+// grammar.js
+//
+// Sets `lexer->result_symbol` to ERROR and always returns true.
+static bool error(TSLexer *lexer) {
+    lexer->result_symbol = ERROR;
+    return true;
+}
+
+// Advance the lexer one character.
+// Also keeps track of the current column (modulo 4), counting tabs as spaces
+// with a tab stop of 4. See https://github.github.com/gfm/#tabs
+// Returns the width of the consumed character in columns: the distance to the
+// next tab stop for a tab, 1 for everything else.
+static size_t advance(Scanner *s, TSLexer *lexer) {
+    const bool is_tab = lexer->lookahead == '\t';
+    const size_t width = is_tab ? (size_t)(4 - s->column) : 1;
+    s->column = is_tab ? 0 : (s->column + 1) % 4;
+    lexer->advance(lexer, false);
+    return width;
+}
+
+// Try to match the given block at the current position, i.e. consume all
+// characters that belong to the block. These are
+// 1. indentation for list items and indented code blocks
+// 2. '>' for block quotes
+// Fenced code blocks and anonymous blocks consume nothing and always match.
+// Whitespace that was consumed but not used up stays recorded in
+// `s->indentation`.
+// Returns true if the block is matched and false otherwise
+static bool match(Scanner *s, TSLexer *lexer, Block block) {
+    switch (block) {
+        case FENCED_CODE_BLOCK:
+        case ANONYMOUS:
+            return true;
+
+        case BLOCK_QUOTE:
+            while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                s->indentation += advance(s, lexer);
+            }
+            if (lexer->lookahead != '>') {
+                return false;
+            }
+            advance(s, lexer);
+            s->indentation = 0;
+            // One column of whitespace after '>' belongs to the marker; the
+            // rest of a tab's width is kept as indentation.
+            if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                s->indentation += advance(s, lexer) - 1;
+            }
+            return true;
+
+        case INDENTED_CODE_BLOCK:
+            while (s->indentation < 4 &&
+                   (lexer->lookahead == ' ' || lexer->lookahead == '\t')) {
+                s->indentation += advance(s, lexer);
+            }
+            // Needs four columns of indentation and must not be at a line end.
+            if (s->indentation < 4 || lexer->lookahead == '\n' ||
+                lexer->lookahead == '\r') {
+                return false;
+            }
+            s->indentation -= 4;
+            return true;
+
+        case LIST_ITEM:
+        case LIST_ITEM_1_INDENTATION:
+        case LIST_ITEM_2_INDENTATION:
+        case LIST_ITEM_3_INDENTATION:
+        case LIST_ITEM_4_INDENTATION:
+        case LIST_ITEM_5_INDENTATION:
+        case LIST_ITEM_6_INDENTATION:
+        case LIST_ITEM_7_INDENTATION:
+        case LIST_ITEM_8_INDENTATION:
+        case LIST_ITEM_9_INDENTATION:
+        case LIST_ITEM_10_INDENTATION:
+        case LIST_ITEM_11_INDENTATION:
+        case LIST_ITEM_12_INDENTATION:
+        case LIST_ITEM_13_INDENTATION:
+        case LIST_ITEM_14_INDENTATION:
+        case LIST_ITEM_MAX_INDENTATION: {
+            const uint8_t required = list_item_indentation(block);
+            while (s->indentation < required &&
+                   (lexer->lookahead == ' ' || lexer->lookahead == '\t')) {
+                s->indentation += advance(s, lexer);
+            }
+            if (s->indentation >= required) {
+                s->indentation -= required;
+                return true;
+            }
+            // A line end before the required indentation still matches; the
+            // partial indentation is dropped.
+            if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
+                s->indentation = 0;
+                return true;
+            }
+            return false;
+        }
+    }
+    return false;
+}
+
+// Scans a run of `delimiter` characters and decides whether it closes the
+// currently open fenced code block or opens a new one. The code below
+// distinguishes '`' from any other delimiter, which is treated as the tilde
+// variant.
+//
+// - Closing fence: the matching FENCED_CODE_BLOCK_END_* token must be valid,
+//   the indentation less than 4, the run at least as long as the opening
+//   delimiter, and the run must be followed directly by a line break.
+// - Opening fence: the matching FENCED_CODE_BLOCK_START_* token must be valid
+//   and the run at least 3 characters long. For backticks the rest of the line
+//   is scanned and must not contain another backtick.
+//
+// Returns true and sets `lexer->result_symbol` if a fence was recognized.
+// Characters may already have been consumed when false is returned.
+static bool parse_fenced_code_block(Scanner *s, const char delimiter,
+                                    TSLexer *lexer, const bool *valid_symbols) {
+    // Count the delimiter characters. The counter saturates at UINT8_MAX
+    // instead of wrapping around, so that a fence longer than 255 characters
+    // is not mistaken for a very short one.
+    uint8_t level = 0;
+    while (lexer->lookahead == delimiter) {
+        advance(s, lexer);
+        if (level < UINT8_MAX) {
+            level++;
+        }
+    }
+    mark_end(s, lexer);
+    // If this is able to close a fenced code block then that is the only valid
+    // interpretation. It can only close a fenced code block if the number of
+    // delimiter characters is at least the number of the opening delimiter.
+    // Also it cannot be indented more than 3 spaces.
+    if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_END_BACKTICK]
+                          : valid_symbols[FENCED_CODE_BLOCK_END_TILDE]) &&
+        s->indentation < 4 && level >= s->fenced_code_block_delimiter_length &&
+        (lexer->lookahead == '\n' || lexer->lookahead == '\r')) {
+        s->fenced_code_block_delimiter_length = 0;
+        lexer->result_symbol = delimiter == '`' ? FENCED_CODE_BLOCK_END_BACKTICK
+                                                : FENCED_CODE_BLOCK_END_TILDE;
+        return true;
+    }
+    // If this could be the start of a fenced code block, check if the info
+    // string contains any backticks.
+    if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_START_BACKTICK]
+                          : valid_symbols[FENCED_CODE_BLOCK_START_TILDE]) &&
+        level >= 3) {
+        bool info_string_has_backtick = false;
+        if (delimiter == '`') {
+            while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
+                   !lexer->eof(lexer)) {
+                if (lexer->lookahead == '`') {
+                    info_string_has_backtick = true;
+                    break;
+                }
+                advance(s, lexer);
+            }
+        }
+        // If it does not then choose to interpret this as the start of a fenced
+        // code block.
+        if (!info_string_has_backtick) {
+            lexer->result_symbol = delimiter == '`'
+                                       ? FENCED_CODE_BLOCK_START_BACKTICK
+                                       : FENCED_CODE_BLOCK_START_TILDE;
+            if (!s->simulate)
+                push_block(s, FENCED_CODE_BLOCK);
+            // Remember the length of the delimiter for later, since we need it
+            // to decide whether a sequence of delimiters can close the block.
+            s->fenced_code_block_delimiter_length = level;
+            s->indentation = 0;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Scans a token that starts with '*': either a thematic break made of stars or
+// a star list marker. The first character is consumed unconditionally, so the
+// caller is expected to have checked the lookahead (not verified here).
+//
+// - THEMATIC_BREAK: at least three stars, optionally separated by spaces or
+//   tabs, running up to the end of the line, with less than 4 columns of
+//   indentation. Takes precedence over a list marker.
+// - LIST_MARKER_STAR / LIST_MARKER_STAR_DONT_INTERRUPT: a star followed by at
+//   least one column of whitespace, or a star with nothing but whitespace
+//   after it on the line. In the latter case the DONT_INTERRUPT variant is
+//   used when all open blocks were matched.
+//
+// For a list marker a list item block is pushed (unless simulating). Returns
+// true if a token was recognized and false otherwise; characters may already
+// have been consumed when false is returned.
+static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
+    advance(s, lexer);
+    mark_end(s, lexer);
+    // Count the number of stars, permitting whitespace between them.
+    size_t star_count = 1;
+    // Width (in columns) of the whitespace that follows the first star; only
+    // accumulated while no second star has been seen.
+    uint8_t extra_indentation = 0;
+    for (;;) {
+        if (lexer->lookahead == '*') {
+            if (star_count == 1 && extra_indentation >= 1 &&
+                valid_symbols[LIST_MARKER_STAR]) {
+                // If we get to this point then the token has to be at least
+                // this long. We need to call `mark_end` here in case we decide
+                // later that this is a list item.
+                mark_end(s, lexer);
+            }
+            star_count++;
+            advance(s, lexer);
+        } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+            if (star_count == 1) {
+                extra_indentation += advance(s, lexer);
+            } else {
+                advance(s, lexer);
+            }
+        } else {
+            break;
+        }
+    }
+    bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
+    bool dont_interrupt = false;
+    if (star_count == 1 && line_end) {
+        // An otherwise empty line counts as one column of whitespace after
+        // the marker.
+        extra_indentation = 1;
+        // line is empty so don't interrupt paragraphs if this is a list marker
+        dont_interrupt = s->matched == s->open_blocks.size;
+    }
+    // If there were at least 3 stars then this could be a thematic break
+    bool thematic_break = star_count >= 3 && line_end;
+    // If there was a star and at least one space after that star then this
+    // could be a list marker.
+    bool list_marker_star = star_count >= 1 && extra_indentation >= 1;
+    if (valid_symbols[THEMATIC_BREAK] && thematic_break && s->indentation < 4) {
+        // If a thematic break is valid then it takes precedence
+        lexer->result_symbol = THEMATIC_BREAK;
+        mark_end(s, lexer);
+        s->indentation = 0;
+        return true;
+    }
+    if ((dont_interrupt ? valid_symbols[LIST_MARKER_STAR_DONT_INTERRUPT]
+                        : valid_symbols[LIST_MARKER_STAR]) &&
+        list_marker_star) {
+        // List markers take precedence over emphasis markers
+        // If star_count > 1 then we already called mark_end at the right point.
+        // Otherwise the token should go until this point.
+        if (star_count == 1) {
+            mark_end(s, lexer);
+        }
+        // Not counting one space...
+        extra_indentation--;
+        // ... check if the list item begins with an indented code block
+        if (extra_indentation <= 3) {
+            // If not then calculate the indentation level of the list item
+            // content as indentation of list marker + indentation after list
+            // marker - 1
+            extra_indentation += s->indentation;
+            s->indentation = 0;
+        } else {
+            // Otherwise the indentation level is just the indentation of the
+            // list marker. We keep the indentation after the list marker for
+            // later blocks.
+            uint8_t temp = s->indentation;
+            s->indentation = extra_indentation;
+            extra_indentation = temp;
+        }
+        // The list item variant encodes the content indentation as an offset
+        // from LIST_ITEM.
+        if (!s->simulate)
+            push_block(s, (Block)(LIST_ITEM + extra_indentation));
+        lexer->result_symbol =
+            dont_interrupt ? LIST_MARKER_STAR_DONT_INTERRUPT : LIST_MARKER_STAR;
+        return true;
+    }
+    return false;
+}
+
+// Scans a thematic break made of underscores: at least three '_' characters,
+// optionally separated by spaces or tabs, running up to the end of the line.
+// The first character is consumed unconditionally and counted as an
+// underscore, so the caller is expected to have checked the lookahead (not
+// verified here). Returns true and emits THEMATIC_BREAK on success; characters
+// may already have been consumed when false is returned.
+static bool parse_thematic_break_underscore(Scanner *s, TSLexer *lexer,
+                                            const bool *valid_symbols) {
+    advance(s, lexer);
+    mark_end(s, lexer);
+
+    size_t seen = 1;
+    while (lexer->lookahead == '_' || lexer->lookahead == ' ' ||
+           lexer->lookahead == '\t') {
+        if (lexer->lookahead == '_') {
+            seen++;
+        }
+        advance(s, lexer);
+    }
+
+    const bool at_line_end =
+        lexer->lookahead == '\n' || lexer->lookahead == '\r';
+    if (seen < 3 || !at_line_end || !valid_symbols[THEMATIC_BREAK]) {
+        return false;
+    }
+
+    lexer->result_symbol = THEMATIC_BREAK;
+    mark_end(s, lexer);
+    s->indentation = 0;
+    return true;
+}
+
+// Scans the start of a block quote. Does nothing and returns false unless
+// BLOCK_QUOTE_START is valid. Otherwise the current character is consumed
+// unconditionally (the caller is expected to have checked that it is the
+// block quote marker; not verified here), followed by at most one space or
+// tab. One column of that whitespace belongs to the marker, the rest of a
+// tab's width is kept in `s->indentation`. A BLOCK_QUOTE block is pushed
+// unless simulating.
+static bool parse_block_quote(Scanner *s, TSLexer *lexer,
+                              const bool *valid_symbols) {
+    if (!valid_symbols[BLOCK_QUOTE_START]) {
+        return false;
+    }
+
+    advance(s, lexer);
+    s->indentation = 0;
+    const bool whitespace_follows =
+        lexer->lookahead == ' ' || lexer->lookahead == '\t';
+    if (whitespace_follows) {
+        s->indentation += advance(s, lexer) - 1;
+    }
+
+    if (!s->simulate) {
+        push_block(s, BLOCK_QUOTE);
+    }
+    lexer->result_symbol = BLOCK_QUOTE_START;
+    return true;
+}
+
+// Scans an ATX heading marker: one to six '#' characters followed by a space,
+// a tab or a line break. Only attempted when ATX_H1_MARKER is valid and the
+// indentation is at most 3.
+//
+// On success the result symbol is ATX_H1_MARKER..ATX_H6_MARKER according to
+// the number of '#' characters, the indentation is reset and true is
+// returned. Characters may already have been consumed when false is returned.
+static bool parse_atx_heading(Scanner *s, TSLexer *lexer,
+                              const bool *valid_symbols) {
+    if (!valid_symbols[ATX_H1_MARKER] || s->indentation > 3) {
+        return false;
+    }
+    mark_end(s, lexer);
+    // Consume at most seven '#': a seventh one is enough to know that this is
+    // not a heading.
+    uint16_t level = 0;
+    while (lexer->lookahead == '#' && level <= 6) {
+        advance(s, lexer);
+        level++;
+    }
+    // Without the lower bound a call that did not start at '#' would compute
+    // `ATX_H1_MARKER - 1` below and emit an unrelated token.
+    if (level < 1 || level > 6) {
+        return false;
+    }
+    if (lexer->lookahead != ' ' && lexer->lookahead != '\t' &&
+        lexer->lookahead != '\n' && lexer->lookahead != '\r') {
+        return false;
+    }
+    // Relies on the six ATX marker tokens being contiguous in `TokenType`.
+    lexer->result_symbol = ATX_H1_MARKER + (level - 1);
+    s->indentation = 0;
+    mark_end(s, lexer);
+    return true;
+}
+
+// Scans a level-1 setext heading underline: a run of '=' characters followed
+// by optional spaces or tabs up to a line break. Only attempted when
+// SETEXT_H1_UNDERLINE is valid and all open blocks were matched. Returns true
+// and emits SETEXT_H1_UNDERLINE on success; characters may already have been
+// consumed when false is returned.
+static bool parse_setext_underline(Scanner *s, TSLexer *lexer,
+                                   const bool *valid_symbols) {
+    const bool all_blocks_matched = s->matched == s->open_blocks.size;
+    if (!valid_symbols[SETEXT_H1_UNDERLINE] || !all_blocks_matched) {
+        return false;
+    }
+
+    mark_end(s, lexer);
+    while (lexer->lookahead == '=') {
+        advance(s, lexer);
+    }
+    // Trailing whitespace is allowed, but no further '=' after it.
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+        advance(s, lexer);
+    }
+
+    const bool at_line_end =
+        lexer->lookahead == '\n' || lexer->lookahead == '\r';
+    if (!at_line_end) {
+        return false;
+    }
+    lexer->result_symbol = SETEXT_H1_UNDERLINE;
+    mark_end(s, lexer);
+    return true;
+}
+
+// Scans a token that starts with '+': either a `+++` delimited metadata block
+// (PLUS_METADATA) or a plus list marker. Nothing is attempted unless the
+// indentation is at most 3 and at least one of the three tokens is valid. The
+// first character is consumed unconditionally, so the caller is expected to
+// have checked the lookahead (not verified here).
+//
+// - Metadata: the opening line must consist of exactly three pluses followed
+//   only by spaces or tabs. All following lines are consumed until a line
+//   that starts with exactly three pluses, followed only by spaces or tabs and
+//   a line break; that line break is part of the token. Reaching the end of
+//   the file first means this is not metadata.
+// - List marker: a plus followed by at least one column of whitespace, or by
+//   nothing but whitespace up to the line break. In the latter case the
+//   DONT_INTERRUPT variant is used when all open blocks were matched. A list
+//   item block is pushed unless simulating.
+//
+// Returns true if a token was recognized and false otherwise; characters may
+// already have been consumed when false is returned.
+static bool parse_plus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
+    if (s->indentation <= 3 &&
+        (valid_symbols[LIST_MARKER_PLUS] ||
+         valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT] ||
+         valid_symbols[PLUS_METADATA])) {
+        advance(s, lexer);
+        if (valid_symbols[PLUS_METADATA] && lexer->lookahead == '+') {
+            // Second plus seen: from here on only the metadata interpretation
+            // is considered.
+            advance(s, lexer);
+            if (lexer->lookahead != '+') {
+                return false;
+            }
+            advance(s, lexer);
+            // Only whitespace may follow the three opening pluses.
+            while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                advance(s, lexer);
+            }
+            if (lexer->lookahead != '\n' && lexer->lookahead != '\r') {
+                return false;
+            }
+            // Each iteration handles one line of the metadata body.
+            for (;;) {
+                // advance over newline
+                if (lexer->lookahead == '\r') {
+                    advance(s, lexer);
+                    if (lexer->lookahead == '\n') {
+                        advance(s, lexer);
+                    }
+                } else {
+                    advance(s, lexer);
+                }
+                // check for pluses
+                size_t plus_count = 0;
+                while (lexer->lookahead == '+') {
+                    plus_count++;
+                    advance(s, lexer);
+                }
+                if (plus_count == 3) {
+                    // if exactly 3 check if next symbol (after eventual
+                    // whitespace) is newline
+                    while (lexer->lookahead == ' ' ||
+                           lexer->lookahead == '\t') {
+                        advance(s, lexer);
+                    }
+                    if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
+                        // if so also consume newline
+                        if (lexer->lookahead == '\r') {
+                            advance(s, lexer);
+                            if (lexer->lookahead == '\n') {
+                                advance(s, lexer);
+                            }
+                        } else {
+                            advance(s, lexer);
+                        }
+                        mark_end(s, lexer);
+                        lexer->result_symbol = PLUS_METADATA;
+                        return true;
+                    }
+                }
+                // otherwise consume rest of line
+                while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
+                       !lexer->eof(lexer)) {
+                    advance(s, lexer);
+                }
+                // if end of file is reached, then this is not metadata
+                if (lexer->eof(lexer)) {
+                    break;
+                }
+            }
+        } else {
+            // List marker interpretation: measure the whitespace after '+'.
+            uint8_t extra_indentation = 0;
+            while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                extra_indentation += advance(s, lexer);
+            }
+            bool dont_interrupt = false;
+            if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
+                // An otherwise empty line counts as one column of whitespace
+                // after the marker.
+                extra_indentation = 1;
+                dont_interrupt = true;
+            }
+            dont_interrupt =
+                dont_interrupt && s->matched == s->open_blocks.size;
+            if (extra_indentation >= 1 &&
+                (dont_interrupt ? valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT]
+                                : valid_symbols[LIST_MARKER_PLUS])) {
+                lexer->result_symbol = dont_interrupt
+                                           ? LIST_MARKER_PLUS_DONT_INTERRUPT
+                                           : LIST_MARKER_PLUS;
+                // Not counting one column of whitespace, which belongs to
+                // the marker.
+                extra_indentation--;
+                if (extra_indentation <= 3) {
+                    // The content indentation is the marker's indentation
+                    // plus the remaining whitespace after the marker.
+                    extra_indentation += s->indentation;
+                    s->indentation = 0;
+                } else {
+                    // Otherwise the content indentation is just the marker's
+                    // indentation; the whitespace after the marker is kept
+                    // in `s->indentation`.
+                    uint8_t temp = s->indentation;
+                    s->indentation = extra_indentation;
+                    extra_indentation = temp;
+                }
+                if (!s->simulate)
+                    push_block(s, (Block)(LIST_ITEM + extra_indentation));
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+// Scans an ordered list marker: one to nine ASCII digits followed by '.' or
+// ')' and then at least one column of whitespace or a line break. Nothing is
+// attempted unless the indentation is at most 3 and at least one of the four
+// ordered list marker tokens is valid. The first character is consumed
+// unconditionally and counted as a digit, so the caller is expected to have
+// checked the lookahead (not verified here).
+//
+// The DONT_INTERRUPT variant of the token is used when all open blocks were
+// matched and either the number is not exactly "1" or the marker is followed
+// by nothing but whitespace up to the line break.
+//
+// On success a list item block is pushed (unless simulating) whose variant
+// encodes the content indentation including the width of the digits. Returns
+// true if a token was recognized and false otherwise; characters may already
+// have been consumed when false is returned.
+static bool parse_ordered_list_marker(Scanner *s, TSLexer *lexer,
+                                      const bool *valid_symbols) {
+    if (s->indentation > 3 ||
+        !(valid_symbols[LIST_MARKER_PARENTHESIS] ||
+          valid_symbols[LIST_MARKER_DOT] ||
+          valid_symbols[LIST_MARKER_PARENTHESIS_DONT_INTERRUPT] ||
+          valid_symbols[LIST_MARKER_DOT_DONT_INTERRUPT])) {
+        return false;
+    }
+
+    size_t digits = 1;
+    bool dont_interrupt = lexer->lookahead != '1';
+    advance(s, lexer);
+    // Explicit range check instead of isdigit(): the lookahead is a full code
+    // point, and passing a value that does not fit in unsigned char to
+    // isdigit() is undefined behavior.
+    while (lexer->lookahead >= '0' && lexer->lookahead <= '9') {
+        dont_interrupt = true;
+        digits++;
+        advance(s, lexer);
+    }
+    if (digits > 9) {
+        return false;
+    }
+
+    const bool dot = lexer->lookahead == '.';
+    const bool parenthesis = lexer->lookahead == ')';
+    if (!dot && !parenthesis) {
+        return false;
+    }
+    advance(s, lexer);
+
+    // Width of the whitespace after the delimiter.
+    uint8_t extra_indentation = 0;
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+        extra_indentation += advance(s, lexer);
+    }
+    if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
+        // An otherwise empty line counts as one column of whitespace after
+        // the marker.
+        extra_indentation = 1;
+        dont_interrupt = true;
+    }
+    dont_interrupt = dont_interrupt && s->matched == s->open_blocks.size;
+
+    // Emit the same token whose validity is checked, in line with the other
+    // list marker parsers in this file.
+    TokenType symbol;
+    if (dot) {
+        symbol = dont_interrupt ? LIST_MARKER_DOT_DONT_INTERRUPT
+                                : LIST_MARKER_DOT;
+    } else {
+        symbol = dont_interrupt ? LIST_MARKER_PARENTHESIS_DONT_INTERRUPT
+                                : LIST_MARKER_PARENTHESIS;
+    }
+    if (extra_indentation < 1 || !valid_symbols[symbol]) {
+        return false;
+    }
+    lexer->result_symbol = symbol;
+
+    // Not counting one column of whitespace, which belongs to the marker.
+    extra_indentation--;
+    if (extra_indentation <= 3) {
+        // The content indentation is the marker's indentation plus the
+        // remaining whitespace after the marker.
+        extra_indentation += s->indentation;
+        s->indentation = 0;
+    } else {
+        // Otherwise the content indentation is just the marker's indentation;
+        // the whitespace after the marker is kept in `s->indentation`.
+        uint8_t temp = s->indentation;
+        s->indentation = extra_indentation;
+        extra_indentation = temp;
+    }
+    if (!s->simulate) {
+        push_block(s, (Block)(LIST_ITEM + extra_indentation + digits));
+    }
+    return true;
+}
+
+// Scans a construct that starts with '-'. scan() dispatches here when the
+// character following any leading whitespace is '-'.
+//
+// Depending on `valid_symbols` and on what follows the first '-', one of
+// SETEXT_H2_UNDERLINE, THEMATIC_BREAK, LIST_MARKER_MINUS,
+// LIST_MARKER_MINUS_DONT_INTERRUPT or MINUS_METADATA is stored in
+// `lexer->result_symbol`. Nothing is attempted when the line is indented by
+// more than 3 columns or when none of those symbols is valid.
+//
+// For a list marker a LIST_ITEM block is pushed unless the scanner is
+// simulating. Returns true if a token was produced, false otherwise.
+static bool parse_minus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
+    if (s->indentation <= 3 &&
+        (valid_symbols[LIST_MARKER_MINUS] ||
+         valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT] ||
+         valid_symbols[SETEXT_H2_UNDERLINE] || valid_symbols[THEMATIC_BREAK] ||
+         valid_symbols[MINUS_METADATA])) {
+        // Mark the token end before anything is consumed; the branches below
+        // re-mark it once they know how much belongs to the token.
+        mark_end(s, lexer);
+        // true once any space/tab has been seen inside the loop below
+        bool whitespace_after_minus = false;
+        // true once a '-' has been seen after such whitespace
+        bool minus_after_whitespace = false;
+        size_t minus_count = 0;
+        // width of the whitespace that directly follows the first '-'
+        uint8_t extra_indentation = 0;
+
+        // Consume every '-', space and tab up to the first other character.
+        for (;;) {
+            if (lexer->lookahead == '-') {
+                // A second '-' after whitespace that followed the first one:
+                // mark the position in front of it. The list marker branch
+                // below only re-marks when there is a single '-', so this is
+                // the mark that stays in effect for a marker such as "- -".
+                if (minus_count == 1 && extra_indentation >= 1) {
+                    mark_end(s, lexer);
+                }
+                minus_count++;
+                advance(s, lexer);
+                minus_after_whitespace = whitespace_after_minus;
+            } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                // Only whitespace seen while exactly one '-' has been read
+                // contributes to the list item's extra indentation.
+                if (minus_count == 1) {
+                    extra_indentation += advance(s, lexer);
+                } else {
+                    advance(s, lexer);
+                }
+                whitespace_after_minus = true;
+            } else {
+                break;
+            }
+        }
+        bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
+        bool dont_interrupt = false;
+        // A single '-' followed only by whitespace up to the line end is
+        // handled as a marker with one column of whitespace and is flagged as
+        // a "don't interrupt" candidate.
+        if (minus_count == 1 && line_end) {
+            extra_indentation = 1;
+            dont_interrupt = true;
+        }
+        // The flag is only kept if all open blocks have been matched.
+        dont_interrupt = dont_interrupt && s->matched == s->open_blocks.size;
+        bool thematic_break = minus_count >= 3 && line_end;
+        bool underline =
+            minus_count >= 1 && !minus_after_whitespace && line_end &&
+            s->matched ==
+                s->open_blocks
+                    .size; // setext heading can not break lazy continuation
+        bool list_marker_minus = minus_count >= 1 && extra_indentation >= 1;
+        bool success = false;
+        // Precedence: setext underline, then thematic break, then list marker.
+        if (valid_symbols[SETEXT_H2_UNDERLINE] && underline) {
+            lexer->result_symbol = SETEXT_H2_UNDERLINE;
+            mark_end(s, lexer);
+            s->indentation = 0;
+            success = true;
+        } else if (valid_symbols[THEMATIC_BREAK] &&
+                   thematic_break) { // underline is false if list_marker_minus
+                                     // is true
+            lexer->result_symbol = THEMATIC_BREAK;
+            mark_end(s, lexer);
+            s->indentation = 0;
+            success = true;
+        } else if ((dont_interrupt
+                        ? valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT]
+                        : valid_symbols[LIST_MARKER_MINUS]) &&
+                   list_marker_minus) {
+            // With a single '-' the token extends up to the current position;
+            // otherwise the mark set inside the loop is kept.
+            if (minus_count == 1) {
+                mark_end(s, lexer);
+            }
+            // One column of the whitespace after the marker is not counted
+            // (presumably the separator that belongs to the marker itself).
+            extra_indentation--;
+            if (extra_indentation <= 3) {
+                // Fold the indentation in front of the marker into the value
+                // stored with the list item.
+                extra_indentation += s->indentation;
+                s->indentation = 0;
+            } else {
+                // More than 3 remaining columns: swap the two values, so the
+                // whitespace after the marker becomes the current indentation
+                // and the list item stores the indentation in front of the
+                // marker.
+                uint8_t temp = s->indentation;
+                s->indentation = extra_indentation;
+                extra_indentation = temp;
+            }
+            if (!s->simulate)
+                push_block(s, (Block)(LIST_ITEM + extra_indentation));
+            lexer->result_symbol = dont_interrupt
+                                       ? LIST_MARKER_MINUS_DONT_INTERRUPT
+                                       : LIST_MARKER_MINUS;
+            return true;
+        }
+        // Exactly three '-' (none of them after whitespace) at the end of a
+        // line may open a metadata section. Search the following lines for a
+        // closing line of exactly three '-'; if one is found MINUS_METADATA
+        // replaces whatever token was selected above.
+        if (minus_count == 3 && (!minus_after_whitespace) && line_end &&
+            valid_symbols[MINUS_METADATA]) {
+            for (;;) {
+                // advance over newline
+                if (lexer->lookahead == '\r') {
+                    advance(s, lexer);
+                    if (lexer->lookahead == '\n') {
+                        advance(s, lexer);
+                    }
+                } else {
+                    advance(s, lexer);
+                }
+                // check for minuses
+                minus_count = 0;
+                while (lexer->lookahead == '-') {
+                    minus_count++;
+                    advance(s, lexer);
+                }
+                if (minus_count == 3) {
+                    // if exactly 3 check if next symbol (after eventual
+                    // whitespace) is newline
+                    while (lexer->lookahead == ' ' ||
+                           lexer->lookahead == '\t') {
+                        advance(s, lexer);
+                    }
+                    if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
+                        // if so also consume newline
+                        if (lexer->lookahead == '\r') {
+                            advance(s, lexer);
+                            if (lexer->lookahead == '\n') {
+                                advance(s, lexer);
+                            }
+                        } else {
+                            advance(s, lexer);
+                        }
+                        mark_end(s, lexer);
+                        lexer->result_symbol = MINUS_METADATA;
+                        return true;
+                    }
+                }
+                // otherwise consume rest of line
+                while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
+                       !lexer->eof(lexer)) {
+                    advance(s, lexer);
+                }
+                // if end of file is reached, then this is not metadata
+                if (lexer->eof(lexer)) {
+                    break;
+                }
+            }
+        }
+        // No metadata section: report the underline / thematic break selected
+        // above, if any. Its extent was fixed by the earlier mark_end call.
+        if (success) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Tries to recognise an HTML block boundary at a '<'. scan() dispatches here
+// when the character following any leading whitespace is '<'.
+//
+// Possible results (each only if the matching entry of `valid_symbols` is
+// set):
+//   HTML_BLOCK_1_START / HTML_BLOCK_1_END  tag name in HTML_TAG_NAMES_RULE_1
+//   HTML_BLOCK_2_START                     "<!--"
+//   HTML_BLOCK_3_START                     "<?"
+//   HTML_BLOCK_4_START                     "<!" followed by 'A'..'Z'
+//   HTML_BLOCK_5_START                     "<![CDATA["
+//   HTML_BLOCK_6_START                     tag name in HTML_TAG_NAMES_RULE_7
+//   HTML_BLOCK_7_START                     any other complete opening or
+//                                          closing tag followed only by
+//                                          spaces/tabs and a line break
+//
+// Every *_START token pushes an ANONYMOUS block unless the scanner is
+// simulating. Returns true if a token was produced, false otherwise.
+static bool parse_html_block(Scanner *s, TSLexer *lexer,
+                             const bool *valid_symbols) {
+    if (!(valid_symbols[HTML_BLOCK_1_START] ||
+          valid_symbols[HTML_BLOCK_1_END] ||
+          valid_symbols[HTML_BLOCK_2_START] ||
+          valid_symbols[HTML_BLOCK_3_START] ||
+          valid_symbols[HTML_BLOCK_4_START] ||
+          valid_symbols[HTML_BLOCK_5_START] ||
+          valid_symbols[HTML_BLOCK_6_START] ||
+          valid_symbols[HTML_BLOCK_7_START])) {
+        return false;
+    }
+    // consume the '<'
+    advance(s, lexer);
+    if (lexer->lookahead == '?' && valid_symbols[HTML_BLOCK_3_START]) {
+        advance(s, lexer);
+        lexer->result_symbol = HTML_BLOCK_3_START;
+        if (!s->simulate)
+            push_block(s, ANONYMOUS);
+        return true;
+    }
+    if (lexer->lookahead == '!') {
+        // could be block 2, 4 or 5
+        advance(s, lexer);
+        if (lexer->lookahead == '-') {
+            advance(s, lexer);
+            if (lexer->lookahead == '-' && valid_symbols[HTML_BLOCK_2_START]) {
+                advance(s, lexer);
+                lexer->result_symbol = HTML_BLOCK_2_START;
+                if (!s->simulate)
+                    push_block(s, ANONYMOUS);
+                return true;
+            }
+        } else if ('A' <= lexer->lookahead && lexer->lookahead <= 'Z' &&
+                   valid_symbols[HTML_BLOCK_4_START]) {
+            advance(s, lexer);
+            lexer->result_symbol = HTML_BLOCK_4_START;
+            if (!s->simulate)
+                push_block(s, ANONYMOUS);
+            return true;
+        } else if (lexer->lookahead == '[') {
+            // match the literal "CDATA[" one character at a time
+            advance(s, lexer);
+            if (lexer->lookahead == 'C') {
+                advance(s, lexer);
+                if (lexer->lookahead == 'D') {
+                    advance(s, lexer);
+                    if (lexer->lookahead == 'A') {
+                        advance(s, lexer);
+                        if (lexer->lookahead == 'T') {
+                            advance(s, lexer);
+                            if (lexer->lookahead == 'A') {
+                                advance(s, lexer);
+                                if (lexer->lookahead == '[' &&
+                                    valid_symbols[HTML_BLOCK_5_START]) {
+                                    advance(s, lexer);
+                                    lexer->result_symbol = HTML_BLOCK_5_START;
+                                    if (!s->simulate)
+                                        push_block(s, ANONYMOUS);
+                                    return true;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        // A "<!..." prefix that matched none of the above falls through to
+        // the tag name parsing below.
+    }
+    bool starting_slash = lexer->lookahead == '/';
+    if (starting_slash) {
+        advance(s, lexer);
+    }
+    // Read the alphabetic start of the tag name, lower-cased. At most 10
+    // characters are stored; for longer names `name_length` is set to 12,
+    // which makes the `name_length < 11` lookup below get skipped.
+    char name[11];
+    size_t name_length = 0;
+    while (iswalpha((wint_t)lexer->lookahead)) {
+        if (name_length < 10) {
+            name[name_length++] = (char)towlower((wint_t)lexer->lookahead);
+        } else {
+            name_length = 12;
+        }
+        advance(s, lexer);
+    }
+    if (name_length == 0) {
+        return false;
+    }
+    // set when the tag was already terminated by "/>" while checking the
+    // HTML_TAG_NAMES_RULE_7 names
+    bool tag_closed = false;
+    if (name_length < 11) {
+        name[name_length] = 0;
+        // the name must be followed by whitespace, a line break or '>'
+        bool next_symbol_valid =
+            lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
+            lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
+            lexer->lookahead == '>';
+        if (next_symbol_valid) {
+            // try block 1 names
+            for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_1; i++) {
+                if (strcmp(name, HTML_TAG_NAMES_RULE_1[i]) == 0) {
+                    if (starting_slash) {
+                        if (valid_symbols[HTML_BLOCK_1_END]) {
+                            lexer->result_symbol = HTML_BLOCK_1_END;
+                            return true;
+                        }
+                    } else if (valid_symbols[HTML_BLOCK_1_START]) {
+                        lexer->result_symbol = HTML_BLOCK_1_START;
+                        if (!s->simulate)
+                            push_block(s, ANONYMOUS);
+                        return true;
+                    }
+                }
+            }
+        }
+        // the name may also be followed directly by "/>"
+        if (!next_symbol_valid && lexer->lookahead == '/') {
+            advance(s, lexer);
+            if (lexer->lookahead == '>') {
+                advance(s, lexer);
+                tag_closed = true;
+            }
+        }
+        if (next_symbol_valid || tag_closed) {
+            // try the names in HTML_TAG_NAMES_RULE_7; a match starts an
+            // HTML_BLOCK_6 (for opening and closing tags alike)
+            for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_7; i++) {
+                if (strcmp(name, HTML_TAG_NAMES_RULE_7[i]) == 0 &&
+                    valid_symbols[HTML_BLOCK_6_START]) {
+                    lexer->result_symbol = HTML_BLOCK_6_START;
+                    if (!s->simulate)
+                        push_block(s, ANONYMOUS);
+                    return true;
+                }
+            }
+        }
+    }
+
+    // Everything below checks for block 7: a complete tag alone on its line.
+    if (!valid_symbols[HTML_BLOCK_7_START]) {
+        return false;
+    }
+
+    if (!tag_closed) {
+        // tag name (continued)
+        while (iswalnum((wint_t)lexer->lookahead) || lexer->lookahead == '-') {
+            advance(s, lexer);
+        }
+        if (!starting_slash) {
+            // attributes
+            // `had_whitespace` tracks whether whitespace separates the next
+            // attribute from whatever came before it.
+            bool had_whitespace = false;
+            for (;;) {
+                // whitespace
+                while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                    had_whitespace = true;
+                    advance(s, lexer);
+                }
+                // a '/' ends the attribute list; the '>' is checked after the
+                // loop
+                if (lexer->lookahead == '/') {
+                    advance(s, lexer);
+                    break;
+                }
+                if (lexer->lookahead == '>') {
+                    break;
+                }
+                // attribute name
+                if (!had_whitespace) {
+                    return false;
+                }
+                // first character: letter, '_' or ':'
+                if (!iswalpha((wint_t)lexer->lookahead) &&
+                    lexer->lookahead != '_' && lexer->lookahead != ':') {
+                    return false;
+                }
+                had_whitespace = false;
+                advance(s, lexer);
+                // remaining characters: letters, digits, '_', '.', ':' or '-'
+                while (iswalnum((wint_t)lexer->lookahead) ||
+                       lexer->lookahead == '_' || lexer->lookahead == '.' ||
+                       lexer->lookahead == ':' || lexer->lookahead == '-') {
+                    advance(s, lexer);
+                }
+                // attribute value specification
+                // optional whitespace
+                while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                    had_whitespace = true;
+                    advance(s, lexer);
+                }
+                // =
+                if (lexer->lookahead == '=') {
+                    advance(s, lexer);
+                    had_whitespace = false;
+                    // optional whitespace
+                    while (lexer->lookahead == ' ' ||
+                           lexer->lookahead == '\t') {
+                        advance(s, lexer);
+                    }
+                    // attribute value
+                    if (lexer->lookahead == '\'' || lexer->lookahead == '"') {
+                        // quoted value: must be closed by the same quote
+                        // character before the line ends
+                        char delimiter = (char)lexer->lookahead;
+                        advance(s, lexer);
+                        while (lexer->lookahead != delimiter &&
+                               lexer->lookahead != '\n' &&
+                               lexer->lookahead != '\r' && !lexer->eof(lexer)) {
+                            advance(s, lexer);
+                        }
+                        if (lexer->lookahead != delimiter) {
+                            return false;
+                        }
+                        advance(s, lexer);
+                    } else {
+                        // unquoted attribute value
+                        // (at least one character is required)
+                        bool had_one = false;
+                        while (lexer->lookahead != ' ' &&
+                               lexer->lookahead != '\t' &&
+                               lexer->lookahead != '"' &&
+                               lexer->lookahead != '\'' &&
+                               lexer->lookahead != '=' &&
+                               lexer->lookahead != '<' &&
+                               lexer->lookahead != '>' &&
+                               lexer->lookahead != '`' &&
+                               lexer->lookahead != '\n' &&
+                               lexer->lookahead != '\r' && !lexer->eof(lexer)) {
+                            advance(s, lexer);
+                            had_one = true;
+                        }
+                        if (!had_one) {
+                            return false;
+                        }
+                    }
+                }
+            }
+        } else {
+            // closing tag: only whitespace may follow the name
+            while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                advance(s, lexer);
+            }
+        }
+        if (lexer->lookahead != '>') {
+            return false;
+        }
+        advance(s, lexer);
+    }
+    // Only spaces and tabs may follow the tag before the line break. At the
+    // end of the file (no line break) no token is produced.
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+        advance(s, lexer);
+    }
+    if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
+        lexer->result_symbol = HTML_BLOCK_7_START;
+        if (!s->simulate)
+            push_block(s, ANONYMOUS);
+        return true;
+    }
+    return false;
+}
+
+// Decides whether a pipe table starts at the current position and, if so,
+// emits the zero-width PIPE_TABLE_START token.
+//
+// The rest of the current line is read as the header row and its cells are
+// counted. After matching all open blocks on the following line (in
+// simulation mode), that line must be a delimiter row with the same number of
+// cells. `valid_symbols` is not consulted here; scan() only calls this
+// function when PIPE_TABLE_START is valid.
+//
+// Side effects: `s->indentation` and `s->column` are reset for the second
+// line and `s->simulate` is left set to true (the public scan entry point
+// clears it at the start of every call).
+//
+// Returns true if PIPE_TABLE_START was emitted, false otherwise.
+static bool parse_pipe_table(Scanner *s, TSLexer *lexer,
+                             const bool *valid_symbols) {
+
+    // unused
+    (void)(valid_symbols);
+
+    // PIPE_TABLE_START is zero width
+    mark_end(s, lexer);
+    // count number of cells
+    size_t cell_count = 0;
+    // also remember if we see starting and ending pipes, as empty headers have
+    // to have both
+    bool starting_pipe = false;
+    bool ending_pipe = false;
+    // NOTE(review): `empty` is never modified, so it is always true in the
+    // condition after the loop.
+    bool empty = true;
+    if (lexer->lookahead == '|') {
+        starting_pipe = true;
+        advance(s, lexer);
+    }
+    // Walk the header row. Every '|' after the optional leading one closes a
+    // cell; `ending_pipe` stays true only if nothing but spaces/tabs follows
+    // the last '|'.
+    while (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
+           !lexer->eof(lexer)) {
+        if (lexer->lookahead == '|') {
+            cell_count++;
+            ending_pipe = true;
+            advance(s, lexer);
+        } else {
+            if (lexer->lookahead != ' ' && lexer->lookahead != '\t') {
+                ending_pipe = false;
+            }
+            // a backslash followed by punctuation is consumed as a pair, so
+            // an escaped '|' does not delimit a cell
+            if (lexer->lookahead == '\\') {
+                advance(s, lexer);
+                if (is_punctuation((char)lexer->lookahead)) {
+                    advance(s, lexer);
+                }
+            } else {
+                advance(s, lexer);
+            }
+        }
+    }
+    if (empty && cell_count == 0 && !(starting_pipe && ending_pipe)) {
+        return false;
+    }
+    // content after the last '|' forms one more cell
+    if (!ending_pipe) {
+        cell_count++;
+    }
+
+    // check the following line for a delimiter row
+    // parse a newline
+    if (lexer->lookahead == '\n') {
+        advance(s, lexer);
+    } else if (lexer->lookahead == '\r') {
+        advance(s, lexer);
+        if (lexer->lookahead == '\n') {
+            advance(s, lexer);
+        }
+    } else {
+        // end of file directly after the header row: no table
+        return false;
+    }
+    // measure the indentation of the second line
+    s->indentation = 0;
+    s->column = 0;
+    for (;;) {
+        if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+            s->indentation += advance(s, lexer);
+        } else {
+            break;
+        }
+    }
+    // The second line has to continue every open block. Simulation mode is
+    // switched on so that the block stack is not modified by this look-ahead.
+    s->simulate = true;
+    uint8_t matched_temp = 0;
+    while (matched_temp < (uint8_t)s->open_blocks.size) {
+        if (match(s, lexer, s->open_blocks.items[matched_temp])) {
+            matched_temp++;
+        } else {
+            return false;
+        }
+    }
+
+    // check if delimiter row has the same number of cells and at least one pipe
+    size_t delimiter_cell_count = 0;
+    // optional leading pipe
+    if (lexer->lookahead == '|') {
+        advance(s, lexer);
+    }
+    // each iteration handles one delimiter cell: [':'] '-'+ [':']
+    for (;;) {
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+            advance(s, lexer);
+        }
+        // a pipe right at the start of a cell counts as a cell of its own
+        if (lexer->lookahead == '|') {
+            delimiter_cell_count++;
+            advance(s, lexer);
+            continue;
+        }
+        // a leading ':' must be followed by '-'
+        if (lexer->lookahead == ':') {
+            advance(s, lexer);
+            if (lexer->lookahead != '-') {
+                return false;
+            }
+        }
+        bool had_one_minus = false;
+        while (lexer->lookahead == '-') {
+            had_one_minus = true;
+            advance(s, lexer);
+        }
+        if (had_one_minus) {
+            delimiter_cell_count++;
+        }
+        // a trailing ':' is only allowed after at least one '-'
+        if (lexer->lookahead == ':') {
+            if (!had_one_minus) {
+                return false;
+            }
+            advance(s, lexer);
+        }
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+            advance(s, lexer);
+        }
+        // a pipe ends the cell; count it if the cell was not counted above
+        if (lexer->lookahead == '|') {
+            if (!had_one_minus) {
+                delimiter_cell_count++;
+            }
+            advance(s, lexer);
+            continue;
+        }
+        // anything other than a line break here means this is no delimiter
+        // row (this includes the end of the file)
+        if (lexer->lookahead != '\r' && lexer->lookahead != '\n') {
+            return false;
+        } else {
+            break;
+        }
+    }
+    // if the cell counts are not equal then this is not a table
+    if (cell_count != delimiter_cell_count) {
+        return false;
+    }
+
+    lexer->result_symbol = PIPE_TABLE_START;
+    return true;
+}
+
+// Core of the external scanner: decides which external token, if any, starts
+// at the current lexer position. Returns true when `lexer->result_symbol` has
+// been set, false when no token could be produced.
+//
+// The checks run in this order:
+//   1. TRIGGER_ERROR and CLOSE_BLOCK requests coming from the grammar.
+//   2. End of file: emit TOKEN_EOF, or close the open blocks one per call.
+//   3. Outside the matching state: skip leading whitespace and try to start a
+//      new block based on the next character. Inside the matching state: try
+//      to match the open blocks (BLOCK_CONTINUATION / BLOCK_CLOSE).
+//   4. Line endings (LINE_ENDING, SOFT_LINE_ENDING, PIPE_TABLE_LINE_ENDING).
+//
+// While handling a possible soft line ending the function calls itself once
+// with `s->simulate` set and `paragraph_interrupt_symbols` as the valid
+// symbols, to find out whether the following line would open a new block.
+static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
+    // A normal tree-sitter rule decided that the current branch is invalid and
+    // now "requests" an error to stop the branch
+    if (valid_symbols[TRIGGER_ERROR]) {
+        return error(lexer);
+    }
+
+    // Close the inner most block after the next line break as requested. See
+    // `$._close_block` in grammar.js
+    if (valid_symbols[CLOSE_BLOCK]) {
+        s->state |= STATE_CLOSE_BLOCK;
+        lexer->result_symbol = CLOSE_BLOCK;
+        return true;
+    }
+
+    // if we are at the end of the file and there are still open blocks close
+    // them all
+    if (lexer->eof(lexer)) {
+        if (valid_symbols[TOKEN_EOF]) {
+            lexer->result_symbol = TOKEN_EOF;
+            return true;
+        }
+        // one block is closed per call
+        if (s->open_blocks.size > 0) {
+            lexer->result_symbol = BLOCK_CLOSE;
+            if (!s->simulate)
+                pop_block(s);
+            return true;
+        }
+        return false;
+    }
+
+    if (!(s->state & STATE_MATCHING)) {
+        // Parse any preceding whitespace and remember its length. This makes a
+        // lot of parsing quite a bit easier.
+        for (;;) {
+            if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                s->indentation += advance(s, lexer);
+            } else {
+                break;
+            }
+        }
+        // We are not matching. This is where the parsing logic for most
+        // "normal" token is. Most importantly parsing logic for the start of
+        // new blocks.
+        if (valid_symbols[INDENTED_CHUNK_START] &&
+            !valid_symbols[NO_INDENTED_CHUNK]) {
+            // At least 4 columns of indentation on a non-blank line start an
+            // indented chunk; the 4 columns are removed from the remaining
+            // indentation.
+            if (s->indentation >= 4 && lexer->lookahead != '\n' &&
+                lexer->lookahead != '\r') {
+                lexer->result_symbol = INDENTED_CHUNK_START;
+                if (!s->simulate)
+                    push_block(s, INDENTED_CODE_BLOCK);
+                s->indentation -= 4;
+                return true;
+            }
+        }
+        // Decide which tokens to consider based on the first non-whitespace
+        // character
+        switch (lexer->lookahead) {
+            case '\r':
+            case '\n':
+                if (valid_symbols[BLANK_LINE_START]) {
+                    // A blank line token is actually just 0 width, so do not
+                    // consume the characters
+                    lexer->result_symbol = BLANK_LINE_START;
+                    return true;
+                }
+                break;
+            case '`':
+                // A backtick could mark the beginning or ending of a fenced
+                // code block.
+                return parse_fenced_code_block(s, '`', lexer, valid_symbols);
+            case '~':
+                // A tilde could mark the beginning or ending of a fenced code
+                // block.
+                return parse_fenced_code_block(s, '~', lexer, valid_symbols);
+            case '*':
+                // A star could either mark a list item or a thematic break.
+                // This code is similar to the code for '_' and '+'.
+                return parse_star(s, lexer, valid_symbols);
+            case '_':
+                return parse_thematic_break_underscore(s, lexer, valid_symbols);
+            case '>':
+                // A '>' could mark the beginning of a block quote
+                return parse_block_quote(s, lexer, valid_symbols);
+            case '#':
+                // A '#' could mark an atx heading
+                return parse_atx_heading(s, lexer, valid_symbols);
+            case '=':
+                // A '=' could mark a setext underline
+                return parse_setext_underline(s, lexer, valid_symbols);
+            case '+':
+                // A '+' could be a list marker
+                return parse_plus(s, lexer, valid_symbols);
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+                // A number could be a list marker (if followed by a dot or a
+                // parenthesis)
+                return parse_ordered_list_marker(s, lexer, valid_symbols);
+            case '-':
+                // A minus could mark a list marker, a thematic break or a
+                // setext underline
+                return parse_minus(s, lexer, valid_symbols);
+            case '<':
+                // A < could mark the beginning of a html block
+                return parse_html_block(s, lexer, valid_symbols);
+        }
+        // Any other character on a non-blank line may start a pipe table.
+        // Line breaks fall through to the line ending handling below.
+        if (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
+            valid_symbols[PIPE_TABLE_START]) {
+            return parse_pipe_table(s, lexer, valid_symbols);
+        }
+    } else { // we are in the state of trying to match all currently open blocks
+        // set once at least one block was matched during this call
+        bool partial_success = false;
+        while (s->matched < (uint8_t)s->open_blocks.size) {
+            // If closing the innermost block was requested, stop in front of
+            // it instead of matching it. The request flag is cleared only
+            // when nothing was matched during this call.
+            if (s->matched == (uint8_t)s->open_blocks.size - 1 &&
+                (s->state & STATE_CLOSE_BLOCK)) {
+                if (!partial_success)
+                    s->state &= ~STATE_CLOSE_BLOCK;
+                break;
+            }
+            if (match(s, lexer, s->open_blocks.items[s->matched])) {
+                partial_success = true;
+                s->matched++;
+            } else {
+                // after a soft line break a failed match ends the matching
+                // state
+                if (s->state & STATE_WAS_SOFT_LINE_BREAK) {
+                    s->state &= (~STATE_MATCHING);
+                }
+                break;
+            }
+        }
+        if (partial_success) {
+            // all blocks matched: matching is done for this line
+            if (s->matched == s->open_blocks.size) {
+                s->state &= (~STATE_MATCHING);
+            }
+            lexer->result_symbol = BLOCK_CONTINUATION;
+            return true;
+        }
+
+        // Nothing was matched and the previous line ending was not a soft
+        // one: close the innermost block.
+        // NOTE(review): unlike the other push_block/pop_block calls in this
+        // function, this one is not guarded by `s->simulate` - confirm that
+        // this is intended.
+        if (!(s->state & STATE_WAS_SOFT_LINE_BREAK)) {
+            lexer->result_symbol = BLOCK_CLOSE;
+            pop_block(s);
+            if (s->matched == s->open_blocks.size) {
+                s->state &= (~STATE_MATCHING);
+            }
+            return true;
+        }
+    }
+
+    // The parser just encountered a line break. Setup the state correspondingly
+    if ((valid_symbols[LINE_ENDING] || valid_symbols[SOFT_LINE_ENDING] ||
+         valid_symbols[PIPE_TABLE_LINE_ENDING]) &&
+        (lexer->lookahead == '\n' || lexer->lookahead == '\r')) {
+        // consume "\n", "\r" or "\r\n"
+        if (lexer->lookahead == '\r') {
+            advance(s, lexer);
+            if (lexer->lookahead == '\n') {
+                advance(s, lexer);
+            }
+        } else {
+            advance(s, lexer);
+        }
+        s->indentation = 0;
+        s->column = 0;
+        if (!(s->state & STATE_CLOSE_BLOCK) &&
+            (valid_symbols[SOFT_LINE_ENDING] ||
+             valid_symbols[PIPE_TABLE_LINE_ENDING])) {
+            // The token ends right after the line break; everything scanned
+            // from here on is look-ahead into the next line.
+            lexer->mark_end(lexer);
+            for (;;) {
+                if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+                    s->indentation += advance(s, lexer);
+                } else {
+                    break;
+                }
+            }
+            // Simulate matching the open blocks on the next line without
+            // modifying the block stack. `s->matched` is used as the loop
+            // counter, so its current value is saved first.
+            s->simulate = true;
+            uint8_t matched_temp = s->matched;
+            s->matched = 0;
+            bool one_will_be_matched = false;
+            while (s->matched < (uint8_t)s->open_blocks.size) {
+                if (match(s, lexer, s->open_blocks.items[s->matched])) {
+                    s->matched++;
+                    one_will_be_matched = true;
+                } else {
+                    break;
+                }
+            }
+            bool all_will_be_matched = s->matched == s->open_blocks.size;
+            // Recursive, simulated scan: does anything on the next line
+            // interrupt the paragraph? If not (and the file has not ended),
+            // this line break is a soft one.
+            if (!lexer->eof(lexer) &&
+                !scan(s, lexer, paragraph_interrupt_symbols)) {
+                // NOTE(review): this assignment is overwritten by the
+                // `s->matched = 0` a few lines below.
+                s->matched = matched_temp;
+                // If the last line break ended a paragraph and no new block
+                // opened, the last line break should have been a soft line
+                // break. Reset the counter for matched blocks
+                s->matched = 0;
+                s->indentation = 0;
+                s->column = 0;
+                // If there is at least one open block, we should be in the
+                // matching state. Also set the matching flag if a
+                // `$._soft_line_break_marker` can be emitted so it does get
+                // emitted.
+                if (one_will_be_matched) {
+                    s->state |= STATE_MATCHING;
+                } else {
+                    s->state &= (~STATE_MATCHING);
+                }
+                if (valid_symbols[PIPE_TABLE_LINE_ENDING]) {
+                    // Inside a pipe table the line ending is only emitted if
+                    // every open block continues on the next line; otherwise
+                    // control falls through to the LINE_ENDING check below.
+                    if (all_will_be_matched) {
+                        lexer->result_symbol = PIPE_TABLE_LINE_ENDING;
+                        return true;
+                    }
+                } else {
+                    lexer->result_symbol = SOFT_LINE_ENDING;
+                    // reset some state variables
+                    s->state |= STATE_WAS_SOFT_LINE_BREAK;
+                    return true;
+                }
+            } else {
+                // the next line interrupts the paragraph (or the file ended):
+                // restore the match counter saved before the simulation
+                s->matched = matched_temp;
+            }
+            s->indentation = 0;
+            s->column = 0;
+        }
+        if (valid_symbols[LINE_ENDING]) {
+            // If the last line break ended a paragraph and no new block opened,
+            // the last line break should have been a soft line break. Reset
+            // the counter for matched blocks
+            s->matched = 0;
+            // If there is at least one open block, we should be in the matching
+            // state. Also set the matching flag if a
+            // `$._soft_line_break_marker` can be emitted so it does get
+            // emitted.
+            if (s->open_blocks.size > 0) {
+                s->state |= STATE_MATCHING;
+            } else {
+                s->state &= (~STATE_MATCHING);
+            }
+            // reset some state variables
+            s->state &= (~STATE_WAS_SOFT_LINE_BREAK);
+            lexer->result_symbol = LINE_ENDING;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Allocates a new Scanner together with a one-element block array and
+// initialises it by calling deserialize() with an empty buffer.
+//
+// Returns the scanner as an opaque pointer; it is released again by
+// tree_sitter_markdown_external_scanner_destroy. If either allocation fails
+// the process is aborted instead of dereferencing a null pointer.
+void *tree_sitter_markdown_external_scanner_create(void) {
+    Scanner *s = (Scanner *)malloc(sizeof *s);
+    if (s == NULL) {
+        abort();
+    }
+    s->open_blocks.items = (Block *)calloc(1, sizeof(Block));
+    if (s->open_blocks.items == NULL) {
+        abort();
+    }
+    // The six ATX heading marker tokens have to be consecutive enum values
+    // (presumably the heading parser computes the token from the level -
+    // confirm against parse_atx_heading).
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+    _Static_assert(ATX_H6_MARKER == ATX_H1_MARKER + 5, "");
+#else
+    assert(ATX_H6_MARKER == ATX_H1_MARKER + 5);
+#endif
+    // an empty buffer puts the scanner into its initial state
+    deserialize(s, NULL, 0);
+
+    return s;
+}
+
+// Public scan entry point of the external scanner. `payload` is interpreted
+// as the Scanner; simulation mode is switched off before the work is handed
+// to scan(), whose result is returned unchanged.
+bool tree_sitter_markdown_external_scanner_scan(void *payload, TSLexer *lexer,
+                                                const bool *valid_symbols) {
+    Scanner *self = (Scanner *)payload;
+    // every top-level scan starts outside of simulation mode
+    self->simulate = false;
+    const bool token_found = scan(self, lexer, valid_symbols);
+    return token_found;
+}
+
+// Public serialize entry point: forwards to serialize() with `payload`
+// interpreted as the Scanner and returns that function's result.
+unsigned tree_sitter_markdown_external_scanner_serialize(void *payload,
+                                                         char *buffer) {
+    return serialize((Scanner *)payload, buffer);
+}
+
+// Public deserialize entry point: forwards `buffer` and `length` to
+// deserialize() with `payload` interpreted as the Scanner.
+void tree_sitter_markdown_external_scanner_deserialize(void *payload,
+                                                       char *buffer,
+                                                       unsigned length) {
+    deserialize((Scanner *)payload, buffer, length);
+}
+
+// Public destroy entry point: releases the block array owned by the scanner
+// and then the scanner itself.
+void tree_sitter_markdown_external_scanner_destroy(void *payload) {
+    Scanner *self = (Scanner *)payload;
+    // the array has to go first, it is reached through the scanner
+    free(self->open_blocks.items);
+    free(self);
+}
-- 
cgit v1.2.3