1#pragma once
 2
 3#include <cstdint>
 4#include <string_view>
 5
 6// UTF-8 parsing utilities for streaming-aware unicode support
 7
 8struct utf8_parse_result {
 9    uint32_t codepoint;      // Decoded codepoint (only valid if status == SUCCESS)
10    size_t bytes_consumed;   // How many bytes this codepoint uses (1-4)
11    enum status { SUCCESS, INCOMPLETE, INVALID } status;
12
13    utf8_parse_result(enum status s, uint32_t cp = 0, size_t bytes = 0)
14        : codepoint(cp), bytes_consumed(bytes), status(s) {}
15};
16
// Determine the expected length of a UTF-8 sequence from its first byte.
//   first_byte - the first byte of the sequence to classify
// Returns the expected sequence length (presumably counted in bytes, matching
// the 1-4 range documented on utf8_parse_result::bytes_consumed — confirm
// against the definition), or 0 for invalid first bytes.
size_t utf8_sequence_length(unsigned char first_byte);
20
// Parse a single UTF-8 codepoint from input.
//   input  - non-owning view of the bytes to decode
//   offset - presumably the byte index within input at which decoding
//            starts; confirm against the definition
// Returns a utf8_parse_result whose status is SUCCESS, INCOMPLETE or INVALID;
// the codepoint field is only valid when status == SUCCESS.
// NOTE(review): INCOMPLETE presumably reports a sequence cut short by the end
// of input (the streaming case) — confirm against the definition.
utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset);