1#pragma once
  2
  3#include "chat.h"
  4#include "chat-parser-xml-toolcall.h"
  5#include "json-partial.h"
  6#include "regex-partial.h"
  7
  8#include <nlohmann/json_fwd.hpp>
  9
 10#include <optional>
 11#include <string>
 12#include <vector>
 13
 // Exception type derived from std::runtime_error that carries a plain message.
 // NOTE(review): judging by the name it signals that a chat message is only partially
 // available; confirm against the throw sites, which are not visible in this header.
 class common_chat_msg_partial_exception : public std::runtime_error {
   public:
     // `explicit` so that a std::string is never silently converted into an exception object.
     explicit common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
 };
 18
 19class common_chat_msg_parser {
 20    std::string input_;
 21    bool is_partial_;
 22    common_chat_parser_params syntax_; // TODO: rename to params
 23    std::string healing_marker_;
 24
 25    size_t pos_ = 0;
 26    common_chat_msg result_;
 27
 28  public:
 29    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
 30    const std::string & input() const { return input_; }
 31    size_t pos() const { return pos_; }
 32    const std::string & healing_marker() const { return healing_marker_; }
 33    const bool & is_partial() const { return is_partial_; }
 34    const common_chat_msg & result() const { return result_; }
 35    const common_chat_parser_params & syntax() const { return syntax_; }
 36
 37    void move_to(size_t pos) {
 38        if (pos > input_.size()) {
 39            throw std::runtime_error("Invalid position!");
 40        }
 41        pos_ = pos;
 42    }
 43    void move_back(size_t n) {
 44        if (pos_ < n) {
 45            throw std::runtime_error("Can't move back that far!");
 46        }
 47        pos_ -= n;
 48    }
 49
 50    // Get the substring of the input at the given range
 51    std::string str(const common_string_range & rng) const;
 52
 53    // Appends to the result.content field
 54    void add_content(const std::string & content);
 55
 56    // Appends to the result.reasoning_content field
 57    void add_reasoning_content(const std::string & reasoning_content);
 58
 59    // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
 60    bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
 61
 62    // Adds a tool call using the "name", "id" and "arguments" fields of the json object
 63    bool add_tool_call(const nlohmann::ordered_json & tool_call);
 64
 65    // Adds an array of tool calls using their "name", "id" and "arguments" fields.
 66    bool add_tool_calls(const nlohmann::ordered_json & arr);
 67
 68    // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
 69    bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
 70
 71    void finish();
 72
 73    bool consume_spaces();
 74
 75    void consume_literal(const std::string & literal);
 76
 77    bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
 78
 79    std::string consume_rest();
 80
 81    struct find_regex_result {
 82        std::string prelude;
 83        std::vector<common_string_range> groups;
 84    };
 85
 86    std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
 87
 88    bool try_consume_literal(const std::string & literal);
 89
 90    std::optional<find_regex_result> try_find_literal(const std::string & literal);
 91
 92    find_regex_result consume_regex(const common_regex & regex);
 93
 94    std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
 95
 96    std::optional<common_json> try_consume_json();
 97    common_json consume_json();
 98
 99    struct consume_json_result {
100        nlohmann::ordered_json value;
101        bool is_partial;
102    };
103
104    /*
105        Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
106
107        By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
108        e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
109
110        But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
111        - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
112        - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
113    */
114    consume_json_result consume_json_with_dumped_args(
115        const std::vector<std::vector<std::string>> & args_paths = {},
116        const std::vector<std::vector<std::string>> & content_paths = {}
117    );
118    std::optional<consume_json_result> try_consume_json_with_dumped_args(
119        const std::vector<std::vector<std::string>> & args_paths = {},
120        const std::vector<std::vector<std::string>> & content_paths = {}
121    );
122
123    /**
124     * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
125     * form.scope_start, form.tool_sep and form.scope_end can be empty.
126     */
127    bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
128
129    // Parse content uses reasoning and XML-Style tool call
130    void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
131
132    void clear_tools();
133};