1#pragma once
2
#include "chat.h"
#include "chat-parser-xml-toolcall.h"
#include "json-partial.h"
#include "regex-partial.h"

#include <nlohmann/json_fwd.hpp>

#include <optional>
#include <stdexcept>
#include <string>
#include <vector>
13
// Exception type dedicated to partial chat messages, so callers can catch it
// separately from other std::runtime_error failures.
// NOTE(review): presumably thrown by the parser when the input ends mid-construct — confirm at the throw sites.
class common_chat_msg_partial_exception : public std::runtime_error {
  public:
    // `explicit` prevents a std::string from being silently converted into an exception object.
    // `message` is forwarded unchanged to std::runtime_error and is what `what()` returns.
    explicit common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
};
18
19class common_chat_msg_parser {
20 std::string input_;
21 bool is_partial_;
22 common_chat_parser_params syntax_; // TODO: rename to params
23 std::string healing_marker_;
24
25 size_t pos_ = 0;
26 common_chat_msg result_;
27
28 public:
29 common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
30 const std::string & input() const { return input_; }
31 size_t pos() const { return pos_; }
32 const std::string & healing_marker() const { return healing_marker_; }
33 const bool & is_partial() const { return is_partial_; }
34 const common_chat_msg & result() const { return result_; }
35 const common_chat_parser_params & syntax() const { return syntax_; }
36
37 void move_to(size_t pos) {
38 if (pos > input_.size()) {
39 throw std::runtime_error("Invalid position!");
40 }
41 pos_ = pos;
42 }
43 void move_back(size_t n) {
44 if (pos_ < n) {
45 throw std::runtime_error("Can't move back that far!");
46 }
47 pos_ -= n;
48 }
49
50 // Get the substring of the input at the given range
51 std::string str(const common_string_range & rng) const;
52
53 // Appends to the result.content field
54 void add_content(const std::string & content);
55
56 // Appends to the result.reasoning_content field
57 void add_reasoning_content(const std::string & reasoning_content);
58
59 // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
60 bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
61
62 // Adds a tool call using the "name", "id" and "arguments" fields of the json object
63 bool add_tool_call(const nlohmann::ordered_json & tool_call);
64
65 // Adds an array of tool calls using their "name", "id" and "arguments" fields.
66 bool add_tool_calls(const nlohmann::ordered_json & arr);
67
68 // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
69 bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
70
71 void finish();
72
73 bool consume_spaces();
74
75 void consume_literal(const std::string & literal);
76
77 bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
78
79 std::string consume_rest();
80
81 struct find_regex_result {
82 std::string prelude;
83 std::vector<common_string_range> groups;
84 };
85
86 std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
87
88 bool try_consume_literal(const std::string & literal);
89
90 std::optional<find_regex_result> try_find_literal(const std::string & literal);
91
92 find_regex_result consume_regex(const common_regex & regex);
93
94 std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
95
96 std::optional<common_json> try_consume_json();
97 common_json consume_json();
98
99 struct consume_json_result {
100 nlohmann::ordered_json value;
101 bool is_partial;
102 };
103
104 /*
105 Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
106
107 By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
108 e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
109
110 But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
111 - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
112 - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
113 */
114 consume_json_result consume_json_with_dumped_args(
115 const std::vector<std::vector<std::string>> & args_paths = {},
116 const std::vector<std::vector<std::string>> & content_paths = {}
117 );
118 std::optional<consume_json_result> try_consume_json_with_dumped_args(
119 const std::vector<std::vector<std::string>> & args_paths = {},
120 const std::vector<std::vector<std::string>> & content_paths = {}
121 );
122
123 /**
124 * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
125 * form.scope_start, form.tool_sep and form.scope_end can be empty.
126 */
127 bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
128
129 // Parse content uses reasoning and XML-Style tool call
130 void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
131
132 void clear_tools();
133};