// Chat support (including tool-call grammar constraining and output parsing) with generic and custom template handlers.
2
3#pragma once
4
5#include "common.h"
6#include "peg-parser.h"
7#include <functional>
8#include <chrono>
9#include <string>
10#include <vector>
11#include <map>
12
13#include <nlohmann/json_fwd.hpp>
14
15struct common_chat_templates;
16
// A single tool call attached to a chat message.
struct common_chat_tool_call {
    std::string name;
    std::string arguments; // NOTE(review): presumably the serialized call arguments - confirm the encoding with callers
    std::string id;

    // Equal when name, arguments and id all match.
    bool operator==(const common_chat_tool_call & other) const {
        return name == other.name
            && arguments == other.arguments
            && id == other.id;
    }

    // Provided for parity with common_chat_msg, which exposes both == and !=.
    bool operator!=(const common_chat_tool_call & other) const {
        return !(*this == other);
    }
};
26
// One typed piece of a message's content: a type tag plus its text.
struct common_chat_msg_content_part {
    std::string type;
    std::string text;

    // TODO @ngxson : no known chat templates support reasoning_content in content parts yet
    //                this can be useful for models with interleaved thinking (like Kimi-K2)
    //                if you see any templates explicitly support this, please ping me
    // std::string reasoning_content;

    // Equal when both the type tag and the text match.
    bool operator==(const common_chat_msg_content_part & other) const {
        return type == other.type
            && text == other.text;
    }

    // Provided for parity with common_chat_msg, which exposes both == and !=.
    bool operator!=(const common_chat_msg_content_part & other) const {
        return !(*this == other);
    }
};
40
41struct common_chat_msg {
42 std::string role;
43 std::string content;
44 std::vector<common_chat_msg_content_part> content_parts;
45 std::vector<common_chat_tool_call> tool_calls;
46 std::string reasoning_content;
47 std::string tool_name;
48 std::string tool_call_id;
49
50 nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;
51
52 bool empty() const {
53 return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
54 }
55 void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
56 for (auto i = 0u; i < tool_calls.size(); i++) {
57 if (ids_cache.size() <= i) {
58 auto id = tool_calls[i].id;
59 if (id.empty()) {
60 id = gen_tool_call_id();
61 }
62 ids_cache.push_back(id);
63 }
64 tool_calls[i].id = ids_cache[i];
65 }
66 }
67 bool operator==(const common_chat_msg & other) const {
68 return role == other.role
69 && content == other.content
70 && content_parts == other.content_parts
71 && tool_calls == other.tool_calls
72 && reasoning_content == other.reasoning_content
73 && tool_name == other.tool_name
74 && tool_call_id == other.tool_call_id;
75 }
76 bool operator!=(const common_chat_msg & other) const {
77 return !(*this == other);
78 }
79};
80
81struct common_chat_msg_diff {
82 std::string reasoning_content_delta;
83 std::string content_delta;
84 size_t tool_call_index = std::string::npos;
85 common_chat_tool_call tool_call_delta;
86
87 static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
88
89 bool operator==(const common_chat_msg_diff & other) const {
90 return content_delta == other.content_delta
91 && tool_call_index == other.tool_call_index
92 && tool_call_delta == other.tool_call_delta;
93 }
94};
95
// Description of a tool that can be offered to the model.
struct common_chat_tool {
    std::string name;
    std::string description;
    std::string parameters; // NOTE(review): presumably a serialized parameter schema - confirm with callers
};
101
// Tool choice policy for a request. Enumerator order (and therefore value) is part of the interface.
enum common_chat_tool_choice {
    COMMON_CHAT_TOOL_CHOICE_AUTO,
    COMMON_CHAT_TOOL_CHOICE_REQUIRED,
    COMMON_CHAT_TOOL_CHOICE_NONE,
};
107
// Identifies the chat output format. Values are implicit and sequential from 0, so
// COMMON_CHAT_FORMAT_COUNT must stay last.
enum common_chat_format {
    COMMON_CHAT_FORMAT_CONTENT_ONLY,
    COMMON_CHAT_FORMAT_GENERIC,
    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
    COMMON_CHAT_FORMAT_MAGISTRAL,
    COMMON_CHAT_FORMAT_LLAMA_3_X,
    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
    COMMON_CHAT_FORMAT_HERMES_2_PRO,
    COMMON_CHAT_FORMAT_COMMAND_R7B,
    COMMON_CHAT_FORMAT_GRANITE,
    COMMON_CHAT_FORMAT_GPT_OSS,
    COMMON_CHAT_FORMAT_SEED_OSS,
    COMMON_CHAT_FORMAT_NEMOTRON_V2,
    COMMON_CHAT_FORMAT_APERTUS,
    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
    COMMON_CHAT_FORMAT_GLM_4_5,
    COMMON_CHAT_FORMAT_MINIMAX_M2,
    COMMON_CHAT_FORMAT_KIMI_K2,
    COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
    COMMON_CHAT_FORMAT_APRIEL_1_5,
    COMMON_CHAT_FORMAT_XIAOMI_MIMO,
    COMMON_CHAT_FORMAT_SOLAR_OPEN,
    COMMON_CHAT_FORMAT_EXAONE_MOE,

    // These are intended to be parsed by the PEG parser
    COMMON_CHAT_FORMAT_PEG_SIMPLE,
    COMMON_CHAT_FORMAT_PEG_NATIVE,
    COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,

    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
144
145struct common_chat_templates_inputs {
146 std::vector<common_chat_msg> messages;
147 std::string grammar;
148 std::string json_schema;
149 bool add_generation_prompt = true;
150 bool use_jinja = true;
151 // Parameters below only supported when use_jinja is true
152 std::vector<common_chat_tool> tools;
153 common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
154 bool parallel_tool_calls = false;
155 common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
156 bool enable_thinking = true;
157 std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
158 std::map<std::string, std::string> chat_template_kwargs;
159 bool add_bos = false;
160 bool add_eos = false;
161};
162
163struct common_chat_params {
164 common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
165 std::string prompt;
166 std::string grammar;
167 bool grammar_lazy = false;
168 bool thinking_forced_open = false;
169 std::vector<common_grammar_trigger> grammar_triggers;
170 std::vector<std::string> preserved_tokens;
171 std::vector<std::string> additional_stops;
172 std::string parser;
173};
174
175// per-message parsing syntax
176// should be derived from common_chat_params
177struct common_chat_parser_params {
178 common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
179 common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
180 // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
181 bool reasoning_in_content = false;
182 bool thinking_forced_open = false;
183 bool parse_tool_calls = true;
184 common_peg_arena parser = {};
185 common_chat_parser_params() = default;
186 common_chat_parser_params(const common_chat_params & chat_params) {
187 format = chat_params.format;
188 thinking_forced_open = chat_params.thinking_forced_open;
189 }
190};
191
// Checks whether the template supplied via "--chat-template" is supported.
// Returns true if it is valid.
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
194
// Releases a common_chat_templates object; defined out of line.
void common_chat_templates_free(struct common_chat_templates * tmpls);

// Deleter that forwards to common_chat_templates_free, for use with std::unique_ptr.
struct common_chat_templates_deleter {
    void operator()(common_chat_templates * tmpls) const {
        common_chat_templates_free(tmpls);
    }
};

// Owning handle: the held pointer is passed to common_chat_templates_free on destruction.
using common_chat_templates_ptr = std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter>;
200
201common_chat_templates_ptr common_chat_templates_init(
202 const struct llama_model * model,
203 const std::string & chat_template_override,
204 const std::string & bos_token_override = "",
205 const std::string & eos_token_override = "");
206
// NOTE(review): presumably reports whether the template was explicitly provided rather than
// defaulted - confirm against the definition.
bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);

// Returns template source text for the given variant; `variant` defaults to an empty string.
// NOTE(review): the meaning of the variant names is not visible from this header - confirm.
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
209
210
// Applies the chat templates to `inputs` and returns the resulting common_chat_params.
struct common_chat_params common_chat_templates_apply(
    const struct common_chat_templates * tmpls,
    const struct common_chat_templates_inputs & inputs);
214
215// Format single message, while taking into account the position of that message in chat history
216std::string common_chat_format_single(
217 const struct common_chat_templates * tmpls,
218 const std::vector<common_chat_msg> & past_msg,
219 const common_chat_msg & new_msg,
220 bool add_ass,
221 bool use_jinja);
222
// Returns an example of a formatted chat.
std::string common_chat_format_example(
    const struct common_chat_templates * tmpls,
    bool use_jinja,
    const std::map<std::string, std::string> & chat_template_kwargs);
228
229const char* common_chat_format_name(common_chat_format format);
230common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
231common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
232
233// used by arg and server
234const char * common_reasoning_format_name(common_reasoning_format format);
235common_reasoning_format common_reasoning_format_from_name(const std::string & format);
236
237common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
238
239bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
240
241// Parses a JSON array of messages in OpenAI's chat completion API format.
242std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);
243
244// DEPRECATED: only used in tests
245nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
246
247std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const nlohmann::ordered_json & tools);
248nlohmann::ordered_json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
249
250nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
251
252// get template caps, useful for reporting to server /props endpoint
253std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);