1#include "chat.h"
2#include "chat-parser.h"
3#include "chat-peg-parser.h"
4#include "common.h"
5#include "json-partial.h"
6#include "json-schema-to-grammar.h"
7#include "log.h"
8#include "regex-partial.h"
9
10#include "jinja/parser.h"
11#include "jinja/value.h"
12#include "jinja/runtime.h"
13#include "jinja/caps.h"
14
15#include <algorithm>
16#include <cstdio>
17#include <cctype>
18#include <exception>
19#include <functional>
20#include <iostream>
21#include <optional>
22#include <stdexcept>
23#include <string>
24#include <vector>
25
26using json = nlohmann::ordered_json;
27
// Renders `now` as local time according to the strftime-style `format` string.
// NOTE: relies on std::localtime, which returns a pointer to shared static
// storage (not thread-safe); the result is copied immediately.
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
    const std::time_t t = std::chrono::system_clock::to_time_t(now);
    const std::tm tm_local = *std::localtime(&t);
    std::ostringstream out;
    out << std::put_time(&tm_local, format.c_str());
    return out.str();
}
36
37static std::string string_diff(const std::string & last, const std::string & current) {
38 if (last.empty()) {
39 return current;
40 }
41 if (!string_starts_with(current, last)) {
42 if (string_starts_with(last, current)) {
43 // This happens if the last generation ended on a partial stop word (not erased),
44 // and the current ended on a stop word (erased).
45 return "";
46 }
47 throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
48 }
49 return current.substr(last.size());
50}
51
52static bool has_content_or_tool_calls(const common_chat_msg & msg) {
53 return !msg.content.empty() || !msg.tool_calls.empty();
54}
55
56json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
57 if (!content.empty() && !content_parts.empty()) {
58 throw std::runtime_error("Cannot specify both content and content_parts");
59 }
60 json jmsg {
61 {"role", role},
62 };
63 if (!content.empty()) {
64 jmsg["content"] = content;
65 } else if (!content_parts.empty()) {
66 if (concat_typed_text) {
67 std::string text;
68 for (const auto & part : content_parts) {
69 if (part.type != "text") {
70 LOG_WRN("Ignoring content part type: %s\n", part.type.c_str());
71 continue;
72 }
73 if (!text.empty()) {
74 text += '\n';
75 }
76 text += part.text;
77 }
78 jmsg["content"] = text;
79 } else {
80 auto & parts = jmsg["content"] = json::array();
81 for (const auto & part : content_parts) {
82 parts.push_back({
83 {"type", part.type},
84 {"text", part.text},
85 });
86 }
87 }
88 } else {
89 jmsg["content"] = "";
90 }
91 if (!reasoning_content.empty()) {
92 jmsg["reasoning_content"] = reasoning_content;
93 }
94 if (!tool_name.empty()) {
95 jmsg["name"] = tool_name;
96 }
97 if (!tool_call_id.empty()) {
98 jmsg["tool_call_id"] = tool_call_id;
99 }
100 if (!tool_calls.empty()) {
101 jmsg["tool_calls"] = json::array();
102 auto & jtool_calls = jmsg["tool_calls"];
103 for (const auto & tool_call : tool_calls) {
104 json tc {
105 {"type", "function"},
106 {"function", {
107 {"name", tool_call.name},
108 {"arguments", tool_call.arguments},
109 }},
110 };
111 if (!tool_call.id.empty()) {
112 tc["id"] = tool_call.id;
113 }
114 // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
115 // We only generate a random id for the ones that don't generate one by themselves
116 // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
117 // {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
118 jtool_calls.push_back(tc);
119 }
120 }
121
122 return jmsg;
123}
124
// Computes the streaming deltas between two successive parses of the same
// generation: `msg_prv` is the previously parsed message, `msg_new` the current
// one. `msg_new` must be a strict continuation of `msg_prv` (content fields are
// prefix-extended, tool call count does not shrink), otherwise this throws.
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
    std::vector<common_chat_msg_diff> diffs;
    // reserve for: reasoning delta + content delta + last-tool-call delta (3),
    // plus one entry per newly appeared tool call
    if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
        diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
    } else {
        diffs.reserve(3);
    }

    // TODO: these can become expensive for long messages - how to optimize?
    if (msg_prv.reasoning_content != msg_new.reasoning_content) {
        auto & diff = diffs.emplace_back();
        diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
    }
    if (msg_prv.content != msg_new.content) {
        auto & diff = diffs.emplace_back();
        diff.content_delta = string_diff(msg_prv.content, msg_new.content);
    }

    if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
        throw std::runtime_error("Invalid diff: now finding less tool calls!");
    }

    if (!msg_prv.tool_calls.empty()) {
        // only the last previously-seen tool call can still be growing;
        // earlier ones are assumed complete and unchanged
        const auto idx = msg_prv.tool_calls.size() - 1;
        const auto & pref = msg_prv.tool_calls[idx];
        const auto & newf = msg_new.tool_calls[idx];
        if (pref.name != newf.name) {
            throw std::runtime_error("Invalid diff: tool call mismatch!");
        }
        const auto args_diff = string_diff(pref.arguments, newf.arguments);
        if (!args_diff.empty() || pref.id != newf.id) {
            auto & diff = diffs.emplace_back();
            diff.tool_call_index = idx;
            if (pref.id != newf.id) {
                // id changed: (re)send id and name so the client can re-key the call
                diff.tool_call_delta.id = newf.id;
                diff.tool_call_delta.name = newf.name;
            }
            diff.tool_call_delta.arguments = args_diff;
        }
    }
    // tool calls that appeared since the previous parse are emitted whole
    for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
        auto & diff = diffs.emplace_back();
        diff.tool_call_index = idx;
        diff.tool_call_delta = msg_new.tool_calls[idx];
    }

    return diffs;
}
173
using chat_template_caps = jinja::caps;

// Wrapper around a parsed Jinja chat template: the compiled program, the
// BOS/EOS token strings it may reference, the (lexer-normalized) source, and
// the capabilities probed from the template.
struct common_chat_template {
    jinja::program prog;
    std::string bos_tok;
    std::string eos_tok;
    std::string src;
    chat_template_caps caps;

    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
        jinja::lexer lexer;
        auto lexer_res = lexer.tokenize(src);
        this->prog = jinja::parse_from_tokens(lexer_res);

        // keep the source as returned by the lexer, not the raw input string
        this->src = lexer_res.source;
        this->bos_tok = bos_token;
        this->eos_tok = eos_token;

        // probe what the template supports (system role, typed content, ...)
        this->caps = jinja::caps_get(prog);
        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
    }

    const std::string & source() const { return src; }
    const std::string & bos_token() const { return bos_tok; }
    const std::string & eos_token() const { return eos_tok; }

    // Returns a copy of `messages` with `system_prompt` injected:
    // - when the template lacks system-role support, the prompt is prepended to
    //   the first message's content (or becomes a new "user" message);
    // - otherwise it is inserted as the leading "system" message, or replaces
    //   the content of an existing one.
    // NOTE(review): an existing system message is overwritten, not merged —
    // confirm this is the intended behavior for callers with their own system prompt.
    // TODO: this is ugly, refactor it somehow
    json add_system(const json & messages, const std::string & system_prompt) const {
        GGML_ASSERT(messages.is_array());
        auto msgs_copy = messages;
        if (!caps.supports_system_role) {
            if (msgs_copy.empty()) {
                msgs_copy.insert(msgs_copy.begin(), json{
                    {"role", "user"},
                    {"content", system_prompt}
                });
            } else {
                auto & first_msg = msgs_copy[0];
                if (!first_msg.contains("content")) {
                    first_msg["content"] = "";
                }
                first_msg["content"] = system_prompt + "\n\n"
                    + first_msg["content"].get<std::string>();
            }
        } else {
            if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
                msgs_copy.insert(msgs_copy.begin(), json{
                    {"role", "system"},
                    {"content", system_prompt}
                });
            } else if (msgs_copy[0].at("role") == "system") {
                msgs_copy[0]["content"] = system_prompt;
            }
        }
        return msgs_copy;
    }

    chat_template_caps original_caps() const {
        return caps;
    }

};
236
// Owning container for the chat template(s) loaded for a model.
struct common_chat_templates {
    bool add_bos; // vocab adds BOS implicitly (a template-rendered BOS gets stripped in apply())
    bool add_eos; // vocab adds EOS implicitly (a template-rendered EOS gets stripped in apply())
    bool has_explicit_template; // Model had builtin template or template override was specified.
    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
    std::unique_ptr<common_chat_template> template_tool_use; // optional "tool_use" variant, may be null
};
244
// Normalized, internal parameter set used when rendering a chat template.
struct templates_params {
    json messages;    // OAI-compatible messages array
    json tools;       // OAI-compatible tools array (may be empty)
    common_chat_tool_choice tool_choice;
    json json_schema; // response JSON schema, if any
    bool parallel_tool_calls;
    common_reasoning_format reasoning_format;
    bool stream;
    std::string grammar;
    bool add_generation_prompt = true;
    bool enable_thinking = true;
    // reference time, e.g. for templates that render a date (see format_time)
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
    json extra_context; // extra top-level variables merged into the jinja context
    bool add_bos;
    bool add_eos;
    bool is_inference = true;
    bool mark_input = true; // whether to mark input strings in the jinja context
};
263
264common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
265 if (tool_choice == "auto") {
266 return COMMON_CHAT_TOOL_CHOICE_AUTO;
267 }
268 if (tool_choice == "none") {
269 return COMMON_CHAT_TOOL_CHOICE_NONE;
270 }
271 if (tool_choice == "required") {
272 return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
273 }
274 throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
275}
276
277bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
278 common_chat_templates_inputs dummy_inputs;
279 common_chat_msg msg;
280 msg.role = "user";
281 msg.content = "test";
282 dummy_inputs.messages = {msg};
283 dummy_inputs.enable_thinking = false;
284 const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
285 dummy_inputs.enable_thinking = true;
286 const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
287 return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
288}
289
// Parses an OAI-compatible "messages" JSON array into common_chat_msg structs.
// Each message must carry a "role" and at least one of "content" (string,
// typed-part array, or null) or "tool_calls". Only "text" content parts and
// "function" tool calls are accepted. Validation failures are rethrown as a
// single std::runtime_error wrapping the underlying message.
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
    std::vector<common_chat_msg> msgs;

    try {

        if (!messages.is_array()) {
            throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
        }

        for (const auto & message : messages) {
            if (!message.is_object()) {
                throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
            }

            common_chat_msg msg;
            if (!message.contains("role")) {
                throw std::invalid_argument("Missing 'role' in message: " + message.dump());
            }
            msg.role = message.at("role");

            auto has_content = message.contains("content");
            auto has_tool_calls = message.contains("tool_calls");
            if (has_content) {
                const auto & content = message.at("content");
                if (content.is_string()) {
                    // plain string content
                    msg.content = content;
                } else if (content.is_array()) {
                    // typed content parts; only {"type":"text", "text":...} is supported
                    for (const auto & part : content) {
                        if (!part.contains("type")) {
                            throw std::invalid_argument("Missing content part type: " + part.dump());
                        }
                        const auto & type = part.at("type");
                        if (type != "text") {
                            throw std::invalid_argument("Unsupported content part type: " + type.dump());
                        }
                        common_chat_msg_content_part msg_part;
                        msg_part.type = type;
                        msg_part.text = part.at("text");
                        msg.content_parts.push_back(msg_part);
                    }
                } else if (!content.is_null()) {
                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
                }
            }
            if (has_tool_calls) {
                for (const auto & tool_call : message.at("tool_calls")) {
                    common_chat_tool_call tc;
                    if (!tool_call.contains("type")) {
                        throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
                    }
                    const auto & type = tool_call.at("type");
                    if (type != "function") {
                        throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
                    }
                    if (!tool_call.contains("function")) {
                        throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
                    }
                    const auto & fc = tool_call.at("function");
                    if (!fc.contains("name")) {
                        throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
                    }
                    tc.name = fc.at("name");
                    tc.arguments = fc.at("arguments");
                    // id is optional (not all templates/models produce one)
                    if (tool_call.contains("id")) {
                        tc.id = tool_call.at("id");
                    }
                    msg.tool_calls.push_back(tc);
                }
            }
            if (!has_content && !has_tool_calls) {
                throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
            }
            if (message.contains("reasoning_content")) {
                msg.reasoning_content = message.at("reasoning_content");
            }
            if (message.contains("name")) {
                msg.tool_name = message.at("name");
            }
            if (message.contains("tool_call_id")) {
                msg.tool_call_id = message.at("tool_call_id");
            }

            msgs.push_back(msg);
        }
    } catch (const std::exception & e) {
        // @ngxson : disable otherwise it's bloating the API response
        // printf("%s\n", std::string("; messages = ") + messages.dump(2));
        throw std::runtime_error("Failed to parse messages: " + std::string(e.what()));
    }

    return msgs;
}
382
383static json render_message_to_json(const std::vector<common_chat_msg> & msgs, const jinja::caps & c) {
384 if (!c.supports_string_content && !c.supports_typed_content) {
385 LOG_WRN("%s: Neither string content nor typed content is supported by the template. This is unexpected and may lead to issues.\n", __func__);
386 }
387
388 bool only_string_accepted = c.supports_string_content && !c.supports_typed_content;
389 bool only_typed_accepted = !c.supports_string_content && c.supports_typed_content;
390
391 json messages = json::array();
392 for (const auto & msg : msgs) {
393 if (only_string_accepted) {
394 json jmsg = msg.to_json_oaicompat(/* concat_typed_text= */ true);
395 messages.push_back(jmsg);
396 } else if (only_typed_accepted) {
397 json jmsg = msg.to_json_oaicompat(/* concat_typed_text= */ false);
398 if (jmsg.at("content").is_string()) {
399 jmsg["content"] = json::array({
400 json{
401 {"type", "text"},
402 {"text", jmsg.at("content").get<std::string>()},
403 }
404 });
405 }
406 messages.push_back(jmsg);
407 } else {
408 json jmsg = msg.to_json_oaicompat(/* concat_typed_text= */ false);
409 messages.push_back(jmsg);
410 }
411 }
412 return messages;
413}
414
415// DEPRECATED: only used in tests
416json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text) {
417 jinja::caps c;
418 c.supports_string_content = true;
419 c.supports_typed_content = !concat_typed_text;
420 return render_message_to_json(msgs, c);
421}
422
423std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & tools) {
424 std::vector<common_chat_tool> result;
425
426 try {
427 if (!tools.is_null()) {
428 if (!tools.is_array()) {
429 throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
430 }
431 for (const auto & tool : tools) {
432 if (!tool.contains("type")) {
433 throw std::invalid_argument("Missing tool type: " + tool.dump());
434 }
435 const auto & type = tool.at("type");
436 if (!type.is_string() || type != "function") {
437 throw std::invalid_argument("Unsupported tool type: " + tool.dump());
438 }
439 if (!tool.contains("function")) {
440 throw std::invalid_argument("Missing tool function: " + tool.dump());
441 }
442
443 const auto & function = tool.at("function");
444 result.push_back({
445 /* .name = */ function.at("name"),
446 /* .description = */ function.value("description", ""),
447 /* .parameters = */ function.value("parameters", json::object()).dump(),
448 });
449 }
450 }
451 } catch (const std::exception & e) {
452 throw std::runtime_error("Failed to parse tools: " + std::string(e.what()) + "; tools = " + tools.dump(2));
453 }
454
455 return result;
456}
457
458json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools) {
459 if (tools.empty()) {
460 return json();
461 }
462
463 auto result = json::array();
464 for (const auto & tool : tools) {
465 result.push_back({
466 {"type", "function"},
467 {"function", {
468 {"name", tool.name},
469 {"description", tool.description},
470 {"parameters", json::parse(tool.parameters)},
471 }},
472 });
473 }
474 return result;
475}
476
477json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
478 json delta = json::object();
479 if (!diff.reasoning_content_delta.empty()) {
480 delta["reasoning_content"] = diff.reasoning_content_delta;
481 }
482 if (!diff.content_delta.empty()) {
483 delta["content"] = diff.content_delta;
484 }
485 if (diff.tool_call_index != std::string::npos) {
486 json tool_call;
487 tool_call["index"] = diff.tool_call_index;
488 if (!diff.tool_call_delta.id.empty()) {
489 tool_call["id"] = diff.tool_call_delta.id;
490 tool_call["type"] = "function";
491 }
492 json function = json::object();
493 if (!diff.tool_call_delta.name.empty()) {
494 function["name"] = diff.tool_call_delta.name;
495 }
496 function["arguments"] = diff.tool_call_delta.arguments;
497 tool_call["function"] = function;
498 delta["tool_calls"] = json::array({tool_call});
499 }
500 return delta;
501}
502
503bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
504 if (use_jinja) {
505 try {
506 common_chat_msg msg;
507 msg.role = "user";
508 msg.content = "test";
509
510 auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);
511
512 common_chat_templates_inputs inputs;
513 inputs.messages = {msg};
514
515 common_chat_templates_apply(tmpls.get(), inputs);
516 return true;
517 } catch (const std::exception & e) {
518 LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what());
519 return false;
520 }
521 }
522 llama_chat_message chat[] = {{"user", "test"}};
523 const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
524 return res >= 0;
525}
526
527std::string common_chat_format_single(
528 const struct common_chat_templates * tmpls,
529 const std::vector<common_chat_msg> & past_msg,
530 const common_chat_msg & new_msg,
531 bool add_ass,
532 bool use_jinja) {
533
534 common_chat_templates_inputs inputs;
535 inputs.use_jinja = use_jinja;
536 inputs.add_bos = tmpls->add_bos;
537 inputs.add_eos = tmpls->add_eos;
538
539 std::string fmt_past_msg;
540 if (!past_msg.empty()) {
541 inputs.messages = past_msg;
542 inputs.add_generation_prompt = false;
543 fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
544 }
545 std::ostringstream ss;
546 // if the past_msg ends with a newline, we must preserve it in the formatted version
547 if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
548 ss << "\n";
549 };
550 // format chat with new_msg
551 inputs.messages.push_back(new_msg);
552 inputs.add_generation_prompt = add_ass;
553 auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
554 // get the diff part
555 ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
556 return ss.str();
557}
558
559std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
560 common_chat_templates_inputs inputs;
561 inputs.use_jinja = use_jinja;
562 inputs.add_bos = tmpls->add_bos;
563 inputs.add_eos = tmpls->add_eos;
564 inputs.chat_template_kwargs = chat_template_kwargs;
565 auto add_simple_msg = [&](auto role, auto content) {
566 common_chat_msg msg;
567 msg.role = role;
568 msg.content = content;
569 inputs.messages.push_back(msg);
570 };
571 add_simple_msg("system", "You are a helpful assistant");
572 add_simple_msg("user", "Hello");
573 add_simple_msg("assistant", "Hi there");
574 add_simple_msg("user", "How are you?");
575 return common_chat_templates_apply(tmpls, inputs).prompt;
576}
577
// Fallback ChatML template, used when the model ships no chat template
// (or explicitly requests "chatml") in common_chat_templates_init().
#define CHATML_TEMPLATE_SRC \
    "{%- for message in messages -%}\n" \
    "  {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
    "{%- endfor -%}\n" \
    "{%- if add_generation_prompt -%}\n" \
    "  {{- '<|im_start|>assistant\n' -}}\n" \
    "{%- endif -%}"
585
// Destroys a common_chat_templates instance created by common_chat_templates_init().
// Safe to call with nullptr (delete of null is a no-op).
void common_chat_templates_free(struct common_chat_templates * tmpls) {
    delete tmpls;
}
589
// True when the templates came from the model itself or from an explicit
// override, as opposed to the built-in ChatML fallback.
bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls) {
    return tmpls->has_explicit_template;
}
593
594std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
595 if (!variant.empty()) {
596 if (variant == "tool_use") {
597 if (tmpls->template_tool_use) {
598 return tmpls->template_tool_use->source();
599 }
600 return "";
601 } else {
602 LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
603 }
604 }
605 return tmpls->template_default->source();
606}
607
608common_chat_templates_ptr common_chat_templates_init(
609 const struct llama_model * model,
610 const std::string & chat_template_override,
611 const std::string & bos_token_override,
612 const std::string & eos_token_override)
613{
614 std::string default_template_src;
615 std::string template_tool_use_src;
616
617 bool has_explicit_template = !chat_template_override.empty();
618 if (chat_template_override.empty()) {
619 GGML_ASSERT(model != nullptr);
620 const auto * str = llama_model_chat_template(model, /* name */ nullptr);
621 if (str) {
622 default_template_src = str;
623 has_explicit_template = true;
624 }
625 str = llama_model_chat_template(model, /* name */ "tool_use");
626 if (str) {
627 template_tool_use_src = str;
628 has_explicit_template = true;
629 }
630 } else {
631 default_template_src = chat_template_override;
632 }
633 if (default_template_src.empty() || default_template_src == "chatml") {
634 if (!template_tool_use_src.empty()) {
635 default_template_src = template_tool_use_src;
636 } else {
637 default_template_src = CHATML_TEMPLATE_SRC;
638 }
639 }
640
641 // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
642 // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
643 if (default_template_src.find("<|channel|>") != std::string::npos
644 // search for the error message and patch it
645 && default_template_src.find("in message.content or") != std::string::npos) {
646 string_replace_all(default_template_src,
647 "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
648 "{%- if false %}");
649 }
650
651 // TODO @aldehir : this is a temporary fix, pending Minja changes
652 // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
653 if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
654 // search for the error message and patch it
655 && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
656 string_replace_all(default_template_src,
657 "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
658 "{%- if false %}");
659 }
660
661 std::string token_bos = bos_token_override;
662 std::string token_eos = eos_token_override;
663 bool add_bos = false;
664 bool add_eos = false;
665 if (model) {
666 const auto * vocab = llama_model_get_vocab(model);
667 const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
668 if (token == LLAMA_TOKEN_NULL) {
669 if (default_template_src.find(jinja_variable_name) != std::string::npos
670 || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
671 LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name);
672 }
673 return std::string();
674 }
675 return common_token_to_piece(vocab, token, true);
676 };
677 token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token");
678 token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token");
679 add_bos = llama_vocab_get_add_bos(vocab);
680 add_eos = llama_vocab_get_add_eos(vocab);
681 }
682 common_chat_templates_ptr tmpls(new common_chat_templates());
683 tmpls->has_explicit_template = has_explicit_template;
684 tmpls->add_bos = add_bos;
685 tmpls->add_eos = add_eos;
686 try {
687 tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
688 } catch (const std::exception & e) {
689 LOG_ERR("%s: error: %s\n", __func__, e.what());
690 LOG_ERR("%s: failed to initialize chat template\n", __func__);
691 LOG_ERR("%s: please consider disabling jinja via --no-jinja, or using another chat template\n", __func__);
692 throw e;
693 }
694 if (!template_tool_use_src.empty()) {
695 try {
696 tmpls->template_tool_use = std::make_unique<common_chat_template>(template_tool_use_src, token_bos, token_eos);
697 } catch (const std::exception & e) {
698 LOG_ERR("%s: failed to parse tool use chat template (ignoring it): %s\n", __func__, e.what());
699 }
700 }
701 return tmpls;
702}
703
// Returns a human-readable name for the given chat format enum value.
// Throws std::runtime_error for values outside the known set.
const char * common_chat_format_name(common_chat_format format) {
    switch (format) {
        case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
        case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
        case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
        case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
        case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
        case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
        case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
        case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
        case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
        case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
        case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
        case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
        case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
        case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
        case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
        case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
        case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
        case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
        case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
        case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
        case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
        case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
        case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
        default:
            throw std::runtime_error("Unknown chat format");
    }
}
740
// Returns the canonical string name of a reasoning format (inverse of
// common_reasoning_format_from_name). Throws for unknown enum values.
const char * common_reasoning_format_name(common_reasoning_format format) {
    switch (format) {
        case COMMON_REASONING_FORMAT_NONE: return "none";
        case COMMON_REASONING_FORMAT_AUTO: return "auto";
        case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
        default:
            throw std::runtime_error("Unknown reasoning format");
    }
}
751
752common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
753 if (format == "none") {
754 return COMMON_REASONING_FORMAT_NONE;
755 } else if (format == "auto") {
756 return COMMON_REASONING_FORMAT_AUTO;
757 } else if (format == "deepseek") {
758 return COMMON_REASONING_FORMAT_DEEPSEEK;
759 } else if (format == "deepseek-legacy") {
760 return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
761 }
762 throw std::runtime_error("Unknown reasoning format: " + format);
763}
764
765static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
766 for (const auto & tool : tools) {
767 if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
768 LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str());
769 continue;
770 }
771 fn(tool);
772 }
773}
774
775static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
776 if (!function.contains("parameters") || !function.at("parameters").is_object()) {
777 return;
778 }
779 const auto & params = function.at("parameters");
780 if (!params.contains("properties") || !params.at("properties").is_object()) {
781 return;
782 }
783 const auto & props = params.at("properties");
784 std::set<std::string> required;
785 if (params.contains("required") && params.at("required").is_array()) {
786 params.at("required").get_to(required);
787 }
788 for (const auto & [name, prop] : props.items()) {
789 bool is_required = (required.find(name) != required.end());
790 fn(name, prop, is_required);
791 }
792}
793
// Renders `tmpl` with the given inputs and returns the resulting prompt.
// The optional overrides replace inputs.messages / inputs.tools; entries of
// inputs.extra_context and `additional_context` are copied into the top-level
// jinja context, the latter overwriting any keys set before it.
static std::string apply(
    const common_chat_template & tmpl,
    const struct templates_params & inputs,
    const std::optional<json> & messages_override = std::nullopt,
    const std::optional<json> & tools_override = std::nullopt,
    const std::optional<json> & additional_context = std::nullopt)
{
    jinja::context ctx(tmpl.source());

    nlohmann::ordered_json inp = nlohmann::ordered_json{
        {"messages", messages_override.has_value() ? *messages_override : inputs.messages},
        {"bos_token", tmpl.bos_token()},
        {"eos_token", tmpl.eos_token()},
    };
    // "tools" is only defined in the context when there is something to pass
    if (tools_override.has_value() || !inputs.tools.empty()) {
        inp["tools"] = tools_override.has_value() ? *tools_override : inputs.tools;
    }
    if (inputs.extra_context.is_object()) {
        // TODO: do we need to merge, or replacing is fine?
        for (const auto & [k, v] : inputs.extra_context.items()) {
            inp[k] = v;
        }
    }
    if (additional_context.has_value()) {
        // TODO: merge properly instead of overwriting (matching old behavior)
        for (const auto & [k, v] : additional_context->items()) {
            inp[k] = v;
        }
    }
    if (inputs.add_generation_prompt) {
        inp["add_generation_prompt"] = true;
    }

    jinja::global_from_json(ctx, inp, inputs.mark_input);

    // render
    jinja::runtime runtime(ctx);
    const jinja::value results = runtime.execute(tmpl.prog);
    auto parts = runtime.gather_string_parts(results);

    std::string result = parts->as_string().str();

    // strip a template-rendered BOS/EOS when the vocab is configured to add
    // them itself (presumably to avoid duplicated special tokens — see add_bos/add_eos)
    // TODO: improve this later
    if (inputs.add_bos && string_starts_with(result, tmpl.bos_token())) {
        result = result.substr(tmpl.bos_token().size());
    }
    if (inputs.add_eos && string_ends_with(result, tmpl.eos_token())) {
        result = result.substr(0, result.size() - tmpl.eos_token().size());
    }
    return result;
}
845
// Generic (template-agnostic) tool-call setup: constrains the model to emit a
// JSON object — either {"tool_call"(s): ...} or {"response": ...} — via a
// grammar built from the tools' JSON schemas, and injects a system prompt
// explaining the expected format.
static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // one schema per tool: {"name": <const>, "arguments": <params schema>}
    auto tool_call_schemas = json::array();
    foreach_function(inputs.tools, [&](const json & tool) {
        const auto & function = tool.at("function");
        auto tool_schema = json {
            {"type", "object"},
            {"properties", {
                {"name", {
                    {"type", "string"},
                    {"const", function.at("name")},
                }},
                {"arguments", function.at("parameters")},
            }},
            {"required", json::array({"name", "arguments"})},
        };
        if (function.contains("description")) {
            tool_schema["description"] = function.at("description");
        }
        if (inputs.parallel_tool_calls) {
            // parallel calls need an id so individual calls can be distinguished
            tool_schema.at("properties")["id"] = {
                {"type", "string"},
                {"minLength", 4},
            };
            tool_schema.at("required").push_back("id");
        }
        tool_call_schemas.emplace_back(tool_schema);
    });
    // parallel: {"tool_calls": [<tool schema>...]}; single: {"tool_call": <tool schema>}
    const auto tool_call =
        inputs.parallel_tool_calls
            ? json {
                {"type", "object"},
                {"properties", {
                    {"tool_calls", {
                        {"type", "array"},
                        {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
                            {"anyOf", tool_call_schemas},
                        }},
                        {"minItems", 1},
                    }},
                }},
                {"required", json::array({"tool_calls"})},
            }
            : json {
                {"type", "object"},
                {"properties", {
                    {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
                        {"anyOf", tool_call_schemas},
                    }},
                }},
                {"required", json::array({"tool_call"})},
            };
    // unless tool calls are required, also allow a plain {"response": ...}
    const auto schema =
        inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
            ? json {
                {"anyOf", json::array({
                    tool_call,
                    {
                        {"type", "object"},
                        {"properties", {
                            {"response", inputs.json_schema.is_null()
                                ? json {{"type", "string"}}
                                : inputs.json_schema
                            },
                        }},
                        {"required", json::array({"response"})},
                    },
                })}
            }
            : tool_call;

    data.grammar_lazy = false;
    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
        builder.add_schema("root", schema);
    });

    auto tweaked_messages = tmpl.add_system(
        inputs.messages,
        "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");

    // ensure all messages has "content" field
    for (auto & message : tweaked_messages) {
        if (!message.contains("content") || message["content"].is_null()) {
            message["content"] = "";
        }
    }

    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
    data.format = COMMON_CHAT_FORMAT_GENERIC;
    return data;
}
938
// Mistral Nemo: tool calls are emitted as a JSON array right after the
// "[TOOL_CALLS]" marker; each call carries a 9-character alphanumeric "id"
// expected by the template. The grammar is lazy (trigger-activated) unless
// tool choice is REQUIRED.
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    // only constrain from the very first token when a tool call is mandatory
    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
        auto schemas = json::array();
        foreach_function(inputs.tools, [&](const json & tool) {
            const auto & function = tool.at("function");
            schemas.push_back({
                {"type", "object"},
                {"properties", {
                    // Important note: the model is probably trained to take a JSON stringified arguments value.
                    // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
                    {"name", {
                        {"type", "string"},
                        {"const", function.at("name")},
                    }},
                    {"arguments", function.at("parameters")},
                    {"id", {
                        {"type", "string"},
                        // Nemo's template expects a 9-character alphanumeric ID.
                        {"pattern", "^[a-zA-Z0-9]{9}$"},
                    }},
                }},
                {"required", json::array({"name", "arguments", "id"})},
            });
        });
        auto schema = json {
            {"type", "array"},
            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
            {"minItems", 1},
        };
        if (!inputs.parallel_tool_calls) {
            // at most one call per turn when parallel calls are disabled
            schema["maxItems"] = 1;
        }
        builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
    });
    // the marker token both triggers the lazy grammar and must survive tokenization
    data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
    data.preserved_tokens = {
        "[TOOL_CALLS]",
    };
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
    return data;
}
983
984
// Case-insensitive substring search.
// Returns the index of the first occurrence of `needle` in `haystack` at or
// after `pos`, or std::string::npos when there is no match (or `pos` is past
// the end of `haystack`). An empty needle matches at `pos`.
static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
    if (pos > haystack.size()) {
        // haystack.begin() + pos would be out of bounds (UB) for an oversized pos
        return std::string::npos;
    }
    // cast to unsigned char: passing a negative char to std::tolower is UB
    auto lower = [](char c) { return std::tolower(static_cast<unsigned char>(c)); };
    auto it = std::search(
        haystack.begin() + pos, haystack.end(),
        needle.begin(), needle.end(),
        [&](char a, char b) { return lower(a) == lower(b); }
    );
    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
}
994
// LFM2: the model does not natively emit JSON tool calls, so a grammar is only
// enforced when the client opts in by placing a "force json schema." marker in
// the system prompt (the marker is stripped before rendering). Otherwise the
// tool-call tokens are merely preserved and parsing is left to the template.
static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    const auto is_json_schema_provided = !inputs.json_schema.is_null();
    const auto is_grammar_provided = !inputs.grammar.empty();
    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();

    // the logic requires potentially modifying the messages
    auto tweaked_messages = inputs.messages;

    // Strips the first matching "force json schema." marker (case-insensitive)
    // from the system message; returns true when a marker was found and removed.
    auto replace_json_schema_marker = [](json & messages) -> bool {
        static std::string marker1 = "force json schema.\n";
        static std::string marker2 = "force json schema.";

        if (messages.empty() || messages.at(0).at("role") != "system") {
            return false;
        }

        std::string content = messages.at(0).at("content");

        // marker1 (with trailing newline) is tried first so the newline is removed too
        for (const auto & marker : {marker1, marker2}) {
            const auto pos = ifind_string(content, marker);
            if (pos != std::string::npos) {
                content.replace(pos, marker.length(), "");
                // inject modified content back into the messages
                messages.at(0).at("content") = content;
                return true;
            }
        }

        return false;
    };

    // Lfm2 model does not natively work with json, but can generally understand the tools structure
    //
    // Example of the pytorch dialog structure:
    // <|startoftext|><|im_start|>system
    // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
    // <|im_start|>user
    // What is the current status of candidate ID 12345?<|im_end|>
    // <|im_start|>assistant
    // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
    // <|im_start|>tool
    // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
    // <|im_start|>assistant
    // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
    //
    // For the llama server compatibility with json tools semantic,
    // the client can add "Follow json schema." line into the system message prompt to force the json output.
    //
    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
        // server/utils.hpp prohibits that branch for the custom grammar anyways
        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
        LOG_INF("%s: Using tools to build a grammar\n", __func__);

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            auto schemas = json::array();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                schemas.push_back({
                    {"type", "object"},
                    {"properties", {
                        {"name", {
                            {"type", "string"},
                            {"const", function.at("name")},
                        }},
                        {"arguments", function.at("parameters")},
                    }},
                    // NOTE(review): "id" is listed as required but no "id" property is
                    // declared above (unlike the Nemo schema this resembles) — looks
                    // like a copy-paste leftover; confirm against the generated grammar.
                    {"required", json::array({"name", "arguments", "id"})},
                });
            });
            auto schema = json {
                {"type", "array"},
                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
                {"minItems", 1},
            };
            if (!inputs.parallel_tool_calls) {
                schema["maxItems"] = 1;
            }

            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
        });
        // model has no concept of tool selection mode choice,
        // if the system prompt rendered correctly it will produce a tool call
        // the grammar goes inside the tool call body
        data.grammar_lazy = true;
        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
        // output those tokens
        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
    } else if (is_json_schema_provided) {
        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
        data.grammar = json_schema_to_grammar(inputs.json_schema);
    } else if (is_grammar_provided) {
        LOG_INF("%s: Using provided grammar\n", __func__);
        data.grammar = inputs.grammar;
    } else {
        LOG_INF("%s: Using content relying on the template\n", __func__);
    }

    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());

    return data;
}
1103
// Ministral 3: the template expects system/assistant message content as typed
// blocks ("thinking"/"text"), so messages are normalized first; output is
// parsed with a native PEG parser ([THINK] reasoning, [TOOL_CALLS]name[ARGS]
// tool calls, or fenced ```json``` for response-format requests).
static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
    auto adjusted_messages = json::array();
    for (const auto & msg : inputs.messages) {
        auto role = msg.value("role", "");
        if (role != "system" && role != "assistant") {
            // Only adjust system and assistant messages. Interestingly, the system message may contain thinking.
            adjusted_messages.push_back(msg);
            continue;
        }

        auto content = json::array();

        // If message contains `reasoning_content`, add it as a block of type `thinking`
        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
            content.push_back({
                {"type", "thinking"},
                {"thinking", msg.at("reasoning_content").get<std::string>()},
            });
        }

        // If message contains `content`, add it as a block of type `text`
        if (msg.contains("content")) {
            if (msg.at("content").is_string()) {
                content.push_back({
                    {"type", "text"},
                    {"text", msg.at("content").get<std::string>()},
                });
            } else if (msg.at("content").is_array()) {
                // already block-structured: splice the existing blocks through
                auto blocks = msg.at("content");
                content.insert(content.end(), blocks.begin(), blocks.end());
            }
        }

        auto adjusted = msg;
        adjusted["content"] = content;
        adjusted.erase("reasoning_content");
        adjusted_messages.push_back(adjusted);
    }

    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
    auto include_grammar = true;

    data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
    data.preserved_tokens = {
        "[THINK]",
        "[/THINK]",
        "[TOOL_CALLS]",
        "[ARGS]",
    };

    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
        // optional [THINK]...[/THINK] prefix, captured as reasoning when enabled
        auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();

        // Response format parser
        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
            // Ministral wants to emit json surrounded by code fences
            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
        }

        // Tool call parser
        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
            // one alternative per tool: [TOOL_CALLS]<name>[ARGS]<json args>
            auto tool_choice = p.choice();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                const auto & schema = function.at("parameters");

                tool_choice |= p.rule("tool-" + name,
                    p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
                );
            });

            // REQUIRED => at least one call; parallel calls => unbounded repeats
            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
            auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));

            return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
        }

        // Content only parser
        include_grammar = false;
        return reasoning << p.content(p.rest());
    });

    data.parser = parser.save();

    if (include_grammar) {
        // lazy only in AUTO mode: the grammar kicks in when [TOOL_CALLS] appears
        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                auto schema = function.at("parameters");
                builder.resolve_refs(schema);
            });
            parser.build_grammar(builder, data.grammar_lazy);
        });

        data.grammar_triggers = {
            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
        };
    }

    return data;
}
1215
1216static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
1217 common_chat_params data;
1218 data.prompt = apply(tmpl, inputs);
1219 data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
1220 data.preserved_tokens = {
1221 "[THINK]",
1222 "[/THINK]",
1223 };
1224
1225 if (inputs.tools.is_array() && !inputs.tools.empty()) {
1226 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1227 data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1228 auto schemas = json::array();
1229 foreach_function(inputs.tools, [&](const json & tool) {
1230 const auto & function = tool.at("function");
1231 schemas.push_back({
1232 {"type", "object"},
1233 {"properties", {
1234 {"name", {
1235 {"type", "string"},
1236 {"const", function.at("name")},
1237 }},
1238 {"arguments", function.at("parameters")},
1239 {"id", {
1240 {"type", "string"},
1241 {"pattern", "^[a-zA-Z0-9]{9}$"},
1242 }},
1243 }},
1244 {"required", json::array({"name", "arguments", "id"})},
1245 });
1246 });
1247 auto schema = json {
1248 {"type", "array"},
1249 {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
1250 {"minItems", 1},
1251 };
1252 if (!inputs.parallel_tool_calls) {
1253 schema["maxItems"] = 1;
1254 }
1255 builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
1256 });
1257 data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
1258 data.preserved_tokens.push_back("[TOOL_CALLS]");
1259 } else {
1260 data.grammar_lazy = false;
1261 if (!inputs.json_schema.is_null()) {
1262 if (!inputs.grammar.empty()) {
1263 throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
1264 }
1265 data.grammar = json_schema_to_grammar(inputs.json_schema);
1266 } else {
1267 data.grammar = inputs.grammar;
1268 }
1269 }
1270
1271 return data;
1272}
1273
// Command R7B: reasoning lives in <|START_THINKING|>...<|END_THINKING|> and
// tool calls in a <|START_ACTION|>[...]<|END_ACTION|> JSON array. Assistant
// messages that carry both reasoning and tool calls are rewritten to use the
// template's "tool_plan" field.
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    auto adjusted_messages = json::array();
    for (const auto & msg : inputs.messages) {
        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
        if (has_reasoning_content && has_tool_calls) {
            // move reasoning_content into the template-specific tool_plan field
            auto adjusted_message = msg;
            adjusted_message["tool_plan"] = msg.at("reasoning_content");
            adjusted_message.erase("reasoning_content");
            adjusted_messages.push_back(adjusted_message);
        } else {
            adjusted_messages.push_back(msg);
        }
    }
    data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
    // the rendered prompt may already open a thinking block; either close it
    // immediately (thinking disabled) or record that it was left open
    if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
        if (!inputs.enable_thinking) {
            data.prompt += "<|END_THINKING|>";
        } else {
            data.thinking_forced_open = true;
        }
    } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
        data.prompt += "<|START_THINKING|><|END_THINKING|>";
    }

    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
        auto schemas = json::array();
        foreach_function(inputs.tools, [&](const json & tool) {
            const auto & function = tool.at("function");
            schemas.push_back({
                {"type", "object"},
                {"properties", {
                    {"tool_call_id", {
                        {"type", "string"},
                        // Command-R's template expects an integer string.
                        {"pattern", "^[0-9]{1,10}$"},
                    }},
                    {"tool_name", {
                        {"type", "string"},
                        {"const", function.at("name")},
                    }},
                    {"parameters", function.at("parameters")},
                }},
                {"required", json::array({"tool_call_id", "tool_name", "parameters"})},
            });
        });
        auto schema = json {
            {"type", "array"},
            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
            {"minItems", 1},
        };
        if (!inputs.parallel_tool_calls) {
            schema["maxItems"] = 1;
        }
        builder.add_rule("root",
            std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
            "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
    });
    data.grammar_triggers.push_back({
        COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
        // If thinking_forced_open, then we capture the </think> tag in the grammar,
        // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
        std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
        "(<\\|START_ACTION\\|>)[\\s\\S]*"
    });
    data.preserved_tokens = {
        "<|START_ACTION|>",
        "<|END_ACTION|>",
        "<|START_RESPONSE|>",
        "<|END_RESPONSE|>",
        "<|START_THINKING|>",
        "<|END_THINKING|>",
    };
    return data;
}
1353
1354static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
1355 if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
1356 throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
1357 }
1358 const auto & parameters_properties = parameters.at("properties");
1359 const auto & parameters_required = parameters.at("required");
1360 for (const auto & prop : expected_properties) {
1361 if (!parameters_properties.contains(prop)) {
1362 throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT
1363 }
1364 if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
1365 throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT
1366 }
1367 }
1368 if (parameters_properties.size() != expected_properties.size()) {
1369 throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
1370 }
1371}
1372
// Llama 3.x: tool calls are JSON objects like {"name": ..., "parameters": ...};
// when `allow_python_tag_builtin_tools` is set, known llama-stack builtin tools
// (search/wolfram/python) may instead be called via the <|python_tag|> syntax
// `name.call(key=value, ...)`.
static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
    auto builtin_tools = json::array();
    common_chat_params data;
    if (!inputs.tools.is_null()) {
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;

            // Adds a <|python_tag|>-style rule for a recognized builtin tool;
            // returns false when `name` is not one of the known builtins.
            auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
                if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
                    expect_tool_parameters(name, parameters, {"query"});
                } else if (name == "python" || name == "code_interpreter") {
                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
                    expect_tool_parameters(name, parameters, {"code"});
                } else {
                    return false;
                }

                // build `name.call(key=<schema>, ...)` from the tool's properties
                std::vector<std::string> kvs;
                for (const auto & [key, value] : parameters.at("properties").items()) {
                    kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
                }

                tool_rules.push_back(
                    builder.add_rule(
                        name + "-call",
                        "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
                builtin_tools.push_back(name);

                return true;
            };

            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);

                // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
                if (allow_python_tag_builtin_tools) {
                    handle_builtin_tool(name, parameters);
                }
                // JSON-object call syntax is always available, even for builtins
                tool_rules.push_back(
                    builder.add_rule(
                        name + "-call",
                        "\"{\" space "
                        "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
                        "  \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
                        "  \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
                        "\"}\" space"));
            });
            // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
            });
            if (!builtin_tools.empty()) {
                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
                data.preserved_tokens.push_back("<|python_tag|>");
            }
            // Allow a few empty lines on top of the usual constrained json schema space rule.
            builder.add_rule("root", string_join(tool_rules, " | "));
            data.additional_stops.push_back("<|eom_id|>");
        });
        data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
            ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
            : COMMON_CHAT_FORMAT_LLAMA_3_X;
    } else {
        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    }
    // the template needs today's date and the builtin-tool list as extra context
    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
        {"date_string", format_time(inputs.now, "%d %b %Y")},
        {"tools_in_user_message", false},
        {"builtin_tools", builtin_tools},
    });
    return data;
}
1452
// Nemotron v2: reasoning in <think>...</think>, tool calls as a JSON array
// wrapped in <TOOLCALL>...</TOOLCALL> (like Command-R but without call IDs).
static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Generate the prompt using the apply() function with the template
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;

    // Handle thinking tags appropriately based on inputs.enable_thinking
    if (string_ends_with(data.prompt, "<think>\n")) {
        if (!inputs.enable_thinking) {
            // close the block immediately so the model skips reasoning
            data.prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
        data.grammar_lazy = true;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            auto schemas = json::array();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                schemas.push_back({
                    { "type", "object" },
                    { "properties",
                      {
                          { "name",
                            {
                                { "type", "string" },
                                { "const", function.at("name") },
                            } },
                          { "arguments", function.at("parameters") },
                      } },
                    { "required", json::array({ "name", "arguments" }) },
                });
            });
            auto schema = json{
                { "type", "array" },
                { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
                { "minItems", 1 },
            };
            if (!inputs.parallel_tool_calls) {
                schema["maxItems"] = 1;
            }
            builder.add_rule("root",
                             std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                                 "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
                                 " \"</TOOLCALL>\"");
        });
        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                                          // If thinking_forced_open, then we capture the </think> tag in the grammar,
                                          // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                                          std::string(data.thinking_forced_open ?
                                                          "[\\s\\S]*?(</think>\\s*)" :
                                                          "(?:<think>[\\s\\S]*?</think>\\s*)?") +
                                              "(<TOOLCALL>)[\\s\\S]*" });
    }
    return data;
}
1513
// Nemotron v3: reasoning in <think>...</think>; tool calls use an XML-ish
// syntax — <tool_call><function=NAME><parameter=KEY>VALUE</parameter>...
// </function></tool_call> — parsed with a constructed PEG parser.
static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;

    // Handle thinking tags appropriately based on inputs.enable_thinking
    if (string_ends_with(data.prompt, "<think>\n")) {
        if (!inputs.enable_thinking) {
            // close the block immediately so the model skips reasoning
            data.prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    data.preserved_tokens = {
        "<think>",
        "</think>",
        "<tool_call>",
        "</tool_call>",
    };

    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
    auto include_grammar = true;

    auto parser = build_chat_peg_constructed_parser([&](auto & p) {
        // reasoning prefix is only parsed when the prompt left <think> open
        auto reasoning = p.eps();
        if (inputs.enable_thinking && extract_reasoning) {
            auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
            if (data.thinking_forced_open) {
                reasoning = reasoning_content;
            }
        }

        // Response format parser
        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
            return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
        }

        // Tool call parser
        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
            auto tool_choice = p.choice();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");

                auto schema_info = common_schema_info();
                schema_info.resolve_refs(parameters);

                auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
                auto tool_close = p.literal("</function>\n");
                auto args = p.sequence();
                // a string argument runs until one of the closing delimiters
                auto arg_string = p.rule("xml-arg-string", p.until_one_of({
                    "\n</parameter>",
                    "\n<parameter=",
                    "\n</function>"
                }));

                foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
                    auto rule_name = "tool-" + name + "-arg-" + param_name;

                    auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
                    auto arg_close = p.literal("</parameter>\n");
                    auto arg_value = p.eps();

                    // string-typed params are raw text; everything else is JSON
                    if (schema_info.resolves_to_string(param_schema)) {
                        arg_value = p.tool_arg_string_value(arg_string) + "\n";
                    } else {
                        arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
                    }

                    // Model may or may not close with </parameter>
                    auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
                    args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
                });

                tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
            });

            // REQUIRED => at least one call; parallel calls => unbounded repeats
            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
            auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));

            return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
        }

        // Content only parser
        include_grammar = false;
        return reasoning << p.content(p.rest());
    });

    data.parser = parser.save();

    if (include_grammar) {
        // lazy only in AUTO mode: the grammar kicks in when <tool_call> appears
        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                auto schema = function.at("parameters");
                builder.resolve_refs(schema);
            });
            parser.build_grammar(builder, data.grammar_lazy);
        });

        data.grammar_triggers = {
            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
        };
    }

    return data;
}
1629
1630
// Apertus: reasoning between <|inner_prefix|>/<|inner_suffix|>, tool calls as a
// JSON array wrapped in <|tools_prefix|>...<|tools_suffix|>. Note each call is
// keyed by the function name itself: [{"<name>": {<arguments>}}].
static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Generate the prompt using the apply() function with the template
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_APERTUS;

    // Handle thinking tags appropriately based on inputs.enable_thinking
    if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
        if (!inputs.enable_thinking) {
            // close the block immediately so the model skips reasoning
            data.prompt += "<|inner_suffix|>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    // When tools are present, build grammar for the <|tools_prefix|> format
    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
        data.grammar_lazy = true;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            auto schemas = json::array();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                // the property key IS the function name; its value is the arguments object
                schemas.push_back({
                    { "type", "object" },
                    { "properties",
                      {
                          { function.at("name"), function.at("parameters") }
                      } },
                    { "required", json::array({ function.at("name") }) },
                });
            });
            auto schema = json{
                { "type", "array" },
                { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
                { "minItems", 1 },
            };
            if (!inputs.parallel_tool_calls) {
                schema["maxItems"] = 1;
            }
            builder.add_rule("root",
                             std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
                                 "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
        });
        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                                          // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
                                          // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                                          std::string(data.thinking_forced_open ?
                                                          "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
                                                          "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
                                              "(<\\|tools_prefix\\|>)[\\s\\S]*" });
        data.preserved_tokens = {
            "<|system_start|>",
            "<|system_end|>",
            "<|developer_start|>",
            "<|developer_end|>",
            "<|user_start|>",
            "<|user_end|>",
            "<|assistant_start|>",
            "<|assistant_end|>",
            "<|inner_prefix|>",
            "<|inner_suffix|>",
            "<|tools_prefix|>",
            "<|tools_suffix|>",
        };
    }
    return data;
}
1699
1700static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1701 common_chat_params data;
1702 auto prompt = apply(tmpl, inputs);
1703
1704 // Hacks to fix the official (broken) prompt.
1705 // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
1706 // until the official template is fixed.
1707 if (tmpl.source().find("{% if ns.is_tool %}{{'<๏ฝtoolโoutputsโend๏ฝ>'}}") != std::string::npos) {
1708 // Don't leave the chat dangling after tool results
1709 if (string_ends_with(prompt, "<๏ฝtoolโoutputsโend๏ฝ>")) {
1710 prompt += "<๏ฝendโofโsentence๏ฝ>";
1711 if (inputs.add_generation_prompt) {
1712 prompt += "<๏ฝAssistant๏ฝ>";
1713 }
1714 }
1715 // Fix up tool call delta example added by Minja
1716 prompt = std::regex_replace(
1717 prompt,
1718 std::regex("(<๏ฝtoolโcallโend๏ฝ>)[\\s\\r\\n]*(<๏ฝtoolโoutputsโbegin๏ฝ>|<๏ฝUser๏ฝ>)"),
1719 "$1<๏ฝtoolโcallsโend๏ฝ><๏ฝendโofโsentence๏ฝ>$2");
1720 }
1721 data.prompt = prompt;
1722 data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
1723 if (string_ends_with(data.prompt, "<think>\n")) {
1724 if (!inputs.enable_thinking) {
1725 data.prompt += "</think>";
1726 } else {
1727 data.thinking_forced_open = true;
1728 }
1729 }
1730
1731 if (inputs.tools.is_array() && !inputs.tools.empty()) {
1732 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
1733 data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1734 std::vector<std::string> tool_rules;
1735 foreach_function(inputs.tools, [&](const json & tool) {
1736 const auto & function = tool.at("function");
1737 std::string name = function.at("name");
1738 auto parameters = function.at("parameters");
1739 builder.resolve_refs(parameters);
1740 tool_rules.push_back(builder.add_rule(name + "-call",
1741 "( \"<๏ฝtoolโcallโbegin๏ฝ>\" )? \"function<๏ฝtoolโsep๏ฝ>" + name + "\\n"
1742 "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
1743 "\"```<๏ฝtoolโcallโend๏ฝ>\""));
1744 });
1745 // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1746 // so we accept common variants (then it's all constrained)
1747 builder.add_rule("root",
1748 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1749 "( \"<๏ฝtoolโcallsโbegin๏ฝ>\" | \"<๏ฝtool_calls_begin๏ฝ>\" | \"<๏ฝtool calls begin๏ฝ>\" | \"<๏ฝtool\\\\_calls\\\\_begin๏ฝ>\" | \"<๏ฝtoolโcalls๏ฝ>\" ) "
1750 "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
1751 "\"<๏ฝtoolโcallsโend๏ฝ>\""
1752 " space");
1753 data.grammar_triggers.push_back({
1754 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1755 // If thinking_forced_open, then we capture the </think> tag in the grammar,
1756 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1757 std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
1758 "(<๏ฝtoolโcallsโbegin๏ฝ>|<๏ฝtool_calls_begin๏ฝ>|<๏ฝtool calls begin๏ฝ>|<๏ฝtool\\\\_calls\\\\_begin๏ฝ>|<๏ฝtoolโcalls๏ฝ>)[\\s\\S]*"
1759 });
1760 data.preserved_tokens = {
1761 "<think>",
1762 "</think>",
1763 "<๏ฝtoolโcallsโbegin๏ฝ>",
1764 "<๏ฝtoolโcallโbegin๏ฝ>",
1765 "<๏ฝtoolโsep๏ฝ>",
1766 "<๏ฝtoolโcallโend๏ฝ>",
1767 "<๏ฝtoolโcallsโend๏ฝ",
1768 };
1769 });
1770 }
1771 return data;
1772}
1773
// DeepSeek V3.1 format: like R1 but the tool name precedes <๏ฝtoolโsep๏ฝ> and the
// arguments are raw JSON (no ```json fence). The template receives a "thinking" flag.
static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Pass thinking context for DeepSeek V3.1 template
    json additional_context = {
        {"thinking", inputs.enable_thinking},
    };

    auto prompt = apply(tmpl, inputs,
                        /* messages_override= */ inputs.messages,
                        /* tools_override= */ std::nullopt,
                        additional_context);
    data.prompt = prompt;
    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
    // Note: V3.1 ends on "<think>" without a trailing newline (R1 ends on "<think>\n").
    if (string_ends_with(data.prompt, "<think>")) {
        if (!inputs.enable_thinking) {
            data.prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }
    if (inputs.tools.is_array() && !inputs.tools.empty()) {
        // Lazy grammar unless tool choice is required (or a JSON schema is imposed).
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);
                // One rule per tool: [begin-tag] name<sep>{args}<end-tag>
                tool_rules.push_back(builder.add_rule(name + "-call",
                    "( \"<๏ฝtoolโcallโbegin๏ฝ>\" )? \"" + name + "<๏ฝtoolโsep๏ฝ>"
                    "\" " + builder.add_schema(name + "-args", parameters) + " "
                    "\"<๏ฝtoolโcallโend๏ฝ>\""));
            });
            // Accept the same opening-tag variants as the R1 init (models are frequently
            // confused about the exact syntax); everything after is fully constrained.
            builder.add_rule("root",
                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                "( \"<๏ฝtoolโcallsโbegin๏ฝ>\" | \"<๏ฝtool_calls_begin๏ฝ>\" | \"<๏ฝtool calls begin๏ฝ>\" | \"<๏ฝtool\\\\_calls\\\\_begin๏ฝ>\" | \"<๏ฝtoolโcalls๏ฝ>\" ) "
                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
                "\"<๏ฝtoolโcallsโend๏ฝ>\""
                " space");
            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                // If thinking_forced_open, then we capture the </think> tag in the grammar,
                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
                    "(<๏ฝtoolโcallsโbegin๏ฝ>|<๏ฝtool_calls_begin๏ฝ>|<๏ฝtool calls begin๏ฝ>|<๏ฝtool\\\\_calls\\\\_begin๏ฝ>|<๏ฝtoolโcalls๏ฝ>)[\\s\\S]*"
            });
            data.preserved_tokens = {
                "<think>",
                "</think>",
                "<๏ฝtoolโcallsโbegin๏ฝ>",
                "<๏ฝtoolโcallโbegin๏ฝ>",
                "<๏ฝtoolโsep๏ฝ>",
                "<๏ฝtoolโcallโend๏ฝ>",
                "<๏ฝtoolโcallsโend๏ฝ>",
            };
        });
    }
    return data;
}
1837
1838static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
1839 common_chat_params data;
1840 data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1841
1842 data.prompt = apply(tmpl, params);
1843 data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
1844
1845 // Handle thinking tags based on prompt ending
1846 if (string_ends_with(data.prompt, "<think>\n")) {
1847 if (!params.enable_thinking) {
1848 // Close the thinking tag immediately if thinking is disabled
1849 data.prompt += "</think>\n\n";
1850 } else {
1851 // Mark thinking as forced open (template started with <think>)
1852 data.thinking_forced_open = true;
1853 }
1854 }
1855
1856 // Preserve MiniMax-M2 special tokens
1857 data.preserved_tokens = {
1858 "<think>",
1859 "</think>",
1860 "<minimax:tool_call>",
1861 "</minimax:tool_call>",
1862 };
1863
1864 // build grammar for tool call
1865 static const xml_tool_call_format form {
1866 /* form.scope_start = */ "<minimax:tool_call>\n",
1867 /* form.tool_start = */ "<invoke name=\"",
1868 /* form.tool_sep = */ "\">\n",
1869 /* form.key_start = */ "<parameter name=\"",
1870 /* form.key_val_sep = */ "\">",
1871 /* form.val_end = */ "</parameter>\n",
1872 /* form.tool_end = */ "</invoke>\n",
1873 /* form.scope_end = */ "</minimax:tool_call>",
1874 };
1875 build_grammar_xml_tool_call(data, params.tools, form);
1876
1877 return data;
1878}
1879
1880static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
1881 common_chat_params data;
1882 data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1883
1884 data.prompt = apply(tmpl, params);
1885 data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
1886
1887 data.preserved_tokens = {
1888 "<tool_call>",
1889 "</tool_call>",
1890 "<function=",
1891 "</function>",
1892 "<parameter=",
1893 "</parameter>",
1894 };
1895
1896 // build grammar for tool call
1897 static const xml_tool_call_format form {
1898 /* form.scope_start = */ "<tool_call>\n",
1899 /* form.tool_start = */ "<function=",
1900 /* form.tool_sep = */ ">\n",
1901 /* form.key_start = */ "<parameter=",
1902 /* form.key_val_sep = */ ">\n",
1903 /* form.val_end = */ "\n</parameter>\n",
1904 /* form.tool_end = */ "</function>\n",
1905 /* form.scope_end = */ "</tool_call>",
1906 };
1907 build_grammar_xml_tool_call(data, params.tools, form);
1908
1909 return data;
1910}
1911
1912static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
1913 common_chat_params data;
1914 data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1915
1916 data.prompt = apply(tmpl, params);
1917 data.format = COMMON_CHAT_FORMAT_KIMI_K2;
1918
1919 data.preserved_tokens = {
1920 "<think>",
1921 "</think>",
1922 "<|tool_calls_section_begin|>",
1923 "<|tool_call_begin|>",
1924 "<|tool_call_argument_begin|>",
1925 "<|tool_call_end|>",
1926 "<|tool_calls_section_end|>",
1927 "<|im_end|>",
1928 "<|im_system|>",
1929 "<|im_middle|>",
1930 };
1931
1932 data.additional_stops.insert(data.additional_stops.end(), {
1933 "<|im_end|>",
1934 "<|im_middle|>"
1935 });
1936 // build grammar for tool call
1937 static const xml_tool_call_format form = ([]() {
1938 xml_tool_call_format form {};
1939 form.scope_start = "<|tool_calls_section_begin|>";
1940 form.tool_start = "<|tool_call_begin|>";
1941 form.tool_sep = "<|tool_call_argument_begin|>{";
1942 form.key_start = "\"";
1943 form.key_val_sep = "\": ";
1944 form.val_end = ", ";
1945 form.tool_end = "}<|tool_call_end|>";
1946 form.scope_end = "<|tool_calls_section_end|>";
1947 form.raw_argval = false;
1948 form.last_val_end = "";
1949 return form;
1950 })();
1951 build_grammar_xml_tool_call(data, params.tools, form);
1952
1953 return data;
1954}
1955
1956static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
1957 common_chat_params data;
1958 data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1959
1960 data.prompt = apply(tmpl, params);
1961 data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
1962
1963 data.preserved_tokens = {
1964 "<thinking>",
1965 "</thinking>",
1966 "<tool_calls>",
1967 "</tool_calls>",
1968 };
1969
1970 // build grammar for tool call
1971 static const xml_tool_call_format form = ([]() {
1972 xml_tool_call_format form {};
1973 form.scope_start = "<tool_calls>[";
1974 form.tool_start = "{\"name\": \"";
1975 form.tool_sep = "\", \"arguments\": {";
1976 form.key_start = "\"";
1977 form.key_val_sep = "\": ";
1978 form.val_end = ", ";
1979 form.tool_end = "}, ";
1980 form.scope_end = "]</tool_calls>";
1981 form.raw_argval = false;
1982 form.last_val_end = "";
1983 form.last_tool_end = "}";
1984 return form;
1985 })();
1986 build_grammar_xml_tool_call(data, params.tools, form);
1987
1988 return data;
1989}
1990
1991static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
1992 common_chat_params data;
1993 data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1994
1995 data.prompt = apply(tmpl, params);
1996 data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
1997
1998 data.preserved_tokens = {
1999 "<tool_call>",
2000 "</tool_call>",
2001 };
2002
2003 // build grammar for tool call
2004 static const xml_tool_call_format form = ([]() {
2005 xml_tool_call_format form {};
2006 form.scope_start = "\n";
2007 form.tool_start = "<tool_call>\n{\"name\": \"";
2008 form.tool_sep = "\", \"arguments\": {";
2009 form.key_start = "\"";
2010 form.key_val_sep = "\": ";
2011 form.val_end = ", ";
2012 form.tool_end = "}\n</tool_call>";
2013 form.scope_end = "";
2014 form.raw_argval = false;
2015 form.last_val_end = "";
2016 return form;
2017 })();
2018 build_grammar_xml_tool_call(data, params.tools, form);
2019
2020 return data;
2021}
2022
// gpt-oss ("harmony") format: output is organized in channels (<|channel|>analysis/commentary/final),
// tool calls target "to=functions.NAME" either in the role header or inside a channel header.
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
    auto adjusted_messages = json::array();
    for (const auto & msg : inputs.messages) {
        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();

        // Only messages that carry BOTH reasoning and tool calls get the extra "thinking" field.
        if (has_reasoning_content && has_tool_calls) {
            auto adjusted_message = msg;
            adjusted_message["thinking"] = msg.at("reasoning_content");
            adjusted_messages.push_back(adjusted_message);
        } else {
            adjusted_messages.push_back(msg);
        }
    }

    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);

    // Check if we need to replace the return token with end token during
    // inference and without generation prompt. For more details see:
    // https://github.com/ggml-org/llama.cpp/issues/15417
    if (inputs.is_inference && !inputs.add_generation_prompt) {
        static constexpr std::string_view return_token = "<|return|>";
        static constexpr std::string_view end_token = "<|end|>";
        // Only the LAST occurrence is replaced (rfind).
        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
            prompt.replace(pos, return_token.length(), end_token);
        }
    }

    data.prompt = prompt;
    data.format = COMMON_CHAT_FORMAT_GPT_OSS;

    // These special tokens are required to parse properly, so we include them
    // even if parse_tool_calls is false.
    data.preserved_tokens = {
        "<|channel|>",
        "<|constrain|>",
        "<|message|>",
        "<|start|>",
        "<|end|>",
    };

    // JSON-schema-constrained response: allow an optional analysis channel, then force
    // the final channel's message to conform to the schema.
    if (!inputs.json_schema.is_null()) {
        data.grammar_lazy = false;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            auto schema = inputs.json_schema;
            builder.resolve_refs(schema);

            // "not-end": any prefix of the input that does not complete the literal "<|end|>".
            auto not_end = builder.add_rule("not-end",
                "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
            auto analysis = builder.add_rule("analysis",
                "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
            auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+");
            auto final = builder.add_rule("final",
                "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " +
                builder.add_schema("response", schema)
            );

            builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final);
        });
    }

    if (inputs.tools.is_array() && !inputs.tools.empty()) {
        // NOTE: if a json_schema was also given, this grammar overwrites the one built above.
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            // tool calls can appear in commentary or analysis channels
            auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");

            // Two call shapes: recipient in the role header ("to=functions.X <|channel|>...")
            // vs. recipient inside the channel header ("<|channel|>... to=functions.X").
            std::vector<std::string> tool_rules_recipient_in_role;
            std::vector<std::string> tool_rules_recipient_in_channel;
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);

                tool_rules_recipient_in_role.push_back(
                    builder.add_rule(name + "-call",
                        "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
                        builder.add_schema(name + "-args", parameters)
                    )
                );

                tool_rules_recipient_in_channel.push_back(
                    builder.add_rule(name + "-call",
                        "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
                        builder.add_schema(name + "-args", parameters)
                    )
                );
            });

            auto recipient_in_channel = builder.add_rule("recipient_in_channel",
                channel + " \" to=functions.\" ( " +
                string_join(tool_rules_recipient_in_channel, " | ") + " )"
            );

            if (data.grammar_lazy) {
                // Lazy mode: grammar kicks in at a trigger, so the root only needs the call itself.
                auto recipient_in_role = builder.add_rule("recipient_in_role",
                    "\"<|start|>assistant\"? \" to=functions.\" ( " +
                    string_join(tool_rules_recipient_in_role, " | ") + " )"
                );

                builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
            } else {
                // Required tool choice: grammar constrains the whole output, so allow optional
                // free-text analysis/commentary turns before the mandatory tool call.
                auto not_end = builder.add_rule("not-end",
                    "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
                auto analysis = builder.add_rule("analysis",
                    "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
                auto commentary = builder.add_rule("commentary",
                    "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");

                auto recipient_in_role = builder.add_rule("recipient_in_role",
                    "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
                );

                builder.add_rule("root",
                    "( " + analysis + " \"<|start|>assistant\" )? " +
                    "( " + commentary + " \"<|start|>assistant\" )? " +
                    "( " + recipient_in_role + " | " + recipient_in_channel + " )"
                );
            }

            // Trigger on tool calls that appear in the commentary channel
            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
                "<\\|channel\\|>(?:commentary|analysis) to"
            });

            // Trigger tool calls that appear in the role section, either at the
            // start or in the middle.
            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                "^ to"
            });

            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
                "<\\|start\\|>assistant to"
            });
        });
    }

    return data;
}
2169
// GLM 4.5 format: optional <think> block, then tool calls as
// <tool_call>NAME\n<arg_key>K</arg_key>\n<arg_value>V</arg_value>…</tool_call>.
static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    std::string prompt = apply(tmpl, inputs);

    // match the existing trimming behavior
    // (strip BOS/EOS the template may have added; the runtime adds its own)
    if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
        prompt.erase(0, tmpl.bos_token().size());
    }
    if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
        prompt.erase(prompt.size() - tmpl.eos_token().size());
    }
    // Thinking handling: the template may leave a <think> block open at the end.
    if (string_ends_with(prompt, "<think>")) {
        if (!inputs.enable_thinking) {
            prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    // add GLM preserved tokens
    data.preserved_tokens = {
        "<|endoftext|>",
        "[MASK]",
        "[gMASK]",
        "[sMASK]",
        "<sop>",
        "<eop>",
        "<|system|>",
        "<|user|>",
        "<|assistant|>",
        "<|observation|>",
        "<|begin_of_image|>",
        "<|end_of_image|>",
        "<|begin_of_video|>",
        "<|end_of_video|>",
        "<|begin_of_audio|>",
        "<|end_of_audio|>",
        "<|begin_of_transcription|>",
        "<|end_of_transcription|>",
        "<|code_prefix|>",
        "<|code_middle|>",
        "<|code_suffix|>",
        "/nothink",
        "<think>",
        "</think>",
        "<tool_call>",
        "</tool_call>",
        "<arg_key>",
        "</arg_key>",
        "<arg_value>",
        "</arg_value>"
    };

    // extra GLM 4.5 stop word
    data.additional_stops.insert(data.additional_stops.end(), {
        "<|user|>",
        "<|observation|>"
    });

    // build grammar for tool call
    static const xml_tool_call_format form {
        /* form.scope_start = */ "",
        /* form.tool_start = */ "\n<tool_call>",
        /* form.tool_sep = */ "\n",
        /* form.key_start = */ "<arg_key>",
        /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
        /* form.val_end = */ "</arg_value>\n",
        /* form.tool_end = */ "</tool_call>\n",
        /* form.scope_end = */ "",
    };
    build_grammar_xml_tool_call(data, inputs.tools, form);

    data.prompt = prompt;
    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
    return data;
}
2248
// FireFunction v2 format: tool calls emitted as ` functools[{"name": ..., "arguments": ...}, …]`.
// The template expects "datetime" and a stringified "functions" list in its context.
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    LOG_DBG("%s\n", __func__);
    common_chat_params data;
    const std::optional<json> additional_context = json {
        {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
        // Pass the tool definitions as pretty-printed JSON text (empty string when no tools).
        {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
    };
    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context);
    if (inputs.tools.is_array() && !inputs.tools.empty()) {
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            // Per-tool schema: {"name": <const>, "arguments": <params>} with name/arguments/id required.
            auto schemas = json::array();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                schemas.push_back({
                    {"type", "object"},
                    {"properties", {
                        {"name", {
                            {"type", "string"},
                            {"const", function.at("name")},
                        }},
                        {"arguments", function.at("parameters")},
                    }},
                    {"required", json::array({"name", "arguments", "id"})},
                });
            });
            // The payload is a non-empty array of the per-tool schemas above.
            auto schema = json {
                {"type", "array"},
                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
                {"minItems", 1},
            };
            if (!inputs.parallel_tool_calls) {
                schema["maxItems"] = 1;
            }
            // The " functools" prefix is optional in the grammar (the trigger already saw it).
            builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
        });
        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
        data.preserved_tokens = {
            " functools[",
        };
        data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
    } else {
        // No tools: plain content, no grammar needed.
        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    }
    return data;
}
2295
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
    // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
    // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
    common_chat_params data;
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
    if (inputs.tools.is_array() && !inputs.tools.empty()) {
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            // First call has no ">>>" prefix; subsequent (parallel) calls are prefixed with ">>>".
            std::vector<std::string> first_tool_rules;
            std::vector<std::string> subsequent_tool_rules;
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);
                std::string args_pattern = "[\\s\\S]*";
                auto args_rule = builder.add_schema(name + "-args", parameters);
                if (name == "python") {
                    // python may also receive raw (non-JSON) code: anything not starting with '{'.
                    args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
                } else {
                    // Non-python args must be a JSON object, so the trigger requires a leading '{'.
                    args_pattern = "\\{" + args_pattern;
                }
                auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
                first_tool_rules.push_back(call_rule);
                if (inputs.parallel_tool_calls) {
                    subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
                }
                // One trigger per tool: matches "name\n" (optionally after content + ">>>").
                data.grammar_triggers.push_back({
                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                    "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
                });
            });
            data.preserved_tokens = {
                "<|end_header_id|>",
            };
            auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
            if (inputs.parallel_tool_calls) {
                auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
                builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
            } else {
                builder.add_rule("root", first_rule);
            }

        });
    }
    return data;
}
2345
// Functionary v3.1 (llama 3.1 style): tool calls as <function=NAME>{args}</function>,
// with special handling for a python/ipython tool that may emit raw code after <|python_tag|>.
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
    // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
    common_chat_params data;

    if (!inputs.tools.is_null()) {
        std::string python_code_argument_name;
        auto has_raw_python = false;

        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                const auto & parameters = function.at("parameters");
                std::string name = function.at("name");
                if (name == "python" || name == "ipython") {
                    // Validate the python tool's schema: it must expose exactly one string
                    // argument (the code), either directly or as an object property.
                    if (!parameters.contains("type")) {
                        throw std::runtime_error("Missing type in python tool");
                    }
                    has_raw_python = true;
                    const auto & type = parameters.at("type");
                    if (type == "object") {
                        auto properties = parameters.at("properties");
                        for (auto it = properties.begin(); it != properties.end(); ++it) {
                            if (it.value().at("type") == "string") {
                                if (!python_code_argument_name.empty()) {
                                    throw std::runtime_error("Multiple string arguments found in python tool");
                                }
                                python_code_argument_name = it.key();
                            }
                        }
                        if (python_code_argument_name.empty()) {
                            throw std::runtime_error("No string argument found in python tool");
                        }
                    } else if (type != "string") {
                        throw std::runtime_error("Invalid type in python tool: " + type.dump());
                    }
                }
                tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
            });
            if (has_raw_python) {
                // Raw code path: <|python_tag|> followed by arbitrary text.
                tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
                data.preserved_tokens.push_back("<|python_tag|>");
            }
            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
            builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
        });
        data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
    } else {
        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    }

    data.prompt = apply(tmpl, inputs);
    // TODO: if (has_raw_python)
    return data;
}
2404
// Hermes 2 Pro format: tool calls as <tool_call>{"name": ..., "arguments": {...}}</tool_call>,
// with tolerance for many "good bad" wrapper variants the model is known to emit
// (<function=...>, <function_call>, code fences, etc.).
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    json extra_context = json {
        {"enable_thinking", inputs.enable_thinking},
    };
    // Caller-supplied context entries may override enable_thinking.
    extra_context.update(inputs.extra_context);

    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
    data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
    if (string_ends_with(data.prompt, "<think>\n")) {
        // Read the (possibly overridden) flag back from the context; relies on
        // nlohmann::json's implicit conversion to bool.
        if (!extra_context["enable_thinking"]) {
            data.prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    if (!inputs.tools.is_null()) {
        // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;
            std::vector<std::string> tool_call_alts;
            std::vector<std::string> escaped_names;
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);
                // Canonical JSON shape: {"name": <const>, "arguments": <params>}.
                tool_rules.push_back(builder.add_schema(name + "-call", {
                    {"type", "object"},
                    {"properties", json {
                        {"name", json {{"const", name}}},
                        {"arguments", parameters},
                    }},
                    {"required", json::array({"name", "arguments"})},
                }));
                // Alternate shape: <function=NAME>{args}</function> or <function name="NAME">.
                tool_call_alts.push_back(builder.add_rule(
                    name + "-function-tag",
                    "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
                    builder.add_schema(name + "-args", parameters) + " "
                    "\"</function>\" space"));

                data.grammar_triggers.push_back({
                    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
                    "<function=" + name + ">",
                });
                auto escaped_name = regex_escape(name);
                data.grammar_triggers.push_back({
                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
                    "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
                });
                escaped_names.push_back(escaped_name);
            });
            auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
            std::vector<std::string> alt_tags {
                any_tool_call,
                "\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
                // The rest is just to accommodate common "good bad" outputs.
                "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
                "\"<response>\" space "      + any_tool_call + " \"</response>\"",
                "\"<tools>\" space "         + any_tool_call + " \"</tools>\"",
                "\"<json>\" space "          + any_tool_call + " \"</json>\"",
                "\"<xml>\" space "           + any_tool_call + " \"</xml>\"",
                "\"<JSON>\" space "          + any_tool_call + " \"</JSON>\"",
            };
            auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
            tool_call_alts.push_back(wrappable_tool_call);
            // Also allow the whole call inside a ``` / ```json / ```xml code fence.
            tool_call_alts.push_back(
                "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
            auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
            builder.add_rule("root",
                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
            // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
                // If thinking_forced_open, then we capture the </think> tag in the grammar,
                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                std::string(data.thinking_forced_open ? "(</think>\\s*)" : "") + (
                    "\\s*("
                    "(?:<tool_call>"
                    "|<function"
                    "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
                    "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
                    ")"
                    ")"
                ),
            });
            data.preserved_tokens = {
                "<think>",
                "</think>",
                "<tool_call>",
                "</tool_call>",
                "<function",
                "<tools>",
                "</tools>",
                "<response>",
                "</response>",
                "<function_call>",
                "</function_call>",
                "<json>",
                "</json>",
                "<JSON>",
                "</JSON>",
                "```",
                "```json",
                "```xml",
            };
        });
    }

    return data;
}
2520
// Granite (IBM) handler: optional <think>/<response> sections, and tool calls
// emitted as a JSON list after a <|tool_call|> marker.
static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Pass thinking context for Granite template
    json additional_context = {
        {"thinking", inputs.enable_thinking},
    };

    data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
    data.format = COMMON_CHAT_FORMAT_GRANITE;

    // A prompt ending in an unclosed <think> tag means the template already
    // opened a reasoning block for the upcoming assistant turn.
    if (string_ends_with(data.prompt, "<think>\n") || string_ends_with(data.prompt, "<think>")) {
        if (!inputs.enable_thinking) {
            // Thinking disabled: close the tag immediately so no reasoning is generated.
            data.prompt += "</think>";
        } else {
            // Thinking enabled: remember that generation starts inside <think>.
            data.thinking_forced_open = true;
        }
    }

    if (!inputs.tools.is_null()) {
        // Granite uses <|tool_call|> followed by JSON list
        // Grammar is lazy (activated by a trigger) unless a tool call is required.
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;
            // One schema-backed rule per declared function:
            // an object {"name": <const>, "arguments": <parameters schema>}.
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);
                tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name +
"-args", {
                    {"type", "object"},
                    {"properties", {
                        {"name", {{"const", name}}},
                        {"arguments", parameters},
                    }},
                    {"required", json::array({"name", "arguments"})},
                })));
            });

            // tool_list: a JSON array of one or more tool calls.
            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
            auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");

            if (data.thinking_forced_open) {
                // Generation starts inside <think>: require closing it and emitting a
                // <response> section before the tool-call list.
                builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
            } else {
                builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
            }

            // Lazy grammar activates when the model emits the tool-call marker.
            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
                "<|tool_call|>"
            });

            // Marker tokens that must not be split during tokenization.
            data.preserved_tokens = {
                "<think>",
                "</think>",
                "<response>",
                "</response>",
                "<|tool_call|>",
            };
        });
    } else {
        // Handle thinking tags for non-tool responses
        if (data.thinking_forced_open && inputs.enable_thinking) {
            // Force the model to close the open <think> block and wrap its
            // answer in <response>...</response>.
            data.grammar_lazy = false;
            data.grammar = build_grammar([&](const common_grammar_builder & builder) {
                builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
            });
            data.preserved_tokens = {
                "<think>",
                "</think>",
                "<response>",
                "</response>",
            };
        }
    }

    return data;
}
2601
// Solar Open handler, built on the PEG-native parser: assistant turns are
// framed by <|begin|>assistant ... <|end|>, with <|think|>, <|content|> and
// <|tool_call:*|> section markers inside a turn.
static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Copy `reasoning_content` to `reasoning`
    // (the key name this template reads on past assistant messages).
    auto adjusted_messages = json::array();
    for (const auto & msg : inputs.messages) {
        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
            auto adjusted_message = msg;
            adjusted_message["reasoning"] = msg.at("reasoning_content");
            adjusted_message.erase("reasoning_content");
            adjusted_messages.push_back(adjusted_message);
        } else {
            adjusted_messages.push_back(msg);
        }
    }

    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    auto include_grammar = true;

    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);

    // Check if we need to replace the flush token with end token during inference and without generation prompt.
    if (inputs.is_inference && !inputs.add_generation_prompt) {
        static constexpr std::string_view return_token = "<|flush|>";
        static constexpr std::string_view end_token = "<|end|>";
        // Only the last occurrence is rewritten.
        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
            prompt.replace(pos, return_token.length(), end_token);
        }
    }

    data.prompt = prompt;
    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
    // Marker tokens must survive tokenization intact so the PEG parser sees them.
    data.preserved_tokens = {
        "<|think|>",
        "<|content|>",
        "<|begin|>",
        "<|end|>",
        "<|tool_calls|>",
        "<|tool_call:begin|>",
        "<|tool_call:end|>",
        "<|tool_call:name|>",
        "<|tool_call:args|>",
    };

    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
        auto lit_think = p.atomic(p.literal("<|think|>"));
        auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant"));
        auto lit_content = p.atomic(p.literal("<|content|>"));
        auto lit_end = p.atomic(p.literal("<|end|>"));
        auto parser_until_end = p.until("<|end|>");

        // reasoning <- "<|think|>" (!"<|end|>" .)*
        auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end));

        // content <- "<|content|>" (!"<|end|>" .)*
        auto parser_content = p.rule("content", lit_content + p.content(parser_until_end));

        // wrap_choice(items) <- item-choice wrapped*
        // item-choice <- items[0] / ... / items[n]
        // wrapped <- "<|end|><|begin|>assistant" item-choice
        auto wrap_choice = [&](const std::vector<common_peg_parser> & items) {
            auto choice = p.choice(items);
            return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice);
        };

        // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ...
        auto wrap_seq = [&](const std::vector<common_peg_parser> & items) {
            auto seq = p.sequence();
            for (auto i = 0u; i < items.size(); i++) {
                if (i == 0) {
                    seq += items[i];
                    continue;
                }
                seq += lit_end + lit_assistant_begin + items[i];
            }
            return seq;
        };

        // Response format parser
        // (structured output requested: content must match the provided schema)
        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
            auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
            return p.choice({
                wrap_seq({parser_reasoning, parser_response_format}),
                wrap_seq({parser_response_format})
            });
        }

        auto lit_tool_call_begin = p.literal("<|tool_call:begin|>");
        auto lit_tool_call_name = p.literal("<|tool_call:name|>");
        auto lit_tool_call_args = p.literal("<|tool_call:args|>");
        auto lit_tool_call_end = p.literal("<|tool_call:end|>");

        // Tool call parser
        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
            auto parser_tool_call = p.choice();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                const auto & schema = function.at("parameters");

                // tool(name, schema) <- name "<|tool_call:args|>" schema
                parser_tool_call |= p.rule("tool-" + name,
                    p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args)
                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
            });

            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;

            // tool-calls <- "<|tool_calls|>" tool-call+
            // tool-call <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>"
            // call-id <- [a-zA-Z0-9_-]+
            // tool-choice <- tool(t[0].name, t[0].schema) / ... / tool(t[n].name, t[n].schema)
            auto parser_tool_calls = p.trigger_rule("tool-calls",
                p.atomic(p.literal("<|tool_calls|>"))
                    + p.repeat(
                        p.tool_open(
                            lit_tool_call_begin
                            + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1))
                            + lit_tool_call_name
                            + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args))
                        + parser_tool_call
                        + p.tool_close(lit_tool_call_end),
                        /* min = */ 1,
                        /* max = */ max_calls));

            if (min_calls == 1) {
                // If required, then try any combination of the reasoning, content, and tool call
                return p.choice({
                    wrap_seq({parser_reasoning, parser_content, parser_tool_calls}),
                    wrap_seq({parser_reasoning, parser_tool_calls}),
                    wrap_seq({parser_content, parser_tool_calls}),
                    wrap_seq({parser_tool_calls})
                });
            }

            return wrap_choice({parser_reasoning, parser_content, parser_tool_calls});
        }

        // Content only parser
        // (no tools in play: no sampling constraint is needed)
        include_grammar = false;
        return wrap_choice({parser_reasoning, parser_content});
    });

    data.parser = parser.save();

    if (include_grammar) {
        // Grammar is lazy (trigger-activated) only when tool use is optional.
        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            // Resolve $refs in every tool schema before the parser emits its grammar.
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                auto schema = function.at("parameters");
                builder.resolve_refs(schema);
            });
            parser.build_grammar(builder, data.grammar_lazy);
        });

        data.grammar_triggers = {
            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"}
        };
    }

    return data;
}
2767
2768static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) {
2769 common_chat_params data;
2770
2771 data.prompt = apply(tmpl, inputs);
2772 data.format = COMMON_CHAT_FORMAT_EXAONE_MOE;
2773 if (string_ends_with(data.prompt, "<think>\n")) {
2774 if (!inputs.enable_thinking) {
2775 data.prompt += "</think>\n\n";
2776 } else {
2777 data.thinking_forced_open = true;
2778 }
2779 }
2780
2781 if (inputs.tools.is_array() && !inputs.tools.empty()) {
2782 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
2783 data.grammar = build_grammar([&](const common_grammar_builder & builder) {
2784 std::vector<std::string> tool_rules;
2785 foreach_function(inputs.tools, [&](const json & tool) {
2786 const auto & function = tool.at("function");
2787 std::string name = function.at("name");
2788 auto parameters = function.at("parameters");
2789 builder.resolve_refs(parameters);
2790 // Expect: <tool_call>{"name": "<name>", "arguments": {...}}</tool_call>
2791 tool_rules.push_back(builder.add_rule(
2792 name + "-call",
2793 "\"<tool_call>\" space " +
2794 builder.add_schema(name + "-obj", json{
2795 {"type", "object"},
2796 {"properties", {
2797 {"name", json{{"const", name}}},
2798 {"arguments", parameters},
2799 }},
2800 {"required", json::array({"name", "arguments"})},
2801 }) +
2802 " space \"</tool_call>\" space"));
2803 });
2804
2805 auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
2806 builder.add_rule("root",
2807 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
2808 (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
2809
2810 data.grammar_triggers.push_back({
2811 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
2812 std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)?" : "") +
2813 "(<tool_call>)[\\s\\S]*"
2814 });
2815 data.preserved_tokens = {
2816 "<think>",
2817 "</think>",
2818 "<tool_call>",
2819 "</tool_call>",
2820 };
2821 });
2822 }
2823
2824 return data;
2825}
2826
2827static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
2828 common_chat_params data;
2829
2830 // This template does not support tools or reasoning
2831 // we just need to transform the messages into the correct schema
2832
2833 templates_params inputs_new = inputs;
2834 json & messages = inputs_new.messages;
2835
2836 // default to chat_template_kwargs, or en-GB if not specified
2837 std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB");
2838 std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB");
2839
2840 GGML_ASSERT(messages.is_array());
2841 for (auto & message : messages) {
2842 if (message.contains("role") && message["role"].get<std::string>() != "user") {
2843 continue;
2844 }
2845 if (!message.contains("content")) {
2846 message["content"] = json::array();
2847 }
2848 if (message.contains("content") && !message["content"].is_array()) {
2849 auto content_str = message["content"].get<std::string>();
2850 // default to en-GB if not specified (to make common_chat_format_example works)
2851 auto src_lang = message.contains("source_lang_code")
2852 ? message["source_lang_code"].get<std::string>() : default_src_lang;
2853 auto tgt_lang = message.contains("target_lang_code")
2854 ? message["target_lang_code"].get<std::string>() : default_tgt_lang;
2855 message["content"] = json::array({
2856 json{
2857 {"type", "text"},
2858 {"text", content_str},
2859 {"source_lang_code", src_lang},
2860 {"target_lang_code", tgt_lang},
2861 }
2862 });
2863 }
2864 }
2865
2866 data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt);
2867 data.format = COMMON_CHAT_FORMAT_GENERIC;
2868
2869 return data;
2870}
2871
2872static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
2873 common_chat_params data;
2874 data.prompt = apply(tmpl, inputs);
2875 data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
2876 data.grammar_lazy = false;
2877 if (!inputs.json_schema.is_null()) {
2878 if (!inputs.grammar.empty()) {
2879 throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
2880 }
2881 data.grammar = json_schema_to_grammar(inputs.json_schema);
2882 } else {
2883 data.grammar = inputs.grammar;
2884 }
2885 return data;
2886}
2887
// Seed-OSS handler: reasoning uses <seed:think>...</seed:think>; tool calls use
// <seed:tool_call><function=NAME><parameter=KEY>...</parameter>...</function></seed:tool_call>.
static common_chat_params common_chat_params_init_seed_oss(
    const common_chat_template & tmpl,
    templates_params & params,
    const common_chat_templates_inputs & inputs)
{
    common_chat_params data;
    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_SEED_OSS;
    // The template may leave a reasoning block open at the end of the prompt.
    if (string_ends_with(data.prompt, "<seed:think>")) {
        if (!inputs.enable_thinking) {
            // Reasoning disabled: close the tag immediately.
            data.prompt += "</seed:think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    if (params.tools.is_array() && !params.tools.empty()) {
        // Lazy (trigger-driven) grammar unless a tool call is mandatory.
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;
            foreach_function(params.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);

                // Create rule for Seed-OSS function call format
                // One <parameter=KEY>value</parameter> element per schema property,
                // in the schema's declared order.
                // NOTE(review): the rule parts below are concatenated without
                // separating spaces (e.g. "...>"rule"</...") — confirm the GBNF
                // tokenizer accepts quoted-literal/rule-name adjacency.
                std::string param_rules;
                if (parameters.contains("properties")) {
                    for (const auto & [key, value] : parameters.at("properties").items()) {
                        param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) +
                                       "\"</parameter>\"";
                    }
                }

                tool_rules.push_back(builder.add_rule(name + "-call",
                    "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " +
                    param_rules +
                    " \"</function>\" space \"</seed:tool_call>\""));
            });

            // Lazy grammar activates on the Seed-OSS tool-call opener.
            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<seed:tool_call>" });

            // Marker tokens that must not be split during tokenization.
            data.preserved_tokens = {
                "<seed:think>", "</seed:think>", "<seed:tool_call>", "</seed:tool_call>",
                "<function=", "</function>", "<parameter=", "</parameter>",
            };

            // root: exactly one of the per-function call rules.
            builder.add_rule("root", string_join(tool_rules, " | "));
        });
    }
    return data;
}
2941
2942// various workarounds for known issues with certain templates or model behaviors
2943// TODO @ngxson : improve this (how?)
2944namespace workaround {
2945
2946// if first message is system and template does not support it, merge it with next message
2947static void system_message_not_supported(json & messages) {
2948 if (!messages.empty() && messages.front().at("role") == "system") {
2949 if (messages.size() > 1) {
2950 LOG_DBG("Merging system prompt into next message\n");
2951 auto & first_msg = messages.front();
2952 auto & second_msg = messages[1];
2953 second_msg["content"] = first_msg.at("content").get<std::string>()
2954 + "\n" + second_msg.at("content").get<std::string>();
2955 messages.erase(messages.begin());
2956 } else {
2957 LOG_WRN("Removing system prompt due to template not supporting system role\n");
2958 messages.erase(messages.begin());
2959 }
2960 }
2961}
2962
2963static void func_args_not_string(json & messages) {
2964 GGML_ASSERT(messages.is_array());
2965 for (auto & message : messages) {
2966 if (message.contains("tool_calls")) {
2967 for (auto & tool_call : message["tool_calls"]) {
2968 if (tool_call.contains("function") && tool_call["function"].contains("arguments")) {
2969 auto & args = tool_call["function"]["arguments"];
2970 if (args.is_string()) {
2971 try {
2972 args = json::parse(args.get<std::string>());
2973 } catch (const std::exception & e) {
2974 throw std::runtime_error("Failed to parse tool call arguments as JSON: " + std::string(e.what()));
2975 }
2976 }
2977 }
2978 }
2979 }
2980 }
2981}
2982
2983static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
2984 GGML_ASSERT(messages.is_array());
2985 for (auto & message : messages) {
2986 if (message.contains("tool_calls")) {
2987 auto tool_calls_new = json{
2988 {"tool_calls", message.at("tool_calls")}
2989 };
2990 message.erase("tool_calls");
2991 auto content = message.at("content");
2992 std::string content_new = content.is_null() ? "" : content.get<std::string>();
2993 message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
2994 }
2995 }
2996}
2997
2998// TODO @ngxson : we may remove support for generic schema in the future
2999static void use_generic_schema(json & messages) {
3000 GGML_ASSERT(messages.is_array());
3001 for (auto & message : messages) {
3002 if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
3003 auto & tool_calls = message.at("tool_calls");
3004 for (auto & tool_call : tool_calls) {
3005 if (tool_call.contains("type") && tool_call.at("type") == "function" &&
3006 tool_call.contains("function") && tool_call.at("function").is_object()) {
3007 // Copy values before erasing to avoid use-after-free
3008 json name_value;
3009 json arguments_value;
3010 json id_value;
3011 const auto & function = tool_call.at("function");
3012 if (function.contains("name")) {
3013 name_value = function.at("name");
3014 }
3015 if (function.contains("arguments")) {
3016 arguments_value = function.at("arguments");
3017 }
3018 if (tool_call.contains("id")) {
3019 id_value = tool_call.at("id");
3020 }
3021 // Now safely erase and assign in the correct order
3022 tool_call.erase("type");
3023 tool_call.erase("function");
3024 tool_call.erase("id");
3025 // Reassign in desired order: name, arguments, id
3026 if (!name_value.is_null()) {
3027 tool_call["name"] = name_value;
3028 }
3029 if (!arguments_value.is_null()) {
3030 tool_call["arguments"] = arguments_value;
3031 }
3032 if (!id_value.is_null()) {
3033 tool_call["id"] = id_value;
3034 }
3035 }
3036 }
3037 }
3038 }
3039}
3040
3041} // namespace workaround
3042
3043static common_chat_params common_chat_templates_apply_jinja(
3044 const struct common_chat_templates * tmpls,
3045 const struct common_chat_templates_inputs & inputs)
3046{
3047 templates_params params;
3048 params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
3049 const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
3050 ? *tmpls->template_tool_use
3051 : *tmpls->template_default;
3052 const auto & src = tmpl.source();
3053 const auto & caps = tmpl.original_caps();
3054 params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
3055 params.add_generation_prompt = inputs.add_generation_prompt;
3056 params.tool_choice = inputs.tool_choice;
3057 params.reasoning_format = inputs.reasoning_format;
3058 params.enable_thinking = inputs.enable_thinking;
3059 params.grammar = inputs.grammar;
3060 params.now = inputs.now;
3061 params.add_bos = tmpls->add_bos;
3062 params.add_eos = tmpls->add_eos;
3063
3064 if (!tmpl.original_caps().supports_system_role) {
3065 workaround::system_message_not_supported(params.messages);
3066 }
3067
3068 params.extra_context = json::object();
3069 for (auto el : inputs.chat_template_kwargs) {
3070 params.extra_context[el.first] = json::parse(el.second);
3071 }
3072
3073 if (!inputs.json_schema.empty()) {
3074 params.json_schema = json::parse(inputs.json_schema);
3075 }
3076
3077 if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
3078 LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
3079 params.parallel_tool_calls = false;
3080 } else {
3081 params.parallel_tool_calls = inputs.parallel_tool_calls;
3082 }
3083
3084 if (params.tools.is_array()) {
3085 if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
3086 throw std::runtime_error("Cannot specify grammar with tools");
3087 }
3088 if (caps.supports_tool_calls && !caps.supports_tools) {
3089 LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
3090 }
3091 }
3092
3093 // DeepSeek V3.1: detect based on specific patterns in the template
3094 if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
3095 params.json_schema.is_null()) {
3096 return common_chat_params_init_deepseek_v3_1(tmpl, params);
3097 }
3098
3099 // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
3100 if (src.find("<๏ฝtoolโcallsโbegin๏ฝ>") != std::string::npos && params.json_schema.is_null()) {
3101 return common_chat_params_init_deepseek_r1(tmpl, params);
3102 }
3103
3104 // Command R7B: : use handler in all cases except json schema (thinking / tools).
3105 if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
3106 workaround::func_args_not_string(params.messages);
3107 return common_chat_params_init_command_r7b(tmpl, params);
3108 }
3109
3110 // Granite (IBM) - detects thinking / tools support
3111 if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
3112 workaround::func_args_not_string(params.messages);
3113 workaround::use_generic_schema(params.messages);
3114 workaround::move_tool_calls_to_content(params.messages);
3115 return common_chat_params_init_granite(tmpl, params);
3116 }
3117
3118 // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
3119 if (src.find("[gMASK]<sop>") != std::string::npos &&
3120 src.find("<arg_key>") != std::string::npos &&
3121 src.find("<arg_value>") != std::string::npos &&
3122 params.json_schema.is_null()) {
3123 workaround::func_args_not_string(params.messages);
3124 if (!params.extra_context.contains("clear_thinking")) {
3125 // by default, do not clear reasoning_content (added since GLM-4.7)
3126 params.extra_context["clear_thinking"] = false;
3127 }
3128 return common_chat_params_init_glm_4_5(tmpl, params);
3129 }
3130
3131 // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
3132 // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
3133 // Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
3134 if (src.find("<tool_call>") != std::string::npos &&
3135 src.find("<function>") != std::string::npos &&
3136 src.find("<function=") != std::string::npos &&
3137 src.find("<parameters>") != std::string::npos &&
3138 src.find("<parameter=") != std::string::npos) {
3139 workaround::func_args_not_string(params.messages);
3140 // Nemotron 3 Nano 30B A3B
3141 if (src.find("<think>") != std::string::npos) {
3142 return common_chat_params_init_nemotron_v3(tmpl, params);
3143 }
3144 return common_chat_params_init_qwen3_coder_xml(tmpl, params);
3145 }
3146
3147 // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
3148 if (src.find("<tools>") != std::string::npos &&
3149 src.find("# Tools") != std::string::npos &&
3150 src.find("</tools>") != std::string::npos &&
3151 src.find("<tool_calls>") != std::string::npos &&
3152 src.find("</tool_calls>") != std::string::npos &&
3153 src.find("<tool_response>") != std::string::npos) {
3154 return common_chat_params_init_xiaomi_mimo(tmpl, params);
3155 }
3156
3157 // EXAONE MoE format detection
3158 if (src.find("<tool_call>") != std::string::npos &&
3159 src.find("<tool_result>") != std::string::npos &&
3160 src.find("<|tool_declare|>") != std::string::npos) {
3161 return common_chat_params_init_exaone_moe(tmpl, params);
3162 }
3163
3164 // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
3165 if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
3166 return common_chat_params_init_hermes_2_pro(tmpl, params);
3167 }
3168
3169 // GPT-OSS
3170 if (src.find("<|channel|>") != std::string::npos) {
3171 return common_chat_params_init_gpt_oss(tmpl, params);
3172 }
3173
3174 // Seed-OSS
3175 if (src.find("<seed:think>") != std::string::npos) {
3176 workaround::func_args_not_string(params.messages);
3177 return common_chat_params_init_seed_oss(tmpl, params, inputs);
3178 }
3179
3180 // Nemotron v2
3181 if (src.find("<SPECIAL_10>") != std::string::npos) {
3182 return common_chat_params_init_nemotron_v2(tmpl, params);
3183 }
3184
3185 // Apertus format detection
3186 if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
3187 return common_chat_params_init_apertus(tmpl, params);
3188 }
3189
3190 // LFM2 (w/ tools)
3191 if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
3192 src.find("]<|tool_list_end|>") != std::string::npos) {
3193 return common_chat_params_init_lfm2(tmpl, params);
3194 }
3195
3196 // MiniMax-M2 format detection
3197 if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
3198 workaround::func_args_not_string(params.messages);
3199 return common_chat_params_init_minimax_m2(tmpl, params);
3200 }
3201
3202 // Kimi K2 format detection
3203 if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
3204 src.find("<|tool_calls_section_begin|>") != std::string::npos &&
3205 src.find("## Return of") != std::string::npos) {
3206 return common_chat_params_init_kimi_k2(tmpl, params);
3207 }
3208
3209 // Apriel 1.5 format detection
3210 if (src.find("<thinking>") != std::string::npos &&
3211 src.find("</thinking>") != std::string::npos &&
3212 src.find("<available_tools>") != std::string::npos &&
3213 src.find("<|assistant|>") != std::string::npos &&
3214 src.find("<|tool_result|>") != std::string::npos &&
3215 src.find("<tool_calls>[") != std::string::npos &&
3216 src.find("]</tool_calls>") != std::string::npos) {
3217 return common_chat_params_init_apriel_1_5(tmpl, params);
3218 }
3219
3220 // Solar Open
3221 if (src.find("<|tool_response:begin|>") != std::string::npos &&
3222 src.find("<|tool_response:name|>") != std::string::npos &&
3223 src.find("<|tool_response:result|>") != std::string::npos) {
3224 return common_chat_params_init_solar_open(tmpl, params);
3225 }
3226
3227 // Use generic handler when mixing tools + JSON schema.
3228 // TODO: support that mix in handlers below.
3229 if ((params.tools.is_array() && params.json_schema.is_object())) {
3230 return common_chat_params_init_generic(tmpl, params);
3231 }
3232
3233 // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
3234 if (src.find(">>>all") != std::string::npos) {
3235 return common_chat_params_init_functionary_v3_2(tmpl, params);
3236 }
3237
3238 // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
3239 if (src.find(" functools[") != std::string::npos) {
3240 return common_chat_params_init_firefunction_v2(tmpl, params);
3241 }
3242
3243 // Functionary v3.1 (w/ tools)
3244 if (src.find("<|start_header_id|>") != std::string::npos
3245 && src.find("<function=") != std::string::npos) {
3246 return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
3247 }
3248
3249 // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
3250 if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
3251 auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
3252 workaround::func_args_not_string(params.messages);
3253 return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
3254 }
3255
3256 // Ministral/Mistral Large 3
3257 if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
3258 src.find("[TOOL_CALLS]") != std::string::npos &&
3259 src.find("[ARGS]") != std::string::npos) {
3260 return common_chat_params_init_ministral_3(tmpl, params);
3261 }
3262
3263 if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
3264 return common_chat_params_init_magistral(tmpl, params);
3265 }
3266
3267 // Solar Open
3268 if (src.find("<|tool_response:begin|>") != std::string::npos &&
3269 src.find("<|tool_response:name|>") != std::string::npos &&
3270 src.find("<|tool_response:result|>") != std::string::npos) {
3271 return common_chat_params_init_solar_open(tmpl, params);
3272 }
3273
3274 // TranslateGemma
3275 if (src.find("[source_lang_code]") != std::string::npos &&
3276 src.find("[target_lang_code]") != std::string::npos) {
3277 return common_chat_params_init_translate_gemma(tmpl, params);
3278 }
3279
3280 // Plain handler (no tools)
3281 if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
3282 return common_chat_params_init_without_tools(tmpl, params);
3283 }
3284
3285 // Mistral Nemo (w/ tools)
3286 if (src.find("[TOOL_CALLS]") != std::string::npos) {
3287 workaround::func_args_not_string(params.messages);
3288 return common_chat_params_init_mistral_nemo(tmpl, params);
3289 }
3290
3291 // Generic fallback
3292 workaround::func_args_not_string(params.messages);
3293 workaround::use_generic_schema(params.messages);
3294 workaround::move_tool_calls_to_content(params.messages);
3295 return common_chat_params_init_generic(tmpl, params);
3296}
3297
3298// Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template.
3299static common_chat_params common_chat_templates_apply_legacy(
3300 const struct common_chat_templates * tmpls,
3301 const struct common_chat_templates_inputs & inputs)
3302{
3303 size_t alloc_size = 0;
3304 std::vector<llama_chat_message> chat;
3305 std::vector<std::string> contents;
3306
3307 for (const auto & msg : inputs.messages) {
3308 auto content = msg.content;
3309 for (const auto & part : msg.content_parts) {
3310 if (part.type != "text") {
3311 LOG_WRN("Ignoring non-text content part: %s\n", part.type.c_str());
3312 continue;
3313 }
3314 if (!content.empty()) {
3315 content += "\n";;
3316 }
3317 content += part.text;
3318 }
3319 contents.emplace_back(std::move(content));
3320 }
3321 for (size_t i = 0; i < contents.size(); ++i) {
3322 const auto & msg = inputs.messages[i];
3323 const auto & content = contents[i];
3324 chat.push_back({msg.role.c_str(), content.c_str()});
3325 size_t msg_size = msg.role.size() + content.size();
3326 alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
3327 }
3328
3329 std::vector<char> buf(alloc_size);
3330
3331 // run the first time to get the total output length
3332 const auto & src = tmpls->template_default->source();
3333 int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
3334
3335 // error: chat template is not supported
3336 if (res < 0) {
3337 // if the custom "tmpl" is not supported, we throw an error
3338 // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
3339 throw std::runtime_error("this custom template is not supported, try using --jinja");
3340 }
3341
3342 // if it turns out that our buffer is too small, we resize it
3343 if ((size_t) res > buf.size()) {
3344 buf.resize(res);
3345 res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
3346 }
3347
3348 // for safety, we check the result again
3349 if (res < 0 || (size_t) res > buf.size()) {
3350 throw std::runtime_error("failed to apply chat template, try using --jinja");
3351 }
3352
3353 common_chat_params params;
3354 params.prompt = std::string(buf.data(), res);
3355 if (!inputs.json_schema.empty()) {
3356 params.grammar = json_schema_to_grammar(json::parse(inputs.json_schema));
3357 } else {
3358 params.grammar = inputs.grammar;
3359 }
3360 return params;
3361}
3362
3363common_chat_params common_chat_templates_apply(
3364 const struct common_chat_templates * tmpls,
3365 const struct common_chat_templates_inputs & inputs)
3366{
3367 GGML_ASSERT(tmpls != nullptr);
3368 return inputs.use_jinja
3369 ? common_chat_templates_apply_jinja(tmpls, inputs)
3370 : common_chat_templates_apply_legacy(tmpls, inputs);
3371}
3372
3373std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
3374 GGML_ASSERT(chat_templates != nullptr);
3375 GGML_ASSERT(chat_templates->template_default != nullptr);
3376 return chat_templates->template_default->caps.to_map();
3377}