1// Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
2//
3// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
4// e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
5//
6// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
7//
8#include "chat.h"
9
10#include "log.h"
11
12#include "../src/unicode.h"
13#include "../src/llama-grammar.h"
14
15#include <nlohmann/json.hpp>
16
17#include <fstream>
18#include <iostream>
19#include <functional>
20#include <string>
21
22using json = nlohmann::ordered_json;
23
24static std::ostream & operator<<(std::ostream & os, const common_chat_msg_diff & diff) {
25 os << "{ content_delta: " << diff.content_delta << "; ";
26 os << "reasoning_content_delta: " << diff.reasoning_content_delta << "; ";
27 if (diff.tool_call_index != std::string::npos) {
28 os << "tool_call_index: " << diff.tool_call_index << "; ";
29 os << "tool_call_delta.name: " << diff.tool_call_delta.name << "; ";
30 os << "tool_call_delta.id: " << diff.tool_call_delta.id << "; ";
31 os << "tool_call_delta.arguments: " << diff.tool_call_delta.arguments << "; ";
32 }
33 os << "}";
34 return os;
35}
36// operator<< for vector<common_chat_msg_diff>:
37static std::ostream & operator<<(std::ostream & os, const std::vector<common_chat_msg_diff> & diffs) {
38 os << "[\n";
39 for (const auto & diff : diffs) {
40 os << " " << diff << ",\n";
41 }
42 os << "]";
43 return os;
44}
45static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg) {
46 os << "{ role: " << msg.role << "; ";
47 os << "content: " << msg.content << "; ";
48 os << "content_parts: [\n";
49 for (const auto & part : msg.content_parts) {
50 os << " { type: " << part.type << "; text: " << part.text << " },\n";
51 }
52 os << "]; ";
53 os << "reasoning_content: " << msg.reasoning_content << "; ";
54 os << "tool_calls: [\n";
55 for (const auto & tool_call : msg.tool_calls) {
56 os << " { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id << " },\n";
57 }
58 os << "]";
59 os << "}";
60 return os;
61}
62
// Generic equality helper used by assert_equals; specialized for
// common_chat_msg below to ignore JSON formatting differences.
template <class T>
static bool equals(const T & expected, const T & actual) {
    return expected == actual;
}
66
67static common_chat_msg normalize(const common_chat_msg & msg) {
68 common_chat_msg normalized = msg;
69 for (auto & tool_call : normalized.tool_calls) {
70 try {
71 tool_call.arguments = json::parse(tool_call.arguments).dump();
72 } catch (const std::exception &) {
73 // Do nothing
74 }
75 }
76 return normalized;
77}
78
79
// Message equality that tolerates JSON formatting differences in tool-call
// arguments: both sides are normalized before comparing.
template <>
bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
    return normalize(expected) == normalize(actual);
}
84
// Compares via equals() and throws (after printing both values) on mismatch.
template <class T> static void assert_equals(const T & expected, const T & actual) {
    if (equals(expected, actual)) {
        return;
    }
    std::cerr << "Expected:```\n" << expected << "\n```" << std::endl;
    std::cerr << "Actual:```\n" << actual << "\n```" << std::endl;
    std::cerr << std::flush;
    throw std::runtime_error("Test failed");
}
93
// Reads the entire contents of a file in binary mode.
// Falls back to one directory up (tests may run from the build directory).
// Throws std::runtime_error if the file cannot be opened or fully read.
static std::string read_file(const std::string & path) {
    std::cerr << "# Reading: " << path << '\n' << std::flush;
    std::ifstream fs(path, std::ios_base::binary);
    if (!fs.is_open()) {
        fs = std::ifstream("../" + path, std::ios_base::binary);
        if (!fs.is_open()) {
            throw std::runtime_error("Failed to open file: " + path);
        }
    }
    fs.seekg(0, std::ios_base::end);
    auto size = fs.tellg();
    if (size < 0) {
        // tellg() returns -1 on failure; resizing with it would be catastrophic.
        throw std::runtime_error("Failed to determine size of file: " + path);
    }
    fs.seekg(0);
    std::string out;
    out.resize(static_cast<size_t>(size));
    fs.read(out.data(), static_cast<std::streamsize>(size));
    if (!fs) {
        // A short/failed read would otherwise silently return NUL-padded data.
        throw std::runtime_error("Failed to read file: " + path);
    }
    return out;
}
111
// Loads a Jinja chat template file and builds chat templates from it
// (no model is attached).
static common_chat_templates_ptr read_templates(const std::string & path) {
    return common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, read_file(path)));
}
115
// Compiles a GBNF grammar string into a llama_grammar rooted at "root".
// Returns nullptr (wrapped) if grammar initialization fails.
static std::unique_ptr<llama_grammar> build_grammar(const std::string & grammar_str) {
    return std::unique_ptr<llama_grammar>(
        llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0));
}
120
121// TODO: extract to common helper (copied from test-grammar-integration.cpp)
122static bool match_string(const std::string & input, llama_grammar * grammar) {
123 const auto cpts = unicode_cpts_from_utf8(input);
124
125 auto & stacks_cur = llama_grammar_get_stacks(grammar);
126
127 for (const auto & cpt : cpts) {
128 llama_grammar_accept(grammar, cpt);
129
130 if (stacks_cur.empty()) {
131 // no stacks means that the grammar failed to match at this point
132 return false;
133 }
134 }
135
136 if (std::any_of(stacks_cur.begin(), stacks_cur.end(), [](const auto & stack) { return stack.empty(); })) {
137 // An empty stack means that the grammar has been completed
138 return true;
139 }
140
141 return false;
142}
143
144static std::string renormalize_json(const std::string & json_str) {
145 try {
146 auto json_obj = json::parse(json_str);
147 return json_obj.dump();
148 } catch (const std::exception & e) {
149 std::cerr << "Failed to parse JSON: " << e.what() << '\n';
150 return json_str;
151 }
152}
153static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) {
154 assert_equals(expected.role, actual.role);
155 if (ignore_whitespace_differences) {
156 assert_equals(string_strip(expected.content), string_strip(actual.content));
157 } else {
158 assert_equals(expected.content, actual.content);
159 }
160 assert_equals(expected.content_parts.size(), actual.content_parts.size());
161 for (size_t i = 0; i < expected.content_parts.size(); i++) {
162 const auto & expected_part = expected.content_parts[i];
163 const auto & actual_part = actual.content_parts[i];
164 assert_equals(expected_part.type, actual_part.type);
165 if (ignore_whitespace_differences) {
166 assert_equals(string_strip(expected_part.text), string_strip(actual_part.text));
167 } else {
168 assert_equals(expected_part.text, actual_part.text);
169 }
170 }
171 if (ignore_whitespace_differences) {
172 assert_equals(string_strip(expected.reasoning_content), string_strip(actual.reasoning_content));
173 } else {
174 assert_equals(expected.reasoning_content, actual.reasoning_content);
175 }
176 assert_equals(expected.tool_calls.size(), actual.tool_calls.size());
177 for (size_t i = 0; i < expected.tool_calls.size(); i++) {
178 const auto & expected_tool_call = expected.tool_calls[i];
179 const auto & actual_tool_call = actual.tool_calls[i];
180 assert_equals(expected_tool_call.name, actual_tool_call.name);
181 assert_equals(renormalize_json(expected_tool_call.arguments), renormalize_json(actual_tool_call.arguments));
182 assert_equals(expected_tool_call.id, actual_tool_call.id);
183 }
184}
185
// Tool with a single required integer argument.
common_chat_tool special_function_tool {
    /* .name = */ "special_function",
    /* .description = */ "I'm special",
    /* .parameters = */ R"({
        "type": "object",
        "properties": {
            "arg1": {
                "type": "integer",
                "description": "The arg."
            }
        },
        "required": ["arg1"]
    })",
};
// Tool with one required (arg1) and one optional (arg2) integer argument.
common_chat_tool special_function_tool_with_optional_param {
    /* .name = */ "special_function_with_opt",
    /* .description = */ "I'm special but have optional stuff",
    /* .parameters = */ R"({
        "type": "object",
        "properties": {
            "arg1": {
                "type": "integer",
                "description": "The arg."
            },
            "arg2": {
                "type": "integer",
                "description": "The optional arg."
            }
        },
        "required": ["arg1"]
    })",
};
// Generic "python" code-execution tool.
common_chat_tool python_tool {
    /* .name = */ "python",
    /* .description = */ "an ipython interpreter",
    /* .parameters = */ R"({
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "Python code to execute."
            }
        },
        "required": ["code"]
    })",
};
// Same schema as python_tool, under the "code_interpreter" name
// (used by the llama_3_1_tools set below).
common_chat_tool code_interpreter_tool {
    /* .name = */ "code_interpreter",
    /* .description = */ "an ipython interpreter",
    /* .parameters = */ R"({
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "Python code to execute."
            }
        },
        "required": ["code"]
    })",
};
// Default tool sets used by the tests below.
std::vector<common_chat_tool> tools { special_function_tool, special_function_tool_with_optional_param, python_tool };
std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
248
// Result of rendering a template with and without a trailing assistant message:
// `delta` is the text that message contributed to the prompt, `params` are the
// chat params from the full (two-message) template application.
struct delta_data {
    std::string delta;
    common_chat_params params;
};
253
254static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") {
255 common_chat_msg msg;
256 msg.role = "assistant";
257 msg.content = content;
258 msg.reasoning_content = reasoning_content;
259 if (!tool_name.empty()) {
260 msg.tool_calls.push_back({ tool_name, arguments, id });
261 }
262 return msg;
263}
264
// Renders the template for `user_message` alone (with a generation prompt),
// then for `user_message` + `delta_message` (without one), and returns the
// textual suffix that `delta_message` contributed, with the first matching
// end token stripped off. Throws if the two renderings are identical.
static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
                             const common_chat_msg & user_message,
                             const common_chat_msg & delta_message,
                             const std::vector<common_chat_tool> & tools,
                             const common_chat_tool_choice & tool_choice) {
    common_chat_templates_inputs inputs;
    inputs.parallel_tool_calls = true;
    inputs.messages.push_back(user_message);
    inputs.tools = tools;
    inputs.tool_choice = tool_choice;
    auto params_prefix = common_chat_templates_apply(tmpls, inputs);

    // Render again with the assistant message appended and no trailing generation prompt.
    inputs.messages.push_back(delta_message);
    inputs.add_generation_prompt = false;
    auto params_full = common_chat_templates_apply(tmpls, inputs);

    std::string prefix = params_prefix.prompt;
    std::string full = params_full.prompt;

    if (full == prefix) {
        throw std::runtime_error("Full message is the same as the prefix");
    }

    // Longest common prefix, except that a '<' is never consumed, so the delta
    // always keeps any special token it starts with.
    size_t common_prefix_length = 0;
    for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
        if (prefix[i] != full[i]) {
            break;
        }
        if (prefix[i] == '<') {
            // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
            // but it removes thinking tags for past messages.
            // The prefix and full strings diverge at <think> vs. <｜tool▁calls▁begin｜>, we avoid consuming the leading <.
            continue;
        }
        common_prefix_length = i + 1;
    }
    auto delta = full.substr(common_prefix_length);

    // Strip end tokens
    for (const auto & end_token : end_tokens) {
        // rfind to find the last occurrence
        auto pos = delta.rfind(end_token);
        if (pos != std::string::npos) {
            delta = delta.substr(0, pos);
            break;
        }
    }
    return { delta, params_full };
}
314
315/*
316 Applies the template to 1 user message w/ add_generation_prompt=true, then w/ the test message w/ add_generation_prompt=false,
317 gets the diff, removes any end tokens and parses the result w/ the grammar, checking that
318 the parsed message is the same as the test_message
319*/
static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
                           const common_chat_msg & test_message,
                           const std::vector<common_chat_tool> & tools = {},
                           const std::string & expected_delta = "",
                           bool expect_grammar_triggered = true,
                           bool test_grammar_if_triggered = true,
                           common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
                           bool ignore_whitespace_differences = false
    ) {
    common_chat_msg user_message;
    user_message.role = "user";
    user_message.content = "Hello, world!";

    // Exercise both tool-choice modes; they may produce different prompts/grammars.
    for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
        auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice);
        // Optionally pin the exact rendered delta.
        if (!expected_delta.empty()) {
            if (ignore_whitespace_differences) {
                assert_equals(string_strip(expected_delta), string_strip(data.delta));
            } else {
                assert_equals(expected_delta, data.delta);
            }
        }

        // Parse the delta back and check it round-trips to the original message.
        if (expect_grammar_triggered) {
            // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time
            common_chat_parser_params params;
            params.format = data.params.format;
            params.reasoning_format = reasoning_format;
            const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params);
            assert_msg_equals(test_message, msg, ignore_whitespace_differences);
        }

        // Any message with tool calls must have produced a grammar.
        if (!test_message.tool_calls.empty()) {
            GGML_ASSERT(!data.params.grammar.empty());
        }
        if (!data.params.grammar.empty()) {
            auto grammar = build_grammar(data.params.grammar);
            if (!grammar) {
                throw std::runtime_error("Failed to build grammar");
            }
            // Find the earliest position in the delta at which any grammar
            // trigger (literal word or regex) fires.
            auto earliest_trigger_pos = std::string::npos;
            auto constrained = data.delta;
            for (const auto & trigger : data.params.grammar_triggers) {
                size_t pos = std::string::npos;
                std::smatch match;
                switch (trigger.type) {
                    case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
                    {
                        const auto & word = trigger.value;
                        pos = constrained.find(word);
                        break;
                    }
                    case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
                    {
                        const auto & pattern = trigger.value;
                        if (std::regex_search(constrained, match, std::regex(pattern))) {
                            pos = match.position(1);
                        }
                        break;
                    }
                    case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
                    {
                        const auto & pattern = trigger.value;
                        if (std::regex_match(constrained, match, std::regex(pattern))) {
                            // Use the position of the first non-empty capture
                            // group, falling back to the whole match.
                            auto mpos = std::string::npos;
                            for (size_t i = 1; i < match.size(); ++i) {
                                if (match[i].length() > 0) {
                                    mpos = match.position(i);
                                    break;
                                }
                            }
                            if (mpos == std::string::npos) {
                                mpos = match.position(0);
                            }
                            pos = mpos;
                        }
                        break;
                    }
                    default:
                        throw std::runtime_error("Unknown trigger type");
                }
                if (pos == std::string::npos) {
                    continue;
                }
                if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) {
                    earliest_trigger_pos = pos;
                }
            }
            auto grammar_triggered = false;
            if (earliest_trigger_pos != std::string::npos) {
                // The grammar only constrains the text from the trigger onwards.
                constrained = constrained.substr(earliest_trigger_pos);
                grammar_triggered = true;
            }
            // For lazy grammars, triggering must match the caller's expectation.
            if (data.params.grammar_lazy) {
                assert_equals(expect_grammar_triggered, grammar_triggered);
            }

            // Finally, the constrained portion of the delta must be accepted by the grammar.
            if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
                throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
                                         "\n\nConstrained: " + constrained +
                                         "\n\nGrammar: " + data.params.grammar);
            }
        }
    }
}
425
/**
 * Tests that streaming (incremental) parsing is consistent with one-shot
 * parsing for the given partial parser: the message reassembled from all
 * streamed diffs must equal each full parse result, and the final merged
 * message must equal `expected`. Also checks that partial (mid-message,
 * possibly mid-UTF-8-character) inputs cause no problems.
 */
template <typename T>
static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) {
    // Returns the length of `s` truncated so it does not end in the middle of a
    // multi-byte UTF-8 sequence (truncated prefixes must stay valid UTF-8).
    constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
        auto len = s.size();
        if (len == 0) return 0;
        auto i = len;
        // Scan at most the 4 trailing bytes (the longest UTF-8 sequence).
        for (size_t back = 0; back < 4 && i > 0; ++back) {
            --i;
            unsigned char c = s[i];
            if ((c & 0x80) == 0) {
                // ASCII byte: string ends on a complete character.
                return len;
            } else if ((c & 0xC0) == 0xC0) {
                // Lead byte: determine how long its sequence should be.
                size_t expected_len = 0;
                if ((c & 0xE0) == 0xC0) expected_len = 2;
                else if ((c & 0xF0) == 0xE0) expected_len = 3;
                else if ((c & 0xF8) == 0xF0) expected_len = 4;
                else return i; // invalid lead byte: cut before it
                if (len - i >= expected_len) {
                    return len; // sequence is complete
                } else {
                    return i;   // sequence is truncated: cut before the lead byte
                }
            }
            // Continuation byte (10xxxxxx): keep scanning backwards.
        }
        return len - std::min(len, size_t(3));
    };
    // View of `s` with any trailing incomplete UTF-8 sequence removed.
    constexpr auto utf8_truncate_safe_view = [utf8_truncate_safe_len](const std::string_view s) {
        return s.substr(0, utf8_truncate_safe_len(s));
    };

    auto merged = simple_assist_msg("");
    auto last_msg = parse_msg("");
    // Feed ever-longer prefixes of the raw message; after each parse, merge the
    // diffs vs. the previous parse and check the merge matches the direct parse.
    for (size_t i = 1; i <= raw_message.size(); ++i) {
        auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i))));
        if (curr_msg == simple_assist_msg("")) continue; // nothing parsed yet
        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str());
        for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
            LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str());
            if (!diff.reasoning_content_delta.empty()) {
                merged.reasoning_content += diff.reasoning_content_delta;
            }
            if (!diff.content_delta.empty()) {
                merged.content += diff.content_delta;
            }
            if (diff.tool_call_index != std::string::npos) {
                // A non-empty name delta starts a new tool call; argument
                // deltas always append to the most recent one.
                if (!diff.tool_call_delta.name.empty()) {
                    merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""});
                }
                if (!diff.tool_call_delta.arguments.empty()) {
                    GGML_ASSERT(!merged.tool_calls.empty());
                    merged.tool_calls.back().arguments += diff.tool_call_delta.arguments;
                }
            }
            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str());
        }
        assert_msg_equals(curr_msg, merged, true);
        last_msg = curr_msg;
    }
    // The one-shot parse and the streamed merge must both equal the expectation.
    assert_msg_equals(expected, parse_msg(raw_message), true);
    assert_msg_equals(expected, merged, true);
}
491
// Simple user message with plain string content.
const common_chat_msg message_user {
    "user",
    "Hey there!",
    /* .content_parts = */ {},
    /* .tool_calls = */ {},
    /* .reasoning_content = */ "",
    /* .tool_name = */ "",
    /* .tool_call_id = */ "",
};

// User message whose content is split into typed parts instead of one string.
const common_chat_msg message_user_parts {
    "user",
    /* .content = */ "",
    /* .content_parts = */ {
        { "text", "Hey" },
        { "text", "there" },
    },
    /* .tool_calls = */ {},
    /* .reasoning_content = */ "",
    /* .tool_name = */ "",
    /* .tool_call_id = */ "",
};

// Assistant message fixtures. The "_unparsed" variants keep a format's raw
// reasoning markup inside `content`; the plain "thoughts" variants carry the
// reasoning in `reasoning_content` instead.
const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?");
const common_chat_msg message_assist_empty = simple_assist_msg("");
const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
const common_chat_msg message_assist_thoughts_unparsed_md = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");

const common_chat_msg message_assist_thoughts_unparsed_r7b = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Rรฉponse");
const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
const common_chat_msg message_assist_thoughts_unopened_unparsed = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
// Tool-call fixtures; "_id"/"_idx" carry a call id (the latter an index-style "0"),
// "_cutoff_args"/"_unclosed" simulate truncated partial outputs.
const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
const common_chat_msg message_assist_call_noopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
const common_chat_msg message_assist_call_withopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
const common_chat_msg message_assist_call_content = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
const common_chat_msg message_assist_call_empty_args = simple_assist_msg("", "", "special_function");
const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
const common_chat_msg message_assist_call_thoughts = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
const common_chat_msg message_assist_call_thoughts_unparsed = simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
const common_chat_msg message_assist_call_thoughts_content = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
const common_chat_msg message_assist_call_id = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
const common_chat_msg message_assist_call_idx = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
const common_chat_msg message_assist_thoughts_call_idx = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
const common_chat_msg message_assist_call_python_lines = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
const common_chat_msg message_assist_call_python_lines_unclosed = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
const common_chat_msg message_assist_call_code_interpreter = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}");
542
// Use for PEG parser implementations.
// One PEG-parser test: template inputs, the raw output to parse, and the
// message the parse is expected to produce.
struct peg_test_case {
    common_chat_templates_inputs params;  // template application inputs (messages, tools, ...)
    std::string input;                    // raw assistant output fed to the parser
    common_chat_msg expect;               // expected parse result
};
549
// Bundles template application with PEG arena loading so tests can repeatedly
// parse (possibly partial) outputs using the parser generated for a template.
struct make_peg_parser {
    common_chat_params params_;  // result of applying the template (format, parser definition, ...)
    common_peg_arena arena_;     // PEG arena loaded from params_.parser

    make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) {
        params_ = common_chat_templates_apply(tmpls, inputs);
        arena_.load(params_.parser);
    }

    // Parses a (possibly partial) raw assistant output into a message.
    common_chat_msg parse(const std::string & msg, bool is_partial) {
        common_chat_parser_params parser_params;
        parser_params.format = params_.format;
        return common_chat_peg_parse(arena_, msg, is_partial, parser_params);
    }
};
565
// Runs a PEG parser test case: feeds tc.input to the parser one byte at a time,
// accumulates the streamed diffs between consecutive parses, and checks that the
// accumulated message always matches the current full parse — and, at the end,
// matches tc.expect.
static void test_peg_parser(common_chat_templates * tmpls, const std::function<void(peg_test_case &)> & init) {
    peg_test_case tc;
    init(tc);
    // Defaults: single user message, assistant reply expected.
    if (tc.params.messages.empty()) {
        tc.params.messages = {message_user};
    }
    if (tc.expect.role.empty()) {
        tc.expect.role = "assistant";
    }

    auto parser = make_peg_parser(tmpls, tc.params);

    common_chat_msg msg_accum;
    common_chat_msg msg_prev;
    msg_accum.role = msg_prev.role = "assistant";

    for (size_t i = 1; i <= tc.input.size(); ++i) {
        auto is_partial = i < tc.input.size();
        common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial);

        // Re-apply the streamed diffs, mirroring how a client reassembles deltas.
        for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) {
            if (!diff.reasoning_content_delta.empty()) {
                msg_accum.reasoning_content += diff.reasoning_content_delta;
            }
            if (!diff.content_delta.empty()) {
                msg_accum.content += diff.content_delta;
            }
            if (diff.tool_call_index != std::string::npos) {
                // A name delta starts a new tool call; argument deltas append
                // to the most recent one.
                if (!diff.tool_call_delta.name.empty()) {
                    msg_accum.tool_calls.push_back({diff.tool_call_delta.name, "", diff.tool_call_delta.id});
                }
                if (!diff.tool_call_delta.arguments.empty()) {
                    msg_accum.tool_calls.back().arguments += diff.tool_call_delta.arguments;
                }
            }
        }
        assert_msg_equals(msg_current, msg_accum, true);
        msg_prev = msg_current;
    }

    // Final one-shot parse and the streamed accumulation must both match.
    assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
    assert_msg_equals(tc.expect, msg_accum, true);
}
609
// Round-trips message fixtures through OAI-compatible JSON and back, pins the
// exact JSON emitted for two representative messages, and checks edge cases
// (empty tool_calls array; assistant message with neither content nor tool calls).
static void test_msgs_oaicompat_json_conversion() {
    printf("[%s]\n", __func__);
    std::vector<common_chat_msg> msgs{
        message_user,
        message_user_parts,
        message_assist_call,
        message_assist_call_thoughts,
        message_assist_call_thoughts_unparsed,
        message_assist_call_thoughts_content,
        message_assist_call_id,
        message_assist_call_idx,
        message_assist_call_python,
        message_assist_call_code_interpreter,
    };
    // to-JSON -> from-JSON must be lossless for every fixture.
    for (const auto & msg : msgs) {
        auto oai_json = common_chat_msgs_to_json_oaicompat({msg});
        auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
        assert_equals((size_t) 1, msgs2.size());
        auto msg2 = msgs2[0];
        assert_msg_equals(msg, msg2);
    }
    // Multi-part content serializes as an array of typed text parts.
    assert_equals(
        std::string(
            "[\n"
            "  {\n"
            "    \"role\": \"user\",\n"
            "    \"content\": [\n"
            "      {\n"
            "        \"type\": \"text\",\n"
            "        \"text\": \"Hey\"\n"
            "      },\n"
            "      {\n"
            "        \"type\": \"text\",\n"
            "        \"text\": \"there\"\n"
            "      }\n"
            "    ]\n"
            "  }\n"
            "]"
        ),
        common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2));

    // Tool calls serialize in the OAI "function" wrapper with string arguments.
    assert_equals(
        std::string(
            "[\n"
            "  {\n"
            "    \"role\": \"assistant\",\n"
            "    \"content\": \"\",\n"
            "    \"tool_calls\": [\n"
            "      {\n"
            "        \"type\": \"function\",\n"
            "        \"function\": {\n"
            "          \"name\": \"python\",\n"
            "          \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n"
            "        }\n"
            "      }\n"
            "    ]\n"
            "  }\n"
            "]"
        ),
        common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2));

    // An assistant message with an (empty) tool_calls array parses cleanly.
    auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
    assert_equals<size_t>(1, res.size());
    assert_equals<std::string>(res[0].role, "assistant");
    assert_equals(true, res[0].content.empty());
    assert_equals(true, res[0].tool_calls.empty());

    // An assistant message with neither content nor tool_calls must be rejected
    // with an error mentioning the missing 'content'.
    try {
        common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\"}]"));
        throw std::runtime_error("Expected exception");
    } catch (const std::exception & e) {
        if (std::string(e.what()).find("'content'") == std::string::npos) {
            throw std::runtime_error("Expected exception about missing 'content'");
        }
    }
}
686
// Round-trips tool definitions through OAI-compatible JSON and back, pins the
// exact JSON for one tool, and checks parsing of tools with missing
// parameters/description fields.
static void test_tools_oaicompat_json_conversion() {
    printf("[%s]\n", __func__);
    std::vector<common_chat_tool> tools{
        special_function_tool,
        python_tool,
        code_interpreter_tool,
    };

    // to-JSON -> from-JSON must preserve name, description and parameter schema.
    for (const auto & tool : tools) {
        auto oai_json = common_chat_tools_to_json_oaicompat({tool});
        auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
        assert_equals((size_t) 1, tools2.size());
        auto tool2 = tools2[0];
        assert_equals(tool.name, tool2.name);
        assert_equals(tool.description, tool2.description);
        assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2));
    }

    // Pin the full OAI "function" wrapper layout.
    assert_equals(
        std::string(
            "[\n"
            "  {\n"
            "    \"type\": \"function\",\n"
            "    \"function\": {\n"
            "      \"name\": \"special_function\",\n"
            "      \"description\": \"I'm special\",\n"
            "      \"parameters\": {\n"
            "        \"type\": \"object\",\n"
            "        \"properties\": {\n"
            "          \"arg1\": {\n"
            "            \"type\": \"integer\",\n"
            "            \"description\": \"The arg.\"\n"
            "          }\n"
            "        },\n"
            "        \"required\": [\n"
            "          \"arg1\"\n"
            "        ]\n"
            "      }\n"
            "    }\n"
            "  }\n"
            "]"
        ),
        common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2));

    {
        // Missing "parameters" defaults to an empty object schema.
        auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
            R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
        assert_equals((size_t) 1, tools_no_params.size());
        assert_equals(std::string("test_func"), tools_no_params[0].name);
        assert_equals(std::string("A test"), tools_no_params[0].description);
        assert_equals(std::string("{}"), tools_no_params[0].parameters);
    }
    {
        // Missing "description" defaults to an empty string.
        auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
            R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
        assert_equals((size_t) 1, tools_no_desc.size());
        assert_equals(std::string("test_func"), tools_no_desc[0].name);
        assert_equals(std::string(""), tools_no_desc[0].description);
    }
    {
        // Name-only tool: both description and parameters take their defaults.
        auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
            R"([{"type": "function", "function": {"name": "test_func"}}])"));
        assert_equals((size_t) 1, tools_minimal.size());
        assert_equals(std::string("test_func"), tools_minimal[0].name);
        assert_equals(std::string(""), tools_minimal[0].description);
        assert_equals(std::string("{}"), tools_minimal[0].parameters);
    }
}
755
// for compat; ref: https://github.com/ggml-org/llama.cpp/pull/18961
// Legacy bundle of parser settings, mapped onto common_chat_parser_params by
// test_chat_parse; keeps older brace-initialized call sites working.
struct test_parser_params {
    common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
    bool reasoning_in_content = false;   // presumably: keep reasoning inside `content` — confirm against parser docs
    bool thinking_forced_open = false;   // presumably: input starts inside a pre-opened thinking block — confirm
    bool parse_tool_calls = true;
};
764
765static common_chat_msg test_chat_parse(const std::string & input, bool is_partial, const test_parser_params & syntax) {
766 common_chat_parser_params params;
767 params.format = syntax.format;
768 params.reasoning_format = syntax.reasoning_format;
769 params.reasoning_in_content = syntax.reasoning_in_content;
770 params.thinking_forced_open = syntax.thinking_forced_open;
771 params.parse_tool_calls = syntax.parse_tool_calls;
772 return common_chat_parse(input, is_partial, params);
773}
774
775static void test_template_output_parsers() {
776 printf("[%s]\n", __func__);
777
778 common_chat_templates_inputs inputs_no_tools;
779 inputs_no_tools.messages = {message_user};
780
781 common_chat_templates_inputs inputs_tools;
782 inputs_tools.messages = {message_user};
783 inputs_tools.tools = {special_function_tool};
784
785 common_chat_templates_inputs inputs_tools_builtin;
786 inputs_tools_builtin.messages = {message_user};
787 inputs_tools_builtin.tools = {python_tool};
788
789 {
790 // Not supported yet
791 auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
792 assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
793 assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
794 }
795 {
796 auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
797 std::vector<std::string> end_tokens{ "<|END_OF_TURN_TOKEN|>" };
798
799 for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
800 auto params = common_chat_templates_apply(tmpls.get(), inputs);
801 assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, params.format);
802 assert_equals(false, params.thinking_forced_open);
803 }
804
805 assert_msg_equals(message_assist,
806 test_chat_parse(
807 "Hello, world!\nWhat's up?",
808 /* is_partial= */ false,
809 {COMMON_CHAT_FORMAT_COMMAND_R7B}));
810 assert_msg_equals(message_assist,
811 test_chat_parse(
812 "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
813 /* is_partial= */ false,
814 {COMMON_CHAT_FORMAT_COMMAND_R7B}));
815 assert_msg_equals(message_assist_thoughts,
816 test_chat_parse(
817 "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
818 "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
819 /* is_partial= */ false,
820 {
821 /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
822 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
823 }));
824 assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
825 test_chat_parse(
826 "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
827 "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
828 /* is_partial= */ false,
829 {
830 /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
831 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
832 /* .reasoning_in_content = */ true,
833 /* .thinking_forced_open = */ false,
834 }));
835 assert_msg_equals(message_assist_thoughts_unparsed_r7b,
836 test_chat_parse(
837 "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
838 "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
839 /* is_partial= */ false,
840 {COMMON_CHAT_FORMAT_COMMAND_R7B}));
841 assert_msg_equals(message_assist_thoughts,
842 test_chat_parse(
843 "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
844 "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
845 /* is_partial= */ false,
846 {
847 /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
848 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
849 }));
850 assert_msg_equals(message_assist_thoughts_call_idx,
851 test_chat_parse(
852 "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
853 "<|START_ACTION|>[\n"
854 " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
855 "]<|END_ACTION|>",
856 /* is_partial= */ false,
857 {
858 /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
859 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
860 }));
861 assert_msg_equals(message_assist_thoughts_no_content,
862 test_chat_parse(
863 "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
864 "<|START_ACTION|>[\n"
865 " {\"tool_call_id\": \"0\", \"tool_name\": \"special",
866 /* is_partial= */ true,
867 {
868 /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
869 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
870 }));
871
872 test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
873 "<|START_THINKING|><|END_THINKING|>"
874 "<|START_ACTION|>[\n"
875 " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
876 "]<|END_ACTION|>",
877 /* expect_grammar_triggered= */ true,
878 /* test_grammar_if_triggered= */ true,
879 COMMON_REASONING_FORMAT_DEEPSEEK);
880 test_templates(tmpls.get(), end_tokens, message_assist, tools,
881 "<|START_RESPONSE|>Hello, world!\n"
882 "What's up?<|END_RESPONSE|>",
883 /* expect_grammar_triggered= */ false);
884 }
    // TODO @ngxson : generic tool call support is too costly to maintain; consider removing it in the future
886 {
887 auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja");
888 std::vector<std::string> end_tokens{ "<end_of_turn>" };
889
890 assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
891 assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
892 assert_equals(COMMON_CHAT_FORMAT_GENERIC,
893 common_chat_templates_apply(
894 read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(),
895 inputs_tools)
896 .format);
897
898 // Generic tool calls doesn't generate / parse content-only messages symmetrically.
899
900 assert_equals(
901 simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"),
902 test_chat_parse(
903 "{ \"tool_call\" : { \"name\" : \"t",
904 /* is_partial= */ true,
905 {
906 /* .format = */ COMMON_CHAT_FORMAT_GENERIC,
907 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
908 /* .reasoning_in_content = */ false,
909 /* .thinking_forced_open = */ true,
910 /* .parse_tool_calls = */ false,
911 }));
912 assert_equals(
913 message_assist_empty,
914 test_chat_parse(
915 "{ \"tool_call\" : { \"name\" : \"t",
916 /* is_partial= */ true,
917 {COMMON_CHAT_FORMAT_GENERIC}));
918
919 assert_equals(
920 simple_assist_msg("", "", "puppeteer_screenshot", "{\"name\":\"servethehome_homepage\","),
921 test_chat_parse(
922 R"({"tool_call": {"name": "puppeteer_screenshot", "arguments": {"name": "servethehome_homepage",)",
923 /* is_partial= */ true,
924 {COMMON_CHAT_FORMAT_GENERIC}));
925
926 assert_equals(
927 message_assist_call_empty_args,
928 test_chat_parse(
929 "{ \"tool_call\" : { \"name\" : \"special_function\"",
930 /* is_partial= */ true,
931 {COMMON_CHAT_FORMAT_GENERIC}));
932 assert_equals(
933 message_assist_call_cutoff_args,
934 test_chat_parse(
935 "{ \"tool_call\" : { \"name\" : \"special_function\", \"arguments\" : { \"arg",
936 /* is_partial= */ true,
937 {COMMON_CHAT_FORMAT_GENERIC}));
938
939 assert_msg_equals(message_assist,
940 test_chat_parse(
941 "{\n"
942 " \"response\": \"Hello, world!\\nWhat's up?\"\n"
943 "}",
944 /* is_partial= */ false,
945 {COMMON_CHAT_FORMAT_GENERIC}));
946#if 0
947 test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
948 "{\n"
949 " \"tool_calls\": [\n"
950 " {\n"
951 " \"name\": \"special_function\",\n"
952 " \"arguments\": {\n"
953 " \"arg1\": 1\n"
954 " },\n"
955 " \"id\": \"123456789\"\n"
956 " }\n"
957 " ],\n"
958 " \"content\": \"\"\n"
959 "}");
960#endif
961 }
962 {
963 auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja");
964 std::vector<std::string> end_tokens{ "</s>" };
965
966 assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
967
968 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
969 test_templates(
970 tmpls.get(), end_tokens, message_assist_call_id, tools,
971 "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
972 }
973 {
974 assert_msg_equals(
975 simple_assist_msg("Rรฉponse", "raisonnement"),
976 test_chat_parse(
977 message_assist_thoughts_unparsed_magistral.content,
978 /* is_partial= */ false,
979 {
980 /* .format = */ COMMON_CHAT_FORMAT_MAGISTRAL,
981 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
982 }));
983 }
984 {
985 auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja");
986 std::vector<std::string> end_tokens{ "<|im_end|>" };
987
988 assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
989 assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
990 }
991 {
992 auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
993 std::vector<std::string> end_tokens{ "<|im_end|>" };
994
995 assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
996 assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
997 assert_equals(
998 COMMON_CHAT_FORMAT_HERMES_2_PRO,
999 common_chat_templates_apply(
1000 read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(),
1001 inputs_tools)
1002 .format);
1003 assert_equals(
1004 COMMON_CHAT_FORMAT_HERMES_2_PRO,
1005 common_chat_templates_apply(
1006 read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(),
1007 inputs_tools)
1008 .format);
1009
1010 // Test parsing
1011 assert_msg_equals(
1012 simple_assist_msg("", "", "python", ""),
1013 test_chat_parse(
1014 "```json\n"
1015 "<function_call> { \"name\" : \"python\"",
1016 /* is_partial= */ true,
1017 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1018 assert_msg_equals(
1019 simple_assist_msg("Let's call something\n"),
1020 test_chat_parse(
1021 "Let's call something\n"
1022 "<tool_call>{\"name\"",
1023 /* is_partial= */ true,
1024 {
1025 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1026 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1027 }));
1028 assert_msg_equals(
1029 simple_assist_msg("Let's call something\n"),
1030 test_chat_parse(
1031 "Let's call something\n"
1032 "<tool_call>{\"name",
1033 /* is_partial= */ true,
1034 {
1035 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1036 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1037 }));
1038 assert_msg_equals(message_assist_call_thoughts,
1039 test_chat_parse(
1040 // QwQ-32B's template adds a trailing <think> if add_generation_prompt
1041 "I'm\nthinking</think>\n"
1042 "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
1043 /* is_partial= */ false,
1044 {
1045 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1046 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1047 /* .reasoning_in_content = */ false,
1048 /* .thinking_forced_open = */ true,
1049 }));
1050 assert_msg_equals(
1051 message_assist_call,
1052 test_chat_parse(
1053 "<tool_call>\n"
1054 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1055 "</tool_call>",
1056 /* is_partial= */ false,
1057 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1058 assert_msg_equals(message_assist_call_content,
1059 test_chat_parse(
1060 "Hello, world!\nWhat's up?<tool_call>\n"
1061 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1062 "</tool_call>",
1063 /* is_partial= */ false,
1064 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1065 assert_msg_equals(
1066 message_assist_call,
1067 test_chat_parse(
1068 "<function=special_function>{\"arg1\": 1}</function>",
1069 /* is_partial= */ false,
1070 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1071 assert_msg_equals(
1072 message_assist_call,
1073 test_chat_parse(
1074 "<function name=\"special_function\">\n"
1075 "{\"arg1\": 1}\n"
1076 "</function>",
1077 /* is_partial= */ false,
1078 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1079 assert_msg_equals(
1080 message_assist_call,
1081 test_chat_parse(
1082 "<tool>\n"
1083 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1084 "</tool>",
1085 /* is_partial= */ false,
1086 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1087 assert_msg_equals(
1088 message_assist_call,
1089 test_chat_parse(
1090 "<tools>\n"
1091 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1092 "</tools>",
1093 /* is_partial= */ false,
1094 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1095 assert_msg_equals(
1096 message_assist_call,
1097 test_chat_parse(
1098 "<response>\n"
1099 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1100 "</response>",
1101 /* is_partial= */ false,
1102 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1103 assert_msg_equals(
1104 message_assist_call,
1105 test_chat_parse(
1106 "```xml\n"
1107 "<response>\n"
1108 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1109 "</response>\n"
1110 "```",
1111 /* is_partial= */ false,
1112 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1113 assert_msg_equals(
1114 message_assist_call,
1115 test_chat_parse(
1116 "```xml\n"
1117 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1118 "```",
1119 /* is_partial= */ false,
1120 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1121 assert_msg_equals(
1122 message_assist_call,
1123 test_chat_parse(
1124 "```\n"
1125 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1126 "```",
1127 /* is_partial= */ false,
1128 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1129 assert_msg_equals(
1130 message_assist_call,
1131 test_chat_parse(
1132 "```\n"
1133 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1134 "```",
1135 /* is_partial= */ false,
1136 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1137 assert_msg_equals(
1138 message_assist_call,
1139 test_chat_parse(
1140 "```json\n"
1141 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1142 "```",
1143 /* is_partial= */ false,
1144 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1145 assert_msg_equals(
1146 message_assist_call,
1147 test_chat_parse(
1148 "```json\n"
1149 "\n"
1150 " <function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n"
1151 " </function_call> \n"
1152 "``` ",
1153 /* is_partial= */ false,
1154 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1155 assert_msg_equals(
1156 message_assist_call,
1157 test_chat_parse(
1158 "<json>\n"
1159 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1160 "</json>",
1161 /* is_partial= */ false,
1162 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1163 assert_msg_equals(
1164 message_assist_call,
1165 test_chat_parse(
1166 "<xml>\n"
1167 " {\n"
1168 " \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n"
1169 " }\n"
1170 "</xml>",
1171 /* is_partial= */ false,
1172 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1173 assert_msg_equals(
1174 message_assist_call,
1175 test_chat_parse(
1176 "<JSON>\n"
1177 " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1178 "</JSON>",
1179 /* is_partial= */ false,
1180 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1181 assert_msg_equals(
1182 message_assist_call,
1183 test_chat_parse(
1184 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
1185 /* is_partial= */ false,
1186 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1187 assert_msg_equals(
1188 message_assist_call,
1189 test_chat_parse(
1190 "{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
1191 /* is_partial= */ false,
1192 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1193
1194 // Test multiple tool calls
1195 common_chat_msg message_assist_multiple_calls;
1196 message_assist_multiple_calls.role = "assistant";
1197 message_assist_multiple_calls.content = "";
1198 message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
1199 message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""});
1200
1201 assert_msg_equals(
1202 message_assist_multiple_calls,
1203 test_chat_parse(
1204 "<tool_call>\n"
1205 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1206 "</tool_call>\n"
1207 "<tool_call>\n"
1208 "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n"
1209 "</tool_call>",
1210 /* is_partial= */ false,
1211 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1212
1213 assert_msg_equals(
1214 message_assist_multiple_calls,
1215 test_chat_parse(
1216 "<function=special_function>{\"arg1\": 1}</function>\n"
1217 "<function=python>{\"code\":\"print('hello')\"}</function>",
1218 /* is_partial= */ false,
1219 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1220
1221 assert_msg_equals(
1222 simple_assist_msg(
1223 "This is not a tool call:",
1224 "",
1225 "special_function",
1226 "{\"arg1\": 1}"),
1227 test_chat_parse(
1228 "This is not a tool call:\n"
1229 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
1230 /* is_partial= */ false,
1231 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1232 assert_msg_equals(message_assist,
1233 test_chat_parse(
1234 "Hello, world!\nWhat's up?",
1235 /* is_partial= */ false,
1236 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1237 assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
1238 test_chat_parse(
1239 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1240 /* is_partial= */ false,
1241 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
1242 // assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
1243 // test_chat_parse(
1244 // "I'm\nthinking</think>Hello, world!\nWhat's up?",
1245 // COMMON_CHAT_FORMAT_HERMES_2_PRO));
1246 assert_msg_equals(message_assist_thoughts,
1247 test_chat_parse(
1248 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1249 /* is_partial= */ false,
1250 {
1251 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1252 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1253 }));
1254 assert_msg_equals(message_assist_thoughts,
1255 test_chat_parse(
1256 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1257 /* is_partial= */ true,
1258 {
1259 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1260 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1261 }));
1262 assert_msg_equals(message_assist_thoughts_unparsed_md,
1263 test_chat_parse(
1264 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
1265 /* is_partial= */ false,
1266 {
1267 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1268 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1269 /* .reasoning_in_content = */ true,
1270 /* .thinking_forced_open = */ false,
1271 /* .parse_tool_calls = */ false,
1272 }));
1273 assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
1274 test_chat_parse(
1275 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
1276 /* is_partial= */ true,
1277 {
1278 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1279 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1280 /* .reasoning_in_content = */ true,
1281 /* .thinking_forced_open = */ false,
1282 }));
1283 assert_msg_equals(message_assist_thoughts_unopened_unparsed,
1284 test_chat_parse(
1285 "I'm\nthinking</think>Hello, world!\nWhat's up?",
1286 /* is_partial= */ false,
1287 {
1288 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1289 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1290 }));
1291 assert_msg_equals(message_assist_thoughts,
1292 test_chat_parse(
1293 "I'm\nthinking</think>Hello, world!\nWhat's up?",
1294 /* is_partial= */ false,
1295 {
1296 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1297 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1298 /* .reasoning_in_content = */ false,
1299 /* .thinking_forced_open = */ true,
1300 }));
1301
1302 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1303 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1304 "<tool_call>\n"
1305 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1306 "</tool_call>");
1307
1308 // Test multiple tool calls with template
1309 common_chat_msg message_assist_multiple_calls_template;
1310 message_assist_multiple_calls_template.role = "assistant";
1311 message_assist_multiple_calls_template.content = "";
1312 message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
1313 message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""});
1314
1315 test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools,
1316 "<tool_call>\n"
1317 "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
1318 "</tool_call>\n"
1319 "<tool_call>\n"
1320 "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n"
1321 "</tool_call>");
1322
1323 test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools,
1324 "<tool_call>\n"
1325 "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n"
1326 "</tool_call>");
1327 assert_msg_equals(
1328 simple_assist_msg("", /* reasoning_content= */ "<tool_call>nah uhg</tool_call>"),
1329 test_chat_parse(
1330 "<think><tool_call>nah uhg</tool_call>",
1331 /* is_partial= */ false,
1332 {
1333 /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
1334 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1335 }));
1336 }
1337 {
1338 auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
1339 std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
1340
1341 assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1342 assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1343 assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
1344 common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
1345 assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
1346 common_chat_templates_apply(
1347 read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(),
1348 inputs_tools_builtin)
1349 .format);
1350
1351 assert_equals(
1352 message_assist_call,
1353 test_chat_parse(
1354 "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}",
1355 /* is_partial= */ false,
1356 {COMMON_CHAT_FORMAT_LLAMA_3_X}));
1357
1358 // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
1359 test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools,
1360 "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
1361 test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
1362 "<|python_tag|>python.call(code=\"print('hey')\")");
1363 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1364 "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
1365 }
1366 {
1367 auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja");
1368 std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
1369
1370 assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1371 assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1372
1373 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1374 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1375 "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
1376 }
1377 {
1378 auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
1379 std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
1380
1381 assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
1382 common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1383 assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
1384 common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1385 assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
1386 common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1387
1388 for (auto is_partial : { false, true }) {
1389 assert_equals(
1390 message_assist_call,
1391 test_chat_parse(
1392 "<function=special_function>{\"arg1\": 1}</function>",
1393 is_partial,
1394 {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
1395 }
1396
1397 assert_equals(
1398 message_assist_call,
1399 test_chat_parse(
1400 "<function=special_function>{\"arg1\": 1}<",
1401 /* is_partial= */ true,
1402 {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
1403
1404 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1405 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1406 "<function=special_function>{\"arg1\": 1}</function>");
1407 }
1408 {
1409 auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja");
1410 std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
1411
1412 assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1413 assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1414
1415 assert_msg_equals(
1416 simple_assist_msg(
1417 "Hello, world!\nnono\nWhat's up?",
1418 "",
1419 "special_function",
1420 "{\"arg1\": 1}"),
1421 test_chat_parse(
1422 "all\n"
1423 "Hello, world!\n"
1424 "nono\n"
1425 "What's up?>>>special_function\n"
1426 "{\"arg1\": 1}\n",
1427 /* is_partial= */ false,
1428 {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
1429 assert_msg_equals(message_assist_call_python_lines,
1430 test_chat_parse(
1431 "python\n"
1432 "# This is a program:\n"
1433 "print('hey')",
1434 /* is_partial= */ false,
1435 {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
1436 assert_msg_equals(message_assist_call_python_lines_unclosed,
1437 test_chat_parse(
1438 "python\n"
1439 "# This is a program:\n"
1440 "print('hey')",
1441 /* is_partial= */ true,
1442 {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
1443 assert_msg_equals(message_assist_call,
1444 test_chat_parse(
1445 "special_function\n"
1446 "{\"arg1\": 1} \n ",
1447 /* is_partial= */ false,
1448 {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
1449 assert_msg_equals(message_assist,
1450 test_chat_parse(
1451 "all\n"
1452 "Hello, world!\nWhat's up?",
1453 /* is_partial= */ false,
1454 {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
1455
1456 test_templates(tmpls.get(), end_tokens, message_assist, {},
1457 "all\n"
1458 "Hello, world!\n"
1459 "What's up?",
1460 /* expect_grammar_triggered= */ false);
1461 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1462 "special_function\n"
1463 "{\"arg1\": 1}");
1464 }
1465 {
1466 auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
1467 std::vector<std::string> end_tokens{ "<|eot_id|>" };
1468
1469 assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1470 assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1471
1472 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1473 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1474 " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
1475 }
1476 {
1477 // Original DeepSeek R1 template. Leaves <๏ฝtoolโcallsโbegin๏ฝ> and others unclosed. Our logic fixes the prompt.
1478 auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
1479 std::vector<std::string> end_tokens{ "<๏ฝendโofโsentence๏ฝ>" };
1480
1481 for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
1482 auto params = common_chat_templates_apply(tmpls.get(), inputs);
1483 assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format);
1484 assert_equals(true, params.thinking_forced_open);
1485 }
1486
1487 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1488 test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1489 assert_msg_equals(
1490 simple_assist_msg("Hello, world!\nWhat's up?", "<think>I'm\nthinking"),
1491 test_chat_parse(
1492 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1493 /* is_partial= */ false,
1494 {
1495 COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1496 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1497 /* .reasoning_in_content = */ false,
1498 /* .thinking_forced_open = */ true,
1499 }));
1500 assert_msg_equals(
1501 simple_assist_msg("", "I need to remember the correct syntax. It starts with <๏ฝtoolโcallsโbegin๏ฝ> and ends with"),
1502 test_chat_parse(
1503 "I need to remember the correct syntax. It starts with <๏ฝtoolโcallsโbegin๏ฝ> and ends with",
1504 /* is_partial= */ true,
1505 {
1506 COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1507 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1508 /* .reasoning_in_content = */ false,
1509 /* .thinking_forced_open = */ true,
1510 }));
1511 assert_msg_equals(message_assist_thoughts,
1512 test_chat_parse(
1513 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1514 /* is_partial= */ false,
1515 {
1516 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1517 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1518 }));
1519 assert_msg_equals(message_assist_thoughts_unopened_unparsed,
1520 test_chat_parse(
1521 "I'm\nthinking</think>Hello, world!\nWhat's up?",
1522 /* is_partial= */ false,
1523 {
1524 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1525 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1526 }));
1527 assert_msg_equals(message_assist_thoughts,
1528 test_chat_parse(
1529 "I'm\nthinking</think>Hello, world!\nWhat's up?",
1530 /* is_partial= */ false,
1531 {
1532 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1533 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1534 /* .reasoning_in_content = */ false,
1535 /* .thinking_forced_open = */ true,
1536 }));
1537 assert_msg_equals(message_assist_thoughts,
1538 // Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
1539 test_chat_parse(
1540 "I'm\nthinking</think>Hello, world!\nWhat's up?",
1541 /* is_partial= */ false,
1542 {
1543 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1544 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1545 /* .reasoning_in_content = */ false,
1546 /* .thinking_forced_open = */ true,
1547 }));
1548 // test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1549 // "<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>function<๏ฝtoolโsep๏ฝ>special_function\n"
1550 // "```json\n"
1551 // "{\"arg1\": 1}\n"
1552 // // Look what's not here: <๏ฝtoolโcallsโend๏ฝ> (also missing the <๏ฝendโofโsentence๏ฝ>, but that is removed lazily by the test's delta logic)
1553 // "```<๏ฝtoolโcallโend๏ฝ>",
1554 // /* expect_grammar_triggered= */ true,
1555 // /* test_grammar_if_triggered= */ false);
1556 }
1557 {
1558 // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
1559 auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
1560 std::vector<std::string> end_tokens{ "<๏ฝendโofโsentence๏ฝ>" };
1561
1562 assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1563 assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1564
1565 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1566 test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1567 assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
1568 test_chat_parse(
1569 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1570 /* is_partial= */ false,
1571 {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
1572 assert_msg_equals(message_assist_thoughts,
1573 test_chat_parse(
1574 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1575 /* is_partial= */ false,
1576 {
1577 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1578 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1579 }));
1580 assert_msg_equals(message_assist_thoughts,
1581 test_chat_parse(
1582 "I'm\nthinking</think>Hello, world!\nWhat's up?",
1583 /* is_partial= */ false,
1584 {
1585 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1586 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1587 /* .reasoning_in_content = */ false,
1588 /* .thinking_forced_open = */ true,
1589 }));
1590
1591 assert_msg_equals(message_assist_call_thoughts_unparsed,
1592 test_chat_parse(
1593 "<think>I'm\nthinking</think>\n\n"
1594 "<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>function<๏ฝtoolโsep๏ฝ>special_function\n"
1595 "```json\n"
1596 "{\"arg1\": 1}\n"
1597 "```<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
1598 /* is_partial= */ false,
1599 {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
1600 assert_msg_equals(message_assist_call,
1601 test_chat_parse(
1602 "<๏ฝtoolโcalls๏ฝ>function<๏ฝtoolโsep๏ฝ>special_function\n"
1603 "```json\n"
1604 "{\"arg1\": 1}\n"
1605 "```<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
1606 /* is_partial= */ false,
1607 {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
1608
1609 assert_msg_equals(message_assist_call_thoughts,
1610 test_chat_parse(
1611 "<think>I'm\nthinking</think>\n\n"
1612 "<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>function<๏ฝtoolโsep๏ฝ>special_function\n"
1613 "```json\n"
1614 "{\"arg1\": 1}\n"
1615 "```<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
1616 /* is_partial= */ false,
1617 {
1618 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
1619 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1620 }));
1621 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
1622 "<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>function<๏ฝtoolโsep๏ฝ>special_function\n"
1623 "```json\n"
1624 "{\"arg1\": 1}\n"
1625 "```<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>");
1626 }
1627 {
1628 auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja");
1629 std::vector<std::string> end_tokens{ "<|end_of_text|>" };
1630
1631 assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1632
1633 assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1634
1635 // Test parsing regular content
1636 assert_msg_equals(message_assist,
1637 test_chat_parse(
1638 "Hello, world!\nWhat's up?",
1639 /* is_partial= */ false,
1640 {COMMON_CHAT_FORMAT_GRANITE}));
1641 assert_msg_equals(
1642 message_assist,
1643 test_chat_parse(
1644 "Hello, world!\nWhat's up?",
1645 /* is_partial= */ true,
1646 {COMMON_CHAT_FORMAT_GRANITE}));
1647
1648 // Test parsing content with thinking
1649 assert_msg_equals(message_assist_thoughts,
1650 test_chat_parse(
1651 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1652 /* is_partial= */ false,
1653 {
1654 /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1655 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1656 }));
1657 assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
1658 test_chat_parse(
1659 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1660 /* is_partial= */ false,
1661 {COMMON_CHAT_FORMAT_GRANITE}));
1662 assert_msg_equals(message_assist_thoughts,
1663 test_chat_parse(
1664 "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
1665 /* is_partial= */ true,
1666 {
1667 /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1668 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1669 }));
1670 assert_msg_equals(message_assist_thoughts,
1671 test_chat_parse(
1672 "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
1673 /* is_partial= */ false,
1674 {
1675 /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1676 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1677 }));
1678 assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
1679 test_chat_parse(
1680 "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
1681 /* is_partial= */ false,
1682 {COMMON_CHAT_FORMAT_GRANITE}));
1683 assert_msg_equals(message_assist_empty,
1684 test_chat_parse(
1685 "<think",
1686 /* is_partial= */ true,
1687 {
1688 /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1689 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1690 }));
1691 assert_msg_equals(message_assist_empty,
1692 test_chat_parse(
1693 "<think",
1694 /* is_partial= */ true,
1695 {COMMON_CHAT_FORMAT_GRANITE}));
1696 assert_msg_equals(message_assist_thoughts_no_content,
1697 test_chat_parse(
1698 "<think>I'm\nthinking",
1699 /* is_partial= */ true,
1700 {
1701 /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1702 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1703 }));
1704 assert_msg_equals(
1705 message_assist_empty,
1706 test_chat_parse(
1707 "<think>I'm\nthinking</think><response",
1708 /* is_partial= */ true,
1709 {COMMON_CHAT_FORMAT_GRANITE}));
1710
1711 // Test parsing tool calls
1712 assert_msg_equals(message_assist_call,
1713 test_chat_parse(
1714 "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
1715 /* is_partial= */ false,
1716 {COMMON_CHAT_FORMAT_GRANITE}));
1717 assert_msg_equals(
1718 message_assist_call_empty_args,
1719 test_chat_parse(
1720 "<|tool_call|>[{\"name\": \"special_function\"",
1721 /* is_partial= */ true,
1722 {COMMON_CHAT_FORMAT_GRANITE}));
1723 assert_msg_equals(
1724 message_assist_call_cutoff_args,
1725 test_chat_parse(
1726 "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
1727 /* is_partial= */ true,
1728 {COMMON_CHAT_FORMAT_GRANITE}));
1729 assert_msg_equals(
1730 message_assist_call_cutoff_args,
1731 test_chat_parse(
1732 "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
1733 /* is_partial= */ true,
1734 {
1735 /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1736 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1737 }));
1738
1739 // Test parsing tool calls with thinking
1740 assert_msg_equals(
1741 message_assist_call_thoughts,
1742 test_chat_parse(
1743 "<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
1744 /* is_partial= */ true,
1745 {
1746 /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1747 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1748 }));
1749
1750 // Test template generation for regular content
1751 test_templates(tmpls.get(), end_tokens, message_assist, tools,
1752 "Hello, world!\nWhat's up?",
1753 /* expect_grammar_triggered= */ false);
1754 // TODO @ngxson : generic tool call should be removed in the future
1755#if 0
1756 // Test template generation for tool calls
1757 test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
1758 "{\n"
1759 " \"tool_calls\": [\n"
1760 " {\n"
1761 " \"name\": \"special_function\",\n"
1762 " \"arguments\": {\n"
1763 " \"arg1\": 1\n"
1764 " },\n"
1765 " \"id\": \"123456789\"\n"
1766 " }\n"
1767 " ],\n"
1768 " \"content\": \"\"\n"
1769 "}",
1770 /* expect_grammar_triggered= */ false
1771 );
1772#endif
1773 }
1774 {
1775 auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja");
1776 std::vector<std::string> end_tokens{ "<|return|>", "<|call|>" };
1777
1778 assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1779 assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1780
1781 assert_msg_equals(simple_assist_msg("", "I'm\nthink"),
1782 test_chat_parse(
1783 "<|channel|>analysis<|message|>I'm\nthink",
1784 /* is_partial= */ true,
1785 {
1786 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1787 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1788 }));
1789 assert_msg_equals(simple_assist_msg("", "I'm\nthinking"),
1790 test_chat_parse(
1791 "<|channel|>analysis<|message|>I'm\nthinking<|end|>",
1792 /* is_partial= */ true,
1793 {
1794 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1795 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1796 }));
1797 assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
1798 test_chat_parse(
1799 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1800 "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
1801 /* is_partial= */ false,
1802 {
1803 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1804 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1805 }));
1806 assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
1807 test_chat_parse(
1808 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1809 "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1",
1810 /* is_partial= */ true,
1811 {
1812 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1813 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1814 }));
1815 assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
1816 test_chat_parse(
1817 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1818 "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
1819 /* is_partial= */ true,
1820 {
1821 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1822 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1823 }));
1824 assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
1825 test_chat_parse(
1826 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1827 "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
1828 /* is_partial= */ false,
1829 {
1830 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1831 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1832 }));
1833 assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
1834 test_chat_parse(
1835 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1836 "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
1837 /* is_partial= */ false,
1838 {
1839 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1840 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1841 }));
1842 assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
1843 test_chat_parse(
1844 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1845 "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?",
1846 /* is_partial= */ true,
1847 {
1848 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1849 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1850 }));
1851 assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
1852 test_chat_parse(
1853 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1854 "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>"
1855 "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
1856 /* is_partial= */ true,
1857 {
1858 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1859 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1860 }));
1861
1862 // Test parse_tool_calls == false
1863 assert_msg_equals(
1864 simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
1865 test_chat_parse(
1866 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1867 "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
1868 /* is_partial= */ true,
1869 {
1870 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1871 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1872 /* .reasoning_in_content = */ false,
1873 /* .thinking_forced_open = */ false,
1874 /* .parse_tool_calls = */ false,
1875 }));
1876 assert_msg_equals(
1877 simple_assist_msg("", "I'm\nthinking"),
1878 test_chat_parse(
1879 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1880 "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
1881 /* is_partial= */ true,
1882 {
1883 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1884 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1885 /* .reasoning_in_content = */ false,
1886 /* .thinking_forced_open = */ false,
1887 /* .parse_tool_calls = */ false,
1888 }));
1889 assert_msg_equals(
1890 simple_assist_msg("", "I'm\nthinking"),
1891 test_chat_parse(
1892 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1893 "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
1894 /* is_partial= */ false,
1895 {
1896 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1897 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1898 /* .reasoning_in_content = */ false,
1899 /* .thinking_forced_open = */ false,
1900 /* .parse_tool_calls = */ false,
1901 }));
1902
1903 // Test reasoning formats
1904 assert_msg_equals(
1905 simple_assist_msg(
1906 "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
1907 test_chat_parse(
1908 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1909 "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
1910 /* is_partial= */ false,
1911 {
1912 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1913 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
1914 }));
1915
1916 assert_msg_equals(
1917 simple_assist_msg(
1918 "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
1919 test_chat_parse(
1920 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1921 "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
1922 /* is_partial= */ false,
1923 {
1924 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1925 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1926 /* .reasoning_in_content = */ true,
1927 }));
1928
1929 // Test tool calling in role header
1930 assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
1931 test_chat_parse(
1932 " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}",
1933 /* is_partial= */ false,
1934 {
1935 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1936 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1937 }));
1938 assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
1939 test_chat_parse(
1940 " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
1941 /* is_partial= */ false,
1942 {
1943 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1944 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1945 }));
1946 assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
1947 test_chat_parse(
1948 "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
1949 "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
1950 /* is_partial= */ false,
1951 {
1952 /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
1953 /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
1954 }));
1955 }
1956 {
1957 // Seed-OSS format tests
1958 auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja");
1959 std::vector<std::string> end_tokens{ "<seed:eos>" };
1960
1961 assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
1962 assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
1963
1964 test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
1965
1966 // Test simple reasoning content
1967 assert_msg_equals(
1968 simple_assist_msg("Hello, world!", "I'm thinking about the answer"),
1969 test_chat_parse(
1970 "<seed:think>I'm thinking about the answer</seed:think>Hello, world!",
1971 /* is_partial= */ false,
1972 {
1973 /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
1974 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1975 }));
1976
1977 // Test budget reflection tags
1978 common_chat_msg msg_budget_reflect;
1979 msg_budget_reflect.role = "assistant";
1980 msg_budget_reflect.content = "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>I need to calculate this step by step.";
1981 msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution.";
1982 assert_msg_equals(
1983 msg_budget_reflect,
1984 test_chat_parse(
1985 "<seed:think>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:think>"
1986 "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>"
1987 "I need to calculate this step by step.",
1988 /* is_partial= */ false,
1989 {
1990 /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
1991 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1992 }));
1993
1994 // Test tool calls with Seed-OSS format
1995 common_chat_msg msg_tool_call;
1996 msg_tool_call.role = "assistant";
1997 msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
1998 assert_msg_equals(
1999 msg_tool_call,
2000 test_chat_parse(
2001 "<seed:tool_call>\n"
2002 "<function=calculate_sum>\n"
2003 "<parameter=numbers>[1, 2, 3]</parameter>\n"
2004 "</function>\n"
2005 "</seed:tool_call>",
2006 /* is_partial= */ false,
2007 {COMMON_CHAT_FORMAT_SEED_OSS}));
2008
2009 // Test reasoning + tool call combination
2010 common_chat_msg msg_reasoning_tool;
2011 msg_reasoning_tool.role = "assistant";
2012 msg_reasoning_tool.content = "";
2013 msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers";
2014 msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
2015 assert_msg_equals(
2016 msg_reasoning_tool,
2017 test_chat_parse(
2018 "<seed:think>I need to calculate the sum of these numbers</seed:think>"
2019 "<seed:tool_call>\n"
2020 "<function=calculate_sum>\n"
2021 "<parameter=numbers>[1, 2, 3]</parameter>\n"
2022 "</function>\n"
2023 "</seed:tool_call>",
2024 /* is_partial= */ false,
2025 {
2026 /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
2027 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2028 }));
2029
2030 // Test deltas: the number of tool calls in partial parses should never decrease
2031 std::string tool_msg = "<seed:tool_call>\n"
2032 "<function=fun>\n"
2033 "<parameter=smth>[1, 2, 3]</parameter>\n"
2034 "</function>";
2035 std::size_t previousToolCalls = 0;
2036 for (std::size_t i = std::string("<seed:tool_call>").length(); i < tool_msg.length() - 1; i++) {
2037 auto partial = tool_msg.substr(0, i);
2038 auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
2039 if (partial_res.tool_calls.size() < previousToolCalls) {
2040 throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size()));
2041 }
2042 previousToolCalls = partial_res.tool_calls.size();
2043 }
2044
2045 // Test multiple parameters in tool call
2046 common_chat_msg msg_multi_param;
2047 msg_multi_param.role = "assistant";
2048 msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""});
2049 assert_msg_equals(
2050 msg_multi_param,
2051 test_chat_parse(
2052 "<seed:tool_call>\n"
2053 "<function=process_data>\n"
2054 "<parameter=input>test</parameter>\n"
2055 "<parameter=format>json</parameter>\n"
2056 "</function>\n"
2057 "</seed:tool_call>",
2058 /* is_partial= */ false,
2059 {COMMON_CHAT_FORMAT_SEED_OSS}));
2060
2061 // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done
2062 assert_msg_equals(
2063 simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"),
2064 test_chat_parse(
2065 "<seed:tool_call>\n"
2066 "<function=calculate_sum>\n"
2067 "<parameter=numbers>[1,\n",
2068 /* is_partial= */ true,
2069 {COMMON_CHAT_FORMAT_SEED_OSS}));
2070
2071 // Test incomplete reasoning tag
2072 assert_msg_equals(
2073 simple_assist_msg("", "I was thinking"),
2074 test_chat_parse(
2075 "<seed:think>I was thinking",
2076 /* is_partial= */ true,
2077 {
2078 /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
2079 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2080 }));
2081
2082 // Test content without reasoning
2083 assert_msg_equals(
2084 simple_assist_msg("This is a simple response without reasoning."),
2085 test_chat_parse(
2086 "This is a simple response without reasoning.",
2087 /* is_partial= */ false,
2088 {COMMON_CHAT_FORMAT_SEED_OSS}));
2089 }
2090 {
2091 auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja");
2092 std::vector<std::string> end_tokens{ "<SPECIAL_12>" };
2093
2094 assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
2095 assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
2096
2097 // Test parsing regular content
2098 assert_msg_equals(message_assist,
2099 test_chat_parse(
2100 "Hello, world!\nWhat's up?",
2101 /* is_partial= */ false,
2102 {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
2103
2104 // Test parsing content with thinking
2105 assert_msg_equals(message_assist_thoughts,
2106 test_chat_parse(
2107 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
2108 /* is_partial= */ false,
2109 {
2110 /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
2111 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2112 }));
2113
2114 // Test parsing tool calls
2115 assert_msg_equals(message_assist_call,
2116 test_chat_parse(
2117 "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
2118 /* is_partial= */ false,
2119 {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
2120
2121 // Test parsing tool calls with thinking
2122 assert_msg_equals(message_assist_call_thoughts,
2123 test_chat_parse(
2124 "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
2125 /* is_partial= */ false,
2126 {
2127 /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
2128 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2129 }));
2130
2131 // Test tool calls with extra content
2132 assert_msg_equals(message_assist_call_content,
2133 test_chat_parse(
2134 "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
2135 /* is_partial= */ false,
2136 {COMMON_CHAT_FORMAT_NEMOTRON_V2}
2137 ));
2138
2139 // Test tool calls with extra content AND thinking
2140 assert_msg_equals(message_assist_call_thoughts_content,
2141 test_chat_parse(
2142 "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
2143 /* is_partial= */ false,
2144 {
2145 /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
2146 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2147 }));
2148
2149 // Test template generation for regular content
2150 test_templates(tmpls.get(), end_tokens, message_assist, tools,
2151 "Hello, world!\nWhat's up?\n",
2152 /* expect_grammar_triggered= */ false);
2153
2154 // Test template generation for tool calls
2155 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
2156 "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
2157 /* expect_grammar_triggered= */ true
2158 );
2159 }
2160 {
2161 auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja");
2162 std::vector<std::string> end_tokens{ "<๏ฝendโofโsentence๏ฝ>" };
2163
2164 for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
2165 auto params = common_chat_templates_apply(tmpls.get(), inputs);
2166 assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format);
2167 assert_equals(true, params.thinking_forced_open);
2168 }
2169
2170 test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
2171 test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
2172 assert_msg_equals(
2173 simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
2174 test_chat_parse(
2175 "I'm\nthinking</think>Hello, world!\nWhat's up?",
2176 /* is_partial= */ false,
2177 {
2178 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2179 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2180 /* .reasoning_in_content = */ false,
2181 /* .thinking_forced_open = */ true,
2182 }));
2183 // variant: thinking forced open, reasoning_format none
2184 assert_msg_equals(
2185 simple_assist_msg("REASONING</think>ok", ""),
2186 test_chat_parse(
2187 "REASONING</think>ok",
2188 /* is_partial= */ false,
2189 {
2190 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2191 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
2192 /* .reasoning_in_content = */ false,
2193 /* .thinking_forced_open = */ true,
2194 /* .parse_tool_calls = */ true,
2195 }));
2196 // variant: happy path for when it works as the model card says it should
2197 assert_msg_equals(
2198 simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
2199 test_chat_parse(
2200 "<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
2201 /* is_partial= */ false,
2202 {
2203 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2204 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2205 /* .reasoning_in_content = */ false,
2206 /* .thinking_forced_open = */ false,
2207 /* .parse_tool_calls = */ true,
2208 }));
2209 // variant: simple + thinking open
2210 assert_msg_equals(
2211 simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
2212 test_chat_parse(
2213 "REASONING</think><๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
2214 /* is_partial= */ false,
2215 {
2216 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2217 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2218 /* .reasoning_in_content = */ false,
2219 /* .thinking_forced_open = */ true,
2220 /* .parse_tool_calls = */ true,
2221 }));
2222 // variant: simple + multiple tool calls
2223 common_chat_msg message_assist_multiple_calls;
2224 message_assist_multiple_calls.role = "assistant";
2225 message_assist_multiple_calls.content = "CONTENT";
2226 message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
2227 message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
2228 assert_msg_equals(
2229 message_assist_multiple_calls,
2230 test_chat_parse(
2231 "CONTENT<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time<๏ฝtoolโsep๏ฝ>{\"city\": \"Paris\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_weather<๏ฝtoolโsep๏ฝ>{\"city\": \"Paris\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
2232 /* is_partial= */ false,
2233 {
2234 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2235 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2236 /* .reasoning_in_content = */ false,
2237 /* .thinking_forced_open = */ false,
2238 /* .parse_tool_calls = */ true,
2239 }));
2240 // variant: thinking forced open + tool call in reasoning content
2241 assert_msg_equals(
2242 simple_assist_msg("", "REASONING<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time2<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo2\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
2243 test_chat_parse(
2244 "REASONING<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time2<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo2\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>REASONING</think><๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
2245 /* is_partial= */ false,
2246 {
2247 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2248 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2249 /* .reasoning_in_content = */ false,
2250 /* .thinking_forced_open = */ true,
2251 /* .parse_tool_calls = */ true,
2252 }));
2253 // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
2254 // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
2255 // to make tool calls in reasoning content according to the model card, but it does sometimes, so
2256 // add the reasoning content as regular content and parse the tool calls.
2257 assert_msg_equals(
2258 simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
2259 test_chat_parse(
2260 "REASONING<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
2261 /* is_partial= */ false,
2262 {
2263 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2264 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2265 /* .reasoning_in_content = */ false,
2266 /* .thinking_forced_open = */ true,
2267 /* .parse_tool_calls = */ true,
2268 }));
2269 // variant: thinking forced open + tool call in reasoning content + no closing think + partial
2270 assert_msg_equals(
2271 simple_assist_msg("", "REASONING<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>", "", ""),
2272 test_chat_parse(
2273 "REASONING<๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>get_time<๏ฝtoolโsep๏ฝ>{\"city\": \"Tokyo\"}<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>",
2274 /* is_partial= */ true,
2275 {
2276 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2277 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2278 /* .reasoning_in_content = */ false,
2279 /* .thinking_forced_open = */ true,
2280 /* .parse_tool_calls = */ true,
2281 }));
2282 // variant: thinking not forced open + missing reasoning + no tool calls
2283 assert_msg_equals(
2284 simple_assist_msg("CONTENT", ""),
2285 test_chat_parse(
2286 "CONTENT",
2287 /* is_partial= */ false,
2288 {
2289 COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
2290 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2291 /* .reasoning_in_content = */ false,
2292 /* .thinking_forced_open = */ false,
2293 /* .parse_tool_calls = */ true,
2294 }));
2295 }
2296 {
2297 auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
2298 std::vector<std::string> end_tokens{ "<|assistant_end|>" };
2299
2300 assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
2301 assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
2302
2303 // Test parsing regular content
2304 assert_msg_equals(message_assist,
2305 test_chat_parse(
2306 "Hello, world!\nWhat's up?",
2307 /* is_partial= */ false,
2308 {COMMON_CHAT_FORMAT_APERTUS}));
2309
2310 // Test parsing content with thinking
2311 assert_msg_equals(message_assist_thoughts,
2312 test_chat_parse(
2313 "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?",
2314 /* is_partial= */ false,
2315 {
2316 /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
2317 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2318 }));
2319
2320 // Test parsing tool calls
2321 assert_msg_equals(message_assist_call,
2322 test_chat_parse(
2323 "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
2324 /* is_partial= */ false,
2325 {COMMON_CHAT_FORMAT_APERTUS}));
2326
2327 // Test parsing tool calls with thinking
2328 assert_msg_equals(message_assist_call_thoughts,
2329 test_chat_parse(
2330 "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
2331 /* is_partial= */ false,
2332 {
2333 /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
2334 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2335 }));
2336
2337 // Test tool calls with extra content
2338 assert_msg_equals(message_assist_call_content,
2339 test_chat_parse(
2340 "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
2341 /* is_partial= */ false,
2342 {COMMON_CHAT_FORMAT_APERTUS}
2343 ));
2344
2345 // Test tool calls with extra content AND thinking
2346 assert_msg_equals(message_assist_call_thoughts_content,
2347 test_chat_parse(
2348 "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
2349 /* is_partial= */ false,
2350 {
2351 /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
2352 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2353 }));
2354
2355 // Test template generation for regular content
2356 test_templates(tmpls.get(), end_tokens, message_assist, tools,
2357 "Hello, world!\nWhat's up?",
2358 /* expect_grammar_triggered= */ false);
2359
2360 // Test template generation for tool calls
2361 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
2362 "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
2363 /* expect_grammar_triggered= */ true
2364 );
2365
2366 // TODO @ngxson : not sure why this fails, but not very important for now
2367 // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
2368 }
2369 {
        // LFM2 format tests
        auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja");
        std::vector<std::string> end_tokens{ "<|im_end|>" };

        // Build an inputs variant whose system prompt requests a forced JSON schema;
        // std::invoke on immediately-invoked lambdas keeps the setup expression-local.
        auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs {
            common_chat_templates_inputs inputs;
            inputs.messages = {
                std::invoke([&]() -> common_chat_msg {
                    common_chat_msg msg;
                    msg.role = "system";
                    msg.content = "force json schema.\n";
                    return msg;
                }),
                message_user,
            };
            inputs.tools = {special_function_tool};
            return inputs;
        });

        // No tools: plain content-only prompt, no grammar.
        {
            auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools);
            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
            assert_equals(false, params.grammar_lazy);
            assert_equals(std::string(R"(<|im_start|>user
Hey there!<|im_end|>
<|im_start|>assistant
)"), params.prompt);
        }

        // Tools listed in the system block, but still no grammar is emitted.
        {
            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools);
            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
            assert_equals(false, params.grammar_lazy);
            assert_equals(std::string(R"(<|im_start|>system
List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
<|im_start|>user
Hey there!<|im_end|>
<|im_start|>assistant
)"), params.prompt);
            assert_equals(true, params.grammar.empty());
        }

        // Forced-JSON-schema system prompt switches to the JSON tools format
        // and produces a (lazy) grammar.
        {
            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema);
            assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format);
            assert_equals(true, params.grammar_lazy);
            assert_equals(std::string(R"(<|im_start|>system
List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
<|im_start|>user
Hey there!<|im_end|>
<|im_start|>assistant
)"), params.prompt);
            assert_equals(false, params.grammar.empty());
        }

        // Test parsing regular content
        assert_msg_equals(message_assist,
                          test_chat_parse(
                              "Hello, world!\nWhat's up?",
                              /* is_partial= */ false,
                              {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));

        // Test single tool call with JSON format
        common_chat_msg msg_single_tool_call;
        msg_single_tool_call.role = "assistant";
        msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""});
        assert_msg_equals(
            msg_single_tool_call,
            test_chat_parse(
                "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>",
                /* is_partial= */ false,
                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));

        // Test tool call with string argument
        common_chat_msg msg_tool_call_string;
        msg_tool_call_string.role = "assistant";
        msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
2447 assert_msg_equals(
2448 msg_tool_call_string,
2449 test_chat_parse(
2450 "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
2451 /* is_partial= */ false,
2452 {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
2453
2454 // Test tool call with multiple arguments
2455 common_chat_msg msg_multi_args;
2456 msg_multi_args.role = "assistant";
2457 msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""});
2458 assert_msg_equals(
2459 msg_multi_args,
2460 test_chat_parse(
2461 "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>",
2462 /* is_partial= */ false,
2463 {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
2464
2465 // Test multiple tool calls in single array
2466 common_chat_msg msg_multiple_tools;
2467 msg_multiple_tools.role = "assistant";
2468 msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
2469 msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""});
2470 assert_msg_equals(
2471 msg_multiple_tools,
2472 test_chat_parse(
2473 "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>",
2474 /* is_partial= */ false,
2475 {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
2476
2477 // Test tool call with content before
2478 common_chat_msg msg_content_before_tool;
2479 msg_content_before_tool.role = "assistant";
2480 msg_content_before_tool.content = "Let me check the weather for you.";
2481 msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
2482 assert_msg_equals(
2483 msg_content_before_tool,
2484 test_chat_parse(
2485 "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
2486 /* is_partial= */ false,
2487 {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
2488
2489 // Test tool call with content after
2490 common_chat_msg msg_content_after_tool;
2491 msg_content_after_tool.role = "assistant";
2492 msg_content_after_tool.content = "Here's the result.";
2493 msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
2494 assert_msg_equals(
2495 msg_content_after_tool,
2496 test_chat_parse(
2497 "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.",
2498 /* is_partial= */ false,
2499 {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
2500
2501 // Test tool call with newlines (common in LLM output)
2502 common_chat_msg msg_tool_call_newlines;
2503 msg_tool_call_newlines.role = "assistant";
2504 msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""});
2505 assert_msg_equals(
2506 msg_tool_call_newlines,
2507 test_chat_parse(
2508 "<|tool_call_start|>[{\n \"name\": \"get_current_time\",\n \"arguments\": {\n \"location\": \"Paris\"\n }\n}]<|tool_call_end|>",
2509 /* is_partial= */ false,
2510 {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
2511
2512 // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}]
2513 // Unlike other formats, LFM2 template does not render tool calls in conversation history,
2514 // so we don't use test_templates() for tool call generation. Instead, the parsing tests
2515 // above verify edge cases and format variations for the tool call output format.
2516 }
2517
2518 {
2519 auto tmpls = read_templates("models/templates/MiniMax-M2.jinja");
2520 std::vector<std::string> end_tokens{ "[e~[" };
2521
2522 assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
2523 assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
2524
2525 // Test parsing regular content
2526 assert_msg_equals(message_assist,
2527 test_chat_parse(
2528 "Hello, world!\nWhat's up?",
2529 /* is_partial= */ false,
2530 {COMMON_CHAT_FORMAT_MINIMAX_M2}));
2531
2532 // Test parsing content with thinking
2533 assert_msg_equals(message_assist_thoughts,
2534 test_chat_parse(
2535 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
2536 /* is_partial= */ false,
2537 {
2538 /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
2539 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2540 }));
2541
2542 // Test parsing tool calls
2543 assert_msg_equals(message_assist_call,
2544 test_chat_parse(
2545 "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
2546 /* is_partial= */ false,
2547 {COMMON_CHAT_FORMAT_MINIMAX_M2}));
2548
2549 // Test parsing tool calls with thinking
2550 assert_msg_equals(message_assist_call_thoughts,
2551 test_chat_parse(
2552 "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
2553 /* is_partial= */ false,
2554 {
2555 /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
2556 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2557 }));
2558
2559 // Test tool calls with extra content
2560 assert_msg_equals(message_assist_call_content,
2561 test_chat_parse(
2562 "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
2563 /* is_partial= */ false,
2564 {COMMON_CHAT_FORMAT_MINIMAX_M2}
2565 ));
2566
2567 // Test tool calls with extra content AND thinking
2568 assert_msg_equals(message_assist_call_thoughts_content,
2569 test_chat_parse(
2570 "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
2571 /* is_partial= */ false,
2572 {
2573 /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
2574 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2575 }));
2576
2577 // Test streaming
2578 test_parser_with_streaming(message_assist_call_thoughts_content,
2579 "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
2580 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2581 /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
2582 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2583 }); });
2584 test_parser_with_streaming(message_assist_call_thoughts_unparsed,
2585 "<think>I'm\nthinking</think>\n\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
2586 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2587 /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
2588 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
2589 }); });
2590 test_parser_with_streaming(message_assist_call_thoughts_content,
2591 "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>\n",
2592 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2593 /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
2594 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2595 }); });
2596 test_parser_with_streaming(message_assist_call_withopt,
2597 "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
2598 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2599 /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
2600 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
2601 }); });
2602
2603 // Test template generation for regular content
2604 test_templates(tmpls.get(), end_tokens, message_assist, tools,
2605 "Hello, world!\nWhat's up?",
2606 /* expect_grammar_triggered= */ false);
2607
2608 // Test template generation for tool calls
2609 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
2610 "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
2611 /* expect_grammar_triggered= */ true,
2612 /* test_grammar_if_triggered= */ true,
2613 /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
2614 /* ignore_whitespace_differences= */ true
2615 );
2616
2617 // Test template generation for tools with optional parameters
2618 test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
2619 "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
2620 /* expect_grammar_triggered= */ true,
2621 /* test_grammar_if_triggered= */ true,
2622 /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
2623 /* ignore_whitespace_differences= */ true
2624 );
2625 test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
2626 "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
2627 /* expect_grammar_triggered= */ true,
2628 /* test_grammar_if_triggered= */ true,
2629 /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
2630 /* ignore_whitespace_differences= */ true
2631 );
2632 }
2633
2634 {
2635 auto tmpls = read_templates("models/templates/GLM-4.6.jinja");
2636 std::vector<std::string> end_tokens{ "<|assistant|>", "<|observation|>" };
2637
2638 assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
2639 assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
2640
2641 // Test parsing regular content
2642 assert_msg_equals(message_assist,
2643 test_chat_parse(
2644 "Hello, world!\nWhat's up?",
2645 /* is_partial= */ false,
2646 {COMMON_CHAT_FORMAT_GLM_4_5}));
2647
2648 // Test parsing content with thinking
2649 assert_msg_equals(message_assist_thoughts,
2650 test_chat_parse(
2651 "\n<think>I'm\nthinking</think>\nHello, world!\nWhat's up?",
2652 /* is_partial= */ false,
2653 {
2654 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2655 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2656 }), true);
2657
2658 // Test parsing tool calls
2659 assert_msg_equals(message_assist_call,
2660 test_chat_parse(
2661 "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
2662 /* is_partial= */ false,
2663 {COMMON_CHAT_FORMAT_GLM_4_5}), true);
2664
2665 // Test parsing tool calls with thinking
2666 assert_msg_equals(message_assist_call_thoughts,
2667 test_chat_parse(
2668 "\n<think>I'm\nthinking</think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
2669 /* is_partial= */ false,
2670 {
2671 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2672 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2673 }), true);
2674
2675 // Test tool calls with extra content
2676 assert_msg_equals(message_assist_call_content,
2677 test_chat_parse(
2678 "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>Hello, world!\nWhat's up?",
2679 /* is_partial= */ false,
2680 {COMMON_CHAT_FORMAT_GLM_4_5}
2681 ), true);
2682
2683 // Test tool calls with extra content AND thinking
2684 assert_msg_equals(message_assist_call_thoughts_content,
2685 test_chat_parse(
2686 "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
2687 /* is_partial= */ false,
2688 {
2689 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2690 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2691 }), true);
2692
2693 // Test streaming
2694 test_parser_with_streaming(message_assist_call_thoughts_content,
2695 "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
2696 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2697 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2698 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2699 }); });
2700 test_parser_with_streaming(message_assist_call_thoughts_unparsed,
2701 "\n<think>I'm\nthinking</think>\n\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
2702 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2703 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2704 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
2705 }); });
2706 test_parser_with_streaming(message_assist_call_withopt,
2707 "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
2708 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2709 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2710 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2711 }); });
2712 test_parser_with_streaming(
2713 simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
2714 "<tool_call>complex_function\n"
2715 "<arg_key>name</arg_key>\n"
2716 "<arg_value>John Doe</arg_value>\n"
2717 "<arg_key>age</arg_key>\n"
2718 "<arg_value>30</arg_value>\n"
2719 "<arg_key>active</arg_key>\n"
2720 "<arg_value>true</arg_value>\n"
2721 "<arg_key>score</arg_key>\n"
2722 "<arg_value>95.5</arg_value>\n"
2723 "</tool_call>",
2724 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
2725 test_parser_with_streaming(
2726 simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
2727 "<tool_call>web_search\n"
2728 "<arg_key>query</arg_key>\n"
2729 "<arg_value>\"From Zero\" Linkin Park album tracklist complete songs</arg_value>\n"
2730 "<arg_key>limit</arg_key>\n"
2731 "<arg_value>3</arg_value>\n"
2732 "<arg_key>type</arg_key>\n"
2733 "<arg_value>text</arg_value>\n"
2734 "</tool_call>",
2735 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
2736
2737 // Test interleaved thinking
2738 test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"),
2739 "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
2740 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2741 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2742 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2743 }); });
2744 test_parser_with_streaming(simple_assist_msg("\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?", "", "special_function", "{\"arg1\": 1}"),
2745 "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
2746 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2747 /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
2748 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
2749 }); });
2750
2751 // Test template generation for regular content
2752 test_templates(tmpls.get(), end_tokens, message_assist, tools,
2753 "\n<think></think>\nHello, world!\nWhat's up?",
2754 /* expect_grammar_triggered= */ false);
2755
2756 // Test template generation for tool calls
2757 test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
2758 "\n<think></think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
2759 /* expect_grammar_triggered= */ true,
2760 /* test_grammar_if_triggered= */ false,
2761 /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
2762 /* ignore_whitespace_differences= */ true
2763 );
2764
2765 // Test template generation for tools with optional parameters
2766 test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
2767 "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
2768 /* expect_grammar_triggered= */ true,
2769 /* test_grammar_if_triggered= */ false,
2770 /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
2771 /* ignore_whitespace_differences= */ true
2772 );
2773 test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
2774 "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
2775 /* expect_grammar_triggered= */ true,
2776 /* test_grammar_if_triggered= */ false,
2777 /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
2778 /* ignore_whitespace_differences= */ true
2779 );
2780 }
2781
2782 {
2783 auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja");
2784 std::vector<std::string> end_tokens{ "<|im_end|>" };
2785
2786 assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
2787 assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
2788
2789 // Test parsing regular content
2790 assert_msg_equals(message_assist,
2791 test_chat_parse(
2792 "Hello, world!\nWhat's up?",
2793 /* is_partial= */ false,
2794 {COMMON_CHAT_FORMAT_KIMI_K2}));
2795
2796 // Test parsing content with thinking
2797 assert_msg_equals(message_assist_thoughts,
2798 test_chat_parse(
2799 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
2800 /* is_partial= */ false,
2801 {
2802 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2803 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
2804 }));
2805
2806 // Test parsing tool calls
2807 assert_msg_equals(message_assist_call,
2808 test_chat_parse(
2809 "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
2810 /* is_partial= */ false,
2811 {COMMON_CHAT_FORMAT_KIMI_K2}));
2812
2813 // Test parsing tool calls with thinking
2814 assert_msg_equals(message_assist_call_thoughts,
2815 test_chat_parse(
2816 "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
2817 /* is_partial= */ false,
2818 {
2819 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2820 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2821 }));
2822
2823 // Test tool calls with extra content
2824 assert_msg_equals(message_assist_call_content,
2825 test_chat_parse(
2826 "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
2827 /* is_partial= */ false,
2828 {COMMON_CHAT_FORMAT_KIMI_K2}
2829 ));
2830
2831 // Test tool calls with extra content AND thinking
2832 assert_msg_equals(message_assist_call_thoughts_content,
2833 test_chat_parse(
2834 "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
2835 /* is_partial= */ false,
2836 {
2837 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2838 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2839 }));
2840
2841 // Test streaming
2842 test_parser_with_streaming(message_assist_call_thoughts_content,
2843 "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
2844 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2845 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2846 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2847 }); });
2848 test_parser_with_streaming(message_assist_call_thoughts_unparsed,
2849 "<think>I'm\nthinking</think>\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
2850 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2851 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2852 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
2853 }); });
2854 test_parser_with_streaming(message_assist_call_thoughts_content,
2855 "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n",
2856 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2857 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2858 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2859 }); });
2860 test_parser_with_streaming(message_assist_call_withopt,
2861 "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
2862 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2863 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2864 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
2865 }); });
2866 test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"),
2867 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>",
2868 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2869 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2870 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2871 }); });
2872 test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"),
2873 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>",
2874 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2875 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2876 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2877 }); });
2878 test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"),
2879 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>",
2880 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2881 /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
2882 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
2883 }); });
2884 test_parser_with_streaming(
2885 simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
2886 "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>"
2887 "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
2888 "<|tool_call_end|><|tool_calls_section_end|>",
2889 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
2890 test_parser_with_streaming(
2891 simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
2892 "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>"
2893 "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
2894 "<|tool_call_end|><|tool_calls_section_end|>",
2895 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
2896 test_parser_with_streaming(
2897 simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"),
2898 "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
2899 "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"
2900 "<|tool_call_end|><|tool_calls_section_end|>",
2901 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
2902 test_parser_with_streaming(
2903 simple_assist_msg(
2904 "Let me start by examining the relevant files to understand the current implementation.", "",
2905 "read_file",
2906 "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"),
2907 "Let me start by examining the relevant files to understand the current implementation."
2908 "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
2909 "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
2910 "<|tool_call_end|><|tool_calls_section_end|>",
2911 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
2912 auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking.");
2913 multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" });
2914 multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" });
2915 multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" });
2916 multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! ๐ ๐ ๐ Testing emojis: ๐๐๐๐ and symbols: โโโโ\"}", "" });
2917 test_parser_with_streaming(multi_tool_msg,
2918 "<think>I'm thinking.</think>Let me call multiple tools."
2919 "<|tool_calls_section_begin|>"
2920 "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
2921 "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
2922 "<|tool_call_end|>"
2923 "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>"
2924 "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
2925 "<|tool_call_end|>"
2926 "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>"
2927 "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
2928 "<|tool_call_end|>"
2929 "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>"
2930 "{\"message\":\"Hello! ๐ ๐ ๐ Testing emojis: ๐๐๐๐ and symbols: โโโโ\"}"
2931 "<|tool_call_end|>"
2932 "<|tool_calls_section_end|>",
2933 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2934 COMMON_CHAT_FORMAT_KIMI_K2,
2935 COMMON_REASONING_FORMAT_DEEPSEEK
2936 }); });
2937 test_parser_with_streaming(
2938 simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
2939 "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
2940 "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
2941 "<|tool_call_end|><|tool_calls_section_end|>",
2942 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2943 COMMON_CHAT_FORMAT_KIMI_K2,
2944 COMMON_REASONING_FORMAT_DEEPSEEK
2945 }); });
2946 test_parser_with_streaming(
2947 simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
2948 "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
2949 "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
2950 "<|tool_call_end|><|tool_calls_section_end|>I'm still thinking</think>Hello",
2951 [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
2952 COMMON_CHAT_FORMAT_KIMI_K2,
2953 COMMON_REASONING_FORMAT_DEEPSEEK
2954 }); });
2955
        // Test template rendering
        // Build a multi-turn conversation — three assistant turns, each with
        // reasoning + a tool call, each followed by its tool-response message —
        // and check the exact prompt the Kimi-K2 template renders for it.
        common_chat_templates_inputs conversation_with_tools = inputs_tools;
        conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"));
        conversation_with_tools.messages.push_back({
            "tool",
            "Tool response 1",
            /* .content_parts = */ {},
            /* .tool_calls = */ {},
            /* .reasoning_content = */ "",
            /* .tool_name = */ "complex_function",
            /* .tool_call_id = */ "",
        });
        // Second round-trip: arguments contain escaped quotes to exercise JSON
        // escaping through the template.
        conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"));
        conversation_with_tools.messages.push_back({
            "tool",
            "Tool response 2",
            /* .content_parts = */ {},
            /* .tool_calls = */ {},
            /* .reasoning_content = */ "",
            /* .tool_name = */ "web_search",
            /* .tool_call_id = */ "",
        });
        // Third round-trip: arguments contain a nested JSON array of objects.
        conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"));
        conversation_with_tools.messages.push_back({
            "tool",
            "Tool response 3",
            /* .content_parts = */ {},
            /* .tool_calls = */ {},
            /* .reasoning_content = */ "",
            /* .tool_name = */ "read_file",
            /* .tool_call_id = */ "",
        });
        // Expected prompt: tool declarations, system + user turns, then for each
        // assistant turn a <think> block, content, indexed tool-call markers
        // (functions.<name>:<idx>), and a "## Return of ..." tool-result turn;
        // the prompt ends with an open assistant turn for generation.
        assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": \"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think first</think>Let's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think next</think>Continue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think last</think>CC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>"));
2989
        // Test template generation for regular content: plain text must not
        // trigger the tool-call grammar; an empty <think></think> prefix is part
        // of the expected delta.
        test_templates(tmpls.get(), end_tokens, message_assist, tools,
            "<think></think>Hello, world!\nWhat's up?",
            /* expect_grammar_triggered= */ false);

        // Test template generation for tool calls: the grammar must constrain
        // the <|tool_call...|> marker syntax and the JSON arguments.
        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
            "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
            /* expect_grammar_triggered= */ true,
            /* test_grammar_if_triggered= */ true,
            /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
            /* ignore_whitespace_differences= */ true
        );

        // Test template generation for tools with optional parameters — once
        // omitting the optional argument, once supplying it.
        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
            "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
            /* expect_grammar_triggered= */ true,
            /* test_grammar_if_triggered= */ true,
            /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
            /* ignore_whitespace_differences= */ true
        );
        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
            "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
            /* expect_grammar_triggered= */ true,
            /* test_grammar_if_triggered= */ true,
            /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
            /* ignore_whitespace_differences= */ true
        );
3019 }
3020
    // Test Qwen3-Coder XML format: <tool_call>/<function=...>/<parameter=...>
    // markup is parsed into JSON tool-call arguments, with values type-inferred
    // (numbers, booleans, null, nested JSON) and strings trimmed of the
    // surrounding newline padding.
    {
        // Basic XML tool call parsing
        assert_msg_equals(
            message_assist_call,
            test_chat_parse(
                "<tool_call>\n"
                " <function=special_function>\n"
                " <parameter=arg1>\n"
                " 1\n"
                " </parameter>\n"
                " </function>\n"
                "</tool_call>",
                /* is_partial= */ false,
                {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));

        // Multiple parameters with different types (string, int, bool, float)
        common_chat_msg expected_multi_param;
        expected_multi_param.role = "assistant";
        expected_multi_param.tool_calls = {
            { "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" }
        };

        test_parser_with_streaming(expected_multi_param,
            "<tool_call>\n"
            " <function=complex_function>\n"
            " <parameter=name>\n"
            " John Doe\n"
            " </parameter>\n"
            " <parameter=age>\n"
            " 30\n"
            " </parameter>\n"
            " <parameter=active>\n"
            " true\n"
            " </parameter>\n"
            " <parameter=score>\n"
            " 95.5\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Special characters and Unicode survive the round-trip unescaped
        common_chat_msg expected_special_chars;
        expected_special_chars.role = "assistant";
        expected_special_chars.tool_calls = {
            { "unicode_function", "{\"message\":\"Hello ไธ็! ๐ Special chars: @#$%^&*()\"}", "" }
        };

        test_parser_with_streaming(expected_special_chars,
            "<tool_call>\n"
            " <function=unicode_function>\n"
            " <parameter=message>\n"
            " Hello ไธ็! ๐ Special chars: @#$%^&*()\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Multiline content with newlines and indentation: inner newlines are
        // preserved and JSON-escaped in the arguments string
        common_chat_msg expected_multiline;
        expected_multiline.role = "assistant";
        expected_multiline.tool_calls = {
            { "code_function", "{\"code\":\"def hello():\\n print(\\\"Hello, World!\\\")\\n return True\"}", "" }
        };

        test_parser_with_streaming(expected_multiline,
            "<tool_call>\n"
            " <function=code_function>\n"
            " <parameter=code>\n"
            "def hello():\n"
            " print(\"Hello, World!\")\n"
            " return True\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // JSON object as parameter value: parsed as a nested object, not a string
        common_chat_msg expected_json_param;
        expected_json_param.role = "assistant";
        expected_json_param.tool_calls = {
            { "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" }
        };

        test_parser_with_streaming(
            expected_json_param,
            "<tool_call>\n"
            " <function=json_function>\n"
            " <parameter=config>\n"
            " {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Array as parameter value: parsed as a nested JSON array
        common_chat_msg expected_array_param;
        expected_array_param.role = "assistant";
        expected_array_param.tool_calls = {
            { "array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" }
        };

        test_parser_with_streaming(
            expected_array_param,
            "<tool_call>\n"
            " <function=array_function>\n"
            " <parameter=items>\n"
            " [\"apple\", \"banana\", \"cherry\"]\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Empty parameter maps to an empty string value
        common_chat_msg expected_empty_param;
        expected_empty_param.role = "assistant";
        expected_empty_param.tool_calls = {
            { "empty_function", "{\"empty_param\":\"\"}", "" }
        };

        test_parser_with_streaming(
            expected_empty_param,
            "<tool_call>\n"
            " <function=empty_function>\n"
            " <parameter=empty_param>\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Boolean values (true/false) parsed as JSON booleans
        common_chat_msg expected_boolean;
        expected_boolean.role = "assistant";
        expected_boolean.tool_calls = {
            { "boolean_function", "{\"enabled\":true,\"debug\":false}", "" }
        };

        test_parser_with_streaming(
            expected_boolean,
            "<tool_call>\n"
            " <function=boolean_function>\n"
            " <parameter=enabled>\n"
            " true\n"
            " </parameter>\n"
            " <parameter=debug>\n"
            " false\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Null value parsed as JSON null
        common_chat_msg expected_null;
        expected_null.role = "assistant";
        expected_null.tool_calls = {
            { "null_function", "{\"optional_param\":null}", "" }
        };

        test_parser_with_streaming(
            expected_null,
            "<tool_call>\n"
            " <function=null_function>\n"
            " <parameter=optional_param>\n"
            " null\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Negative numbers and scientific notation parsed as JSON numbers
        common_chat_msg expected_numbers;
        expected_numbers.role = "assistant";
        expected_numbers.tool_calls = {
            { "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" }
        };

        test_parser_with_streaming(
            expected_numbers,
            "<tool_call>\n"
            " <function=math_function>\n"
            " <parameter=negative>\n"
            " -42\n"
            " </parameter>\n"
            " <parameter=decimal>\n"
            " -3.14\n"
            " </parameter>\n"
            " <parameter=scientific>\n"
            " 1.23e-4\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // XML-like content in parameters (should be escaped, not parsed as markup)
        common_chat_msg expected_xml_content;
        expected_xml_content.role = "assistant";
        expected_xml_content.tool_calls = {
            { "xml_function", "{\"xml_content\":\"<root><item>value</item></root>\"}", "" }
        };

        test_parser_with_streaming(
            expected_xml_content,
            "<tool_call>\n"
            " <function=xml_function>\n"
            " <parameter=xml_content>\n"
            " <root><item>value</item></root>\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Quotes and escape characters are JSON-escaped in the arguments string
        common_chat_msg expected_quotes;
        expected_quotes.role = "assistant";
        expected_quotes.tool_calls = {
            { "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" }
        };

        test_parser_with_streaming(
            expected_quotes,
            "<tool_call>\n"
            " <function=quote_function>\n"
            " <parameter=message>\n"
            " She said \"Hello!\" and left.\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
        // Long parameter value (simplified): exercises larger text payloads
        std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data.";

        common_chat_msg expected_long_text;
        expected_long_text.role = "assistant";
        expected_long_text.tool_calls = {
            { "long_function", "{\"long_text\":\"" + long_text + "\"}", "" }
        };

        test_parser_with_streaming(
            expected_long_text,
            "<tool_call>\n"
            " <function=long_function>\n"
            " <parameter=long_text>\n"
            " " + long_text + "\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Mixed content with text before the tool call: the leading text (with
        // its trailing space) is kept as regular message content
        common_chat_msg expected_mixed_content;
        expected_mixed_content.role = "assistant";
        expected_mixed_content.content = "I'll help you search for products. ";
        expected_mixed_content.tool_calls = {
            { "search_function", "{\"query\":\"laptops\"}", "" }
        };

        test_parser_with_streaming(
            expected_mixed_content,
            "I'll help you search for products. <tool_call>\n"
            " <function=search_function>\n"
            " <parameter=query>\n"
            " laptops\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Compact format (no extra whitespace between the tags)
        common_chat_msg expected_compact;
        expected_compact.role = "assistant";
        expected_compact.tool_calls = {
            { "compact_function", "{\"param\":\"value\"}", "" }
        };

        test_parser_with_streaming(
            expected_compact,
            "<tool_call><function=compact_function><parameter=param>value</parameter></function></tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Function name with underscores and numbers
        common_chat_msg expected_complex_name;
        expected_complex_name.role = "assistant";
        expected_complex_name.tool_calls = {
            { "get_user_data_v2", "{\"user_id\":12345}", "" }
        };

        test_parser_with_streaming(
            expected_complex_name,
            "<tool_call>\n"
            " <function=get_user_data_v2>\n"
            " <parameter=user_id>\n"
            " 12345\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Parameter names with underscores and numbers
        common_chat_msg expected_complex_params;
        expected_complex_params.role = "assistant";
        expected_complex_params.tool_calls = {
            { "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" }
        };

        test_parser_with_streaming(
            expected_complex_params,
            "<tool_call>\n"
            " <function=test_function>\n"
            " <parameter=param_1>\n"
            " value1\n"
            " </parameter>\n"
            " <parameter=param_2_name>\n"
            " value2\n"
            " </parameter>\n"
            " <parameter=param3>\n"
            " 123\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Very deeply nested XML content in parameter stays a string
        common_chat_msg expected_deep_xml;
        expected_deep_xml.role = "assistant";
        expected_deep_xml.tool_calls = {
            { "xml_parser", "{\"xml\":\"<root><level1><level2><level3>deep content</level3></level2></level1></root>\"}", "" }
        };

        test_parser_with_streaming(
            expected_deep_xml,
            "<tool_call>\n"
            " <function=xml_parser>\n"
            " <parameter=xml>\n"
            " <root><level1><level2><level3>deep content</level3></level2></level1></root>\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Parameter with only whitespace collapses to an empty string
        common_chat_msg expected_whitespace_param;
        expected_whitespace_param.role = "assistant";
        expected_whitespace_param.tool_calls = {
            { "whitespace_function", "{\"spaces\":\"\"}", "" }
        };

        test_parser_with_streaming(
            expected_whitespace_param,
            "<tool_call>\n"
            " <function=whitespace_function>\n"
            " <parameter=spaces>\n"
            " \n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Parameter with tabs and mixed whitespace: inner tabs/spaces preserved
        common_chat_msg expected_mixed_whitespace;
        expected_mixed_whitespace.role = "assistant";
        expected_mixed_whitespace.tool_calls = {
            { "tab_function", "{\"content\":\"line1\\n\\tindented line\\n spaces\"}", "" }
        };

        test_parser_with_streaming(
            expected_mixed_whitespace,
            "<tool_call>\n"
            " <function=tab_function>\n"
            " <parameter=content>\n"
            "line1\n"
            "\tindented line\n"
            " spaces\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Control characters (\n, \t, \r) are JSON-escaped in the arguments
        common_chat_msg expected_control_chars;
        expected_control_chars.role = "assistant";
        expected_control_chars.tool_calls = {
            { "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" }
        };

        test_parser_with_streaming(
            expected_control_chars,
            "<tool_call>\n"
            " <function=control_function>\n"
            " <parameter=text>\n"
            "Line1\nLine2\tTabbed\rCarriage return\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Emoji and extended Unicode characters
        common_chat_msg expected_emoji;
        expected_emoji.role = "assistant";
        expected_emoji.tool_calls = {
            { "emoji_function", "{\"message\":\"Hello! ๐ ๐ ๐ Testing emojis: ๐๐๐๐ and symbols: โโโโ\"}", "" }
        };

        test_parser_with_streaming(
            expected_emoji,
            "<tool_call>\n"
            " <function=emoji_function>\n"
            " <parameter=message>\n"
            " Hello! ๐ ๐ ๐ Testing emojis: ๐๐๐๐ and symbols: โโโโ\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Mathematical expressions and formulas (non-ASCII math symbols)
        common_chat_msg expected_math;
        expected_math.role = "assistant";
        expected_math.tool_calls = {
            { "math_function", "{\"formula\":\"E = mcยฒ and โซf(x)dx = F(x) + C\"}", "" }
        };

        test_parser_with_streaming(
            expected_math,
            "<tool_call>\n"
            " <function=math_function>\n"
            " <parameter=formula>\n"
            " E = mcยฒ and โซf(x)dx = F(x) + C\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // SQL injection-like content (should be safely escaped, passed verbatim)
        common_chat_msg expected_sql;
        expected_sql.role = "assistant";
        expected_sql.tool_calls = {
            { "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" }
        };

        test_parser_with_streaming(
            expected_sql,
            "<tool_call>\n"
            " <function=sql_function>\n"
            " <parameter=query>\n"
            " SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // HTML/XML injection content stays an inert string value
        common_chat_msg expected_html;
        expected_html.role = "assistant";
        expected_html.tool_calls = {
            { "html_function", "{\"content\":\"<script>alert('xss')</script><img src=x onerror=alert(1)>\"}", "" }
        };

        test_parser_with_streaming(
            expected_html,
            "<tool_call>\n"
            " <function=html_function>\n"
            " <parameter=content>\n"
            " <script>alert('xss')</script><img src=x onerror=alert(1)>\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Binary-like content (base64) — '=' padding must not confuse the parser
        common_chat_msg expected_binary;
        expected_binary.role = "assistant";
        expected_binary.tool_calls = {
            { "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" }
        };

        test_parser_with_streaming(
            expected_binary,
            "<tool_call>\n"
            " <function=binary_function>\n"
            " <parameter=data>\n"
            " SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });

        // Very large numbers (should be parsed as scientific notation)
        common_chat_msg expected_large_numbers;
        expected_large_numbers.role = "assistant";
        expected_large_numbers.tool_calls = {
            { "number_function", "{\"big_int\":1e+60}", "" } // Large number becomes scientific notation
        };

        test_parser_with_streaming(
            expected_large_numbers,
            "<tool_call>\n"
            " <function=number_function>\n"
            " <parameter=big_int>\n"
            " 999999999999999999999999999999999999999999999999999999999999\n"
            " </parameter>\n"
            " </function>\n"
            "</tool_call>",
            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
    }
3526
3527 {
3528 // Qwen3-Coder template
3529 auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
3530 common_chat_templates_inputs inputs;
3531 inputs.messages = { message_user };
3532
3533 common_chat_tool qwen_union_tool {
3534 /* .name = */ "qwen_union",
3535 /* .description = */ "Test tool for union/anyOf handling",
3536 /* .parameters = */ R"({
3537 "type": "object",
3538 "properties": {
3539 "priority": { "type": ["number", "null"] },
3540 "maybe_text": { "anyOf": [ { "type": "string" } ] },
3541 "config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] }
3542 },
3543 "required": []
3544 })",
3545 };
3546 inputs.tools = { qwen_union_tool };
3547
3548 auto params = common_chat_templates_apply(tmpls.get(), inputs);
3549 assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format);
3550 assert_equals(false, params.grammar.empty());
3551
3552 // Grammar should compile successfully
3553 auto grammar = build_grammar(params.grammar);
3554 GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
3555 }
3556}
3557
3558static void test_template_output_peg_parsers() {
3559 printf("[%s]\n", __func__);
3560
3561 // JSON schemas
3562 const char * invoice_schema = R"({
3563 "type": "object",
3564 "properties": {
3565 "amount": {"type": "number"},
3566 "date": {"type": "string"}
3567 }
3568 })";
3569
3570 {
3571 // Ministral-3-14B-Reasoning-2512
3572 auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
3573
3574 // Test basic message
3575 test_peg_parser(tmpls.get(), [&](auto & t) {
3576 t.input = "Hello, world!\nWhat's up?";
3577 t.expect = message_assist;
3578 });
3579
3580 // Test basic message and reasoning with reasoning_format = none
3581 test_peg_parser(tmpls.get(), [&](auto & t) {
3582 t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
3583 t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
3584 });
3585
3586 // Test basic message and reasoning with reasoning_format = auto
3587 test_peg_parser(tmpls.get(), [&](auto & t) {
3588 t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
3589 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3590
3591 t.expect = message_assist_thoughts;
3592 });
3593
3594 // Test tool call
3595 test_peg_parser(tmpls.get(), [&](auto & t) {
3596 t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
3597 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3598 t.params.tools = {special_function_tool};
3599
3600 t.expect = message_assist_call;
3601 });
3602
3603 // Test tool call with reasoning
3604 test_peg_parser(tmpls.get(), [&](auto & t) {
3605 t.input = "[THINK]I'm\nthinking[/THINK]"
3606 R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
3607 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3608 t.params.tools = {special_function_tool};
3609
3610 t.expect = message_assist_call_thoughts;
3611 });
3612
3613 // Test parallel tool calls
3614 test_peg_parser(tmpls.get(), [&](auto & t) {
3615 t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
3616 R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})";
3617 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3618 t.params.parallel_tool_calls = true;
3619 t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
3620
3621 t.expect.tool_calls = {{
3622 /* .name = */ "special_function",
3623 /* .arguments = */ R"({"arg1": 1})",
3624 /* .id = */ {},
3625 }, {
3626 /* .name = */ "special_function_with_opt",
3627 /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
3628 /* .id = */ {},
3629 }};
3630 });
3631
3632 // Test response format
3633 test_peg_parser(tmpls.get(), [&](auto & t) {
3634 t.input = "[THINK]I need to output the invoice details in JSON[/THINK]"
3635 "```json\n"
3636 R"({"amount": 123.45, "date": "2025-12-03"})"
3637 "\n```";
3638 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3639 t.params.json_schema = invoice_schema;
3640
3641 t.expect.reasoning_content = "I need to output the invoice details in JSON";
3642 t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
3643 });
3644 }
3645
3646 {
3647 // NVIDIA Nemotron-3 Nano
3648 auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
3649
3650 // Test basic message
3651 test_peg_parser(tmpls.get(), [&](auto & t) {
3652 t.input = "Hello, world!\nWhat's up?";
3653 t.expect = message_assist;
3654 });
3655
3656 // Test basic message and reasoning with reasoning_format = none
3657 test_peg_parser(tmpls.get(), [&](auto & t) {
3658 t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
3659 t.expect.content = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
3660 });
3661
3662 // Test basic message and reasoning with reasoning_format = auto
3663 test_peg_parser(tmpls.get(), [&](auto & t) {
3664 t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
3665 t.params.enable_thinking = true;
3666 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3667
3668 t.expect = message_assist_thoughts;
3669 });
3670
3671 // Test tool call
3672 test_peg_parser(tmpls.get(), [&](auto & t) {
3673 t.input =
3674 "<tool_call>\n"
3675 "<function=special_function>\n"
3676 "<parameter=arg1>\n"
3677 "1\n"
3678 "</parameter>\n"
3679 "</function>\n"
3680 "</tool_call>";
3681 t.params.enable_thinking = false;
3682 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3683 t.params.tools = {special_function_tool};
3684
3685 t.expect = message_assist_call;
3686 });
3687
3688 // Test tool call with reasoning
3689 test_peg_parser(tmpls.get(), [&](auto & t) {
3690 t.input =
3691 "I'm\nthinking\n</think>\n"
3692 "<tool_call>\n"
3693 "<function=special_function>\n"
3694 "<parameter=arg1>\n"
3695 "1\n"
3696 "</parameter>\n"
3697 "</function>\n"
3698 "</tool_call>";
3699 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3700 t.params.tools = {special_function_tool};
3701
3702 t.expect = message_assist_call_thoughts;
3703 });
3704
3705 // Test parallel tool calls
3706 test_peg_parser(tmpls.get(), [&](auto & t) {
3707 t.input =
3708 "<tool_call>\n"
3709 "<function=special_function>\n"
3710 "<parameter=arg1>\n"
3711 "1\n"
3712 "</parameter>\n"
3713 "</function>\n"
3714 "</tool_call>\n"
3715 "<tool_call>\n"
3716 "<function=special_function_with_opt>\n"
3717 "<parameter=arg1>\n"
3718 "1\n"
3719 "</parameter>\n"
3720 "<parameter=arg2>\n"
3721 "2\n"
3722 "</parameter>\n"
3723 "</function>\n"
3724 "</tool_call>";
3725 t.params.enable_thinking = false;
3726 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3727 t.params.parallel_tool_calls = true;
3728 t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
3729
3730 t.expect.tool_calls = {{
3731 /* .name = */ "special_function",
3732 /* .arguments = */ R"({"arg1": 1})",
3733 /* .id = */ {},
3734 }, {
3735 /* .name = */ "special_function_with_opt",
3736 /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
3737 /* .id = */ {},
3738 }};
3739 });
3740
3741 // Test tool call with string parameter
3742 test_peg_parser(tmpls.get(), [&](auto & t) {
3743 t.input =
3744 "<tool_call>\n"
3745 "<function=python>\n"
3746 "<parameter=code>\n"
3747 "def hello():\n"
3748 " print(\"Hello, world!\")\n"
3749 "\n"
3750 "hello()\n"
3751 "</parameter>\n"
3752 "</function>\n"
3753 "</tool_call>";
3754 t.params.enable_thinking = false;
3755 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3756 t.params.tools = {python_tool};
3757
3758 t.expect.tool_calls = {{
3759 /* .name = */ "python",
3760 /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
3761 /* .id = */ {},
3762 }};
3763 });
3764
3765 // Test tool call with string parameter and no closing </parameter> tag
3766 test_peg_parser(tmpls.get(), [&](auto & t) {
3767 t.input =
3768 "<tool_call>\n"
3769 "<function=python>\n"
3770 "<parameter=code>\n"
3771 "def hello():\n"
3772 " print(\"Hello, world!\")\n"
3773 "\n"
3774 "hello()\n"
3775 "</function>\n"
3776 "</tool_call>";
3777 t.params.enable_thinking = false;
3778 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3779 t.params.tools = {python_tool};
3780
3781 t.expect.tool_calls = {{
3782 /* .name = */ "python",
3783 /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
3784 /* .id = */ {},
3785 }};
3786 });
3787
3788 // Test response format
3789 test_peg_parser(tmpls.get(), [&](auto & t) {
3790 t.input =
3791 "I need to output the invoice details in JSON\n"
3792 "</think>\n"
3793 R"({"amount": 123.45, "date": "2025-12-03"})";
3794 t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
3795 t.params.json_schema = invoice_schema;
3796
3797 t.expect.reasoning_content = "I need to output the invoice details in JSON";
3798 t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
3799 });
3800 }
3801
3802 {
3803 // Solar-Open-100B
3804 auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");
3805
3806 // Test basic message
3807 test_peg_parser(tmpls.get(), [&](auto & t) {
3808 t.input = "<|content|>Hello, world!\nWhat's up?";
3809 t.expect = message_assist;
3810 });
3811
3812 // Test basic message and reasoning
3813 test_peg_parser(tmpls.get(), [&](auto & t) {
3814 t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?";
3815 t.expect = message_assist_thoughts;
3816 });
3817
3818 // Test basic message and reasoning_effort = low
3819 test_peg_parser(tmpls.get(), [&](auto & t) {
3820 t.input = "<|content|>Hello, world!\nWhat's up?";
3821 t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
3822 t.expect = message_assist;
3823 });
3824
3825 // Test tool call
3826 test_peg_parser(tmpls.get(), [&](auto & t) {
3827 t.input = "<|tool_calls|>"
3828 "<|tool_call:begin|>123456789"
3829 "<|tool_call:name|>special_function"
3830 "<|tool_call:args|>{\"arg1\":1}"
3831 "<|tool_call:end|>";
3832
3833 t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
3834 t.params.tools = {special_function_tool};
3835 t.expect = message_assist_call_id;
3836 });
3837
3838 // Test tool call with reasoning
3839 test_peg_parser(tmpls.get(), [&](auto & t) {
3840 t.input = "<|think|>I'm\nthinking<|end|>"
3841 "<|begin|>assistant<|tool_calls|>"
3842 "<|tool_call:begin|>0"
3843 "<|tool_call:name|>special_function"
3844 "<|tool_call:args|>{\"arg1\":1}"
3845 "<|tool_call:end|>";
3846
3847 t.params.tools = {special_function_tool};
3848 t.expect = message_assist_thoughts_call_idx;
3849 });
3850
3851 // Test tool call with reasoning and tool_choice = required
3852 test_peg_parser(tmpls.get(), [&](auto & t) {
3853 t.input = "<|think|>I'm\nthinking<|end|>"
3854 "<|begin|>assistant<|tool_calls|>"
3855 "<|tool_call:begin|>0"
3856 "<|tool_call:name|>special_function"
3857 "<|tool_call:args|>{\"arg1\":1}"
3858 "<|tool_call:end|>";
3859
3860 t.params.tools = {special_function_tool};
3861 t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
3862 t.expect = message_assist_thoughts_call_idx;
3863 });
3864
3865 // Test tool call without reasoning and tool_choice = required
3866 test_peg_parser(tmpls.get(), [&](auto & t) {
3867 t.input = "<|tool_calls|>"
3868 "<|tool_call:begin|>0"
3869 "<|tool_call:name|>special_function"
3870 "<|tool_call:args|>{\"arg1\":1}"
3871 "<|tool_call:end|>";
3872
3873 t.params.tools = {special_function_tool};
3874 t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
3875 t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
3876 t.expect = message_assist_call_idx;
3877 });
3878
3879 // Test parallel tool calls
3880 test_peg_parser(tmpls.get(), [&](auto & t) {
3881 t.input = "<|think|>I'm\nthinking<|end|>"
3882 "<|begin|>assistant<|tool_calls|>"
3883 "<|tool_call:begin|>0"
3884 "<|tool_call:name|>special_function"
3885 "<|tool_call:args|>{\"arg1\":1}"
3886 "<|tool_call:end|>"
3887 "<|tool_call:begin|>1"
3888 "<|tool_call:name|>special_function_with_opt"
3889 "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}"
3890 "<|tool_call:end|>";
3891
3892 t.params.parallel_tool_calls = true;
3893 t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
3894
3895 t.expect.reasoning_content = "I'm\nthinking";
3896 t.expect.tool_calls = {{
3897 /* .name = */ "special_function",
3898 /* .arguments = */ R"({"arg1": 1})",
3899 /* .id = */ "0",
3900 }, {
3901 /* .name = */ "special_function_with_opt",
3902 /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
3903 /* .id = */ "1",
3904 }};
3905 });
3906
3907 // Test response format
3908 test_peg_parser(tmpls.get(), [&](auto & t) {
3909 t.input = "<|think|>I need to output the invoice details in JSON<|end|>"
3910 "<|begin|>assistant<|content|>"
3911 R"({"amount": 123.45, "date": "2025-12-03"})";
3912
3913 t.params.json_schema = invoice_schema;
3914
3915 t.expect.reasoning_content = "I need to output the invoice details in JSON";
3916 t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
3917 });
3918
3919 // Test response format no reasoning
3920 test_peg_parser(tmpls.get(), [&](auto & t) {
3921 t.input = "<|content|>"
3922 R"({"amount": 123.45, "date": "2025-12-03"})";
3923
3924 t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
3925 t.params.json_schema = invoice_schema;
3926
3927 t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
3928 });
3929 }
3930}
3931
3932static void test_msg_diffs_compute() {
3933 printf("[%s]\n", __func__);
3934 {
3935 common_chat_msg msg1;
3936
3937 common_chat_msg msg2;
3938 msg2.content = "Hello, world!";
3939
3940 common_chat_msg_diff diff;
3941 diff.content_delta = "Hello, world!";
3942
3943 assert_equals(
3944 {diff},
3945 common_chat_msg_diff::compute_diffs(msg1, msg2));
3946 }
3947 {
3948 common_chat_msg msg1;
3949 msg1.content = "Hello,";
3950
3951 common_chat_msg msg2;
3952 msg2.content = "Hello, world!";
3953
3954 common_chat_msg_diff diff;
3955 diff.content_delta = " world!";
3956
3957 assert_equals(
3958 {diff},
3959 common_chat_msg_diff::compute_diffs(msg1, msg2));
3960 }
3961 {
3962 common_chat_msg msg0;
3963
3964 common_chat_msg msg1;
3965 msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } };
3966
3967 common_chat_msg msg2;
3968 msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } };
3969
3970 common_chat_msg_diff diff01;
3971 diff01.tool_call_index = 0;
3972 diff01.tool_call_delta.name = "special_function";
3973 diff01.tool_call_delta.id = "123";
3974 diff01.tool_call_delta.arguments = "{\"ar";
3975
3976 assert_equals(
3977 {diff01},
3978 common_chat_msg_diff::compute_diffs(msg0, msg1));
3979
3980 common_chat_msg_diff diff12;
3981 diff12.tool_call_index = 0;
3982 // Note: neither id nor name change here.
3983 diff12.tool_call_delta.arguments = "g1\": 1}";
3984
3985 assert_equals(
3986 {diff12},
3987 common_chat_msg_diff::compute_diffs(msg1, msg2));
3988 }
3989 {
3990 common_chat_msg msg0;
3991
3992 common_chat_msg msg2;
3993 msg2.tool_calls = {
3994 { "f1", "{\"arg1\": 1}", /* .id = */ "123" },
3995 { "f2", "{\"arg2\": 2}", /* .id = */ "222" },
3996 };
3997
3998 common_chat_msg_diff diff1;
3999 diff1.tool_call_index = 0;
4000 diff1.tool_call_delta.name = "f1";
4001 diff1.tool_call_delta.id = "123";
4002 diff1.tool_call_delta.arguments = "{\"arg1\": 1}";
4003
4004 common_chat_msg_diff diff2;
4005 diff2.tool_call_index = 1;
4006 diff2.tool_call_delta.name = "f2";
4007 diff2.tool_call_delta.id = "222";
4008 diff2.tool_call_delta.arguments = "{\"arg2\": 2}";
4009
4010 assert_equals(
4011 {diff1, diff2},
4012 common_chat_msg_diff::compute_diffs(msg0, msg2));
4013 }
4014}
4015
4016int main(int argc, char ** argv) {
4017 common_log_set_verbosity_thold(999);
4018
4019 // try {
4020#ifndef _WIN32
4021 if (argc > 1) {
4022 common_chat_templates_inputs inputs;
4023 common_chat_msg msg;
4024 msg.role = "user";
4025 msg.content = "Hey";
4026 inputs.messages = {msg};
4027 inputs.tools = { special_function_tool };
4028
4029 std::cout << "| Template | Format |\n";
4030 std::cout << "|----------|--------|\n";
4031
4032 for (int i = 1; i < argc; i++) {
4033 try {
4034 std::string path = argv[i];
4035 if (path.rfind(".jinja") != path.size() - 6) {
4036 std::cerr << "Skipping non-jinja file: " << path << '\n';
4037 continue;
4038 }
4039 auto tmpls = read_templates(path);
4040 auto parts = string_split(path, "/");
4041 auto name = parts[parts.size() - 1];
4042 auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
4043 std::cout << "| " << name << " | " << format << " |\n";
4044 } catch (const std::exception & e) {
4045 std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
4046 }
4047 }
4048 } else
4049#endif
4050 {
4051 test_msg_diffs_compute();
4052 test_msgs_oaicompat_json_conversion();
4053 test_tools_oaicompat_json_conversion();
4054 test_template_output_parsers();
4055 test_template_output_peg_parsers();
4056 std::cout << "\n[chat] All tests passed!" << '\n';
4057 }
4058 return 0;
4059 // } catch (const std::exception & e) {
4060 // std::cerr << "Error: " << e.what() << '\n';
4061 // return 1;
4062 // }
4063}