1// Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
2//
3// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
4// e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
5//
6// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
7//
8#include <exception>
9#include <iostream>
10#include <string>
11
12#include "chat-parser.h"
13#include "common.h"
14#include "log.h"
15#include "regex-partial.h"
16
// Compares `expected` and `actual` with operator!=. When they differ, writes `label` followed by both
// values to stderr and throws std::runtime_error("Test failed"); when they are equal it does nothing.
// T must be comparable with != and streamable with operator<<.
template <class T>
static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
    const bool mismatch = expected != actual;
    if (!mismatch) {
        return;
    }
    std::cerr << label << '\n'
              << "Expected: " << expected << '\n'
              << "Actual: " << actual << '\n';
    std::cerr.flush();
    throw std::runtime_error("Test failed");
}
27
// Label-less convenience overload: forwards to the labelled assert_equals with an empty label.
template <class T>
static void assert_equals(const T & expected, const T & actual) {
    const std::string_view no_label = "";
    assert_equals(no_label, expected, actual);
}
32static void assert_equals(const char * expected, const std::string & actual) {
33 return assert_equals<std::string>(expected, actual);
34}
35
// Runs `fn` and verifies that it throws.
//
// - If `fn` throws a std::exception and `expected_exception_pattern` is empty, any message is accepted.
// - Otherwise the exception's what() must contain a match for `expected_exception_pattern`
//   (std::regex_search with a default-constructed std::regex).
//
// Throws std::runtime_error when `fn` returns without throwing, when the message does not match the
// pattern, or when `fn` throws something that is not derived from std::exception.
static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
    try {
        fn();
    } catch (const std::exception & e) {
        if (expected_exception_pattern.empty()) {
            return;
        }
        const std::regex expected_exception_regex(expected_exception_pattern);
        const std::string actual_message = e.what();
        if (std::regex_search(actual_message, expected_exception_regex)) {
            return;
        }
        // Thrown from inside a handler, so the sibling catch (...) below does not intercept it.
        throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
    } catch (...) {
        // Not a std::exception: there is no message to match, so report it as a failure explicitly.
        throw std::runtime_error("Exception of unexpected type");
    }
    throw std::runtime_error("Exception was expected but not thrown");
}
53
54static void test_reasoning() {
55 //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
56 {
57 common_chat_parser_params params;
58 params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
59 params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
60 params.reasoning_in_content = false;
61 params.thinking_forced_open = false;
62 common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
63 assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
64 assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
65 }
66 {
67 common_chat_parser_params params;
68 params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
69 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
70 params.reasoning_in_content = false;
71 params.thinking_forced_open = false;
72 common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
73 assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
74 assert_equals(std::string("Cogito"), builder.result().reasoning_content);
75 assert_equals("Ergo sum", builder.consume_rest());
76 }
77 {
78 common_chat_parser_params params;
79 params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
80 params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
81 params.reasoning_in_content = false;
82 params.thinking_forced_open = false;
83 common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
84 assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
85 assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
86 }
87 {
88 common_chat_parser_params params;
89 params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
90 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
91 params.reasoning_in_content = false;
92 params.thinking_forced_open = true;
93 common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
94 assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
95 assert_equals(std::string("Cogito"), builder.result().reasoning_content);
96 assert_equals("Ergo sum", builder.consume_rest());
97 }
98 {
99 common_chat_parser_params params;
100 params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
101 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
102 params.reasoning_in_content = true;
103 params.thinking_forced_open = true;
104 common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
105 assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
106 assert_equals("<think>Cogito</think>", builder.result().content);
107 assert_equals("Ergo sum", builder.consume_rest());
108 }
109 {
110 const std::string variant("content_only_inline_think");
111 common_chat_parser_params params;
112 params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
113 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
114 params.reasoning_in_content = false;
115 params.thinking_forced_open = false;
116 params.parse_tool_calls = false;
117 const std::string input = "<think>Pense</think>Bonjour";
118 auto msg = common_chat_parse(input, false, params);
119 assert_equals(variant, std::string("Pense"), msg.reasoning_content);
120 assert_equals(variant, std::string("Bonjour"), msg.content);
121 }
122 {
123 const std::string variant("llama_3_inline_think");
124 common_chat_parser_params params;
125 params.format = COMMON_CHAT_FORMAT_LLAMA_3_X;
126 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
127 params.reasoning_in_content = false;
128 params.thinking_forced_open = false;
129 params.parse_tool_calls = false;
130 const std::string input = "<think>Plan</think>Réponse";
131 auto msg = common_chat_parse(input, false, params);
132 assert_equals(variant, std::string("Plan"), msg.reasoning_content);
133 assert_equals(variant, std::string("Réponse"), msg.content);
134 }
135 // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
136 {
137 common_chat_parser_params params;
138 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
139 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
140 params.reasoning_in_content = false;
141 params.thinking_forced_open = true;
142 params.parse_tool_calls = true;
143 const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
144 common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, params);
145 assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
146 assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
147 assert_equals(variant, std::string("ok"), builder.consume_rest());
148 }
149 // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
150 {
151 common_chat_parser_params params;
152 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
153 params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
154 params.reasoning_in_content = false;
155 params.thinking_forced_open = true;
156 params.parse_tool_calls = true;
157 const std::string variant("deepseek_v3_1_reasoning_format_none");
158 const std::string input = "REASONING</think>ok";
159 auto msg = common_chat_parse(input, false, params);
160 assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
161 assert_equals(variant, std::string(""), msg.reasoning_content);
162 }
163}
164
165static void test_regex() {
166 auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
167 common_chat_msg_parser builder(input, /* is_partial= */ false, {});
168 assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
169 };
170
171 test_throws("Hello, world!", "abc", "^abc$");
172 test_throws("Hello, world!", "e", "^e$");
173
174 {
175 common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
176 builder.consume_regex(common_regex("Hello"));
177 assert_equals(", world!", builder.consume_rest());
178 }
179
180 {
181 // When in non partial mode, we can say whether the regex was consumed or not.
182 common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
183 assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
184 }
185 {
186 common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
187 auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
188 assert_equals(true, res.has_value());
189 // Verify captures
190 assert_equals<size_t>(2, res->groups.size());
191 assert_equals("Hell", builder.str(res->groups[0]));
192 assert_equals("el", builder.str(res->groups[1]));
193 // Verify position is after the match
194 assert_equals<size_t>(4, builder.pos());
195 assert_equals("o,", builder.consume_rest());
196 }
197 {
198 // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
199 common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
200 assert_throws([&]() {
201 builder.try_consume_regex(common_regex("Hello, world!"));
202 }, "^Hello, world!$");
203 }
204
205 // Now regardless of the mode, we can tell these aren't a match.
206 for (const auto is_partial : {false, true}) {
207 common_chat_msg_parser builder("Hello,", is_partial, {});
208 assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
209 }
210 for (const auto is_partial : {false, true}) {
211 common_chat_msg_parser builder("Hello,", is_partial, {});
212 assert_equals(false, builder.try_consume_literal("Oh"));
213 }
214}
215
// Truncated JSON object prefixes used as inputs by the JSON-with-dumped-args tests below.
// Each entry stops somewhere inside the first key or inside the string value of "name".
const std::vector<std::string> barely_healable_jsons = {
    R"({)",
    R"({")",
    R"({"\)",
    R"({"n)",
    R"({"name")",
    R"({"name":)",
    R"({"name":")",
    R"({"name":"\)",
    R"({"name":"python)",
    R"({"name":"python\)",
    R"({",)",
    R"({":)",
    R"({"[)",
    R"({"])",
    R"({"{)",
    R"({"})",
    R"({"1)",
    R"({"name":",)",
    R"({"name":":)",
    R"({"name":"[)",
    R"({"name":"])",
    R"({"name":"{)",
    R"({"name":"})",
    R"({"name":"1)",
};
242
243static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
244 common_chat_msg_parser builder(input, is_partial, {});
245 auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
246 assert_equals(true, js.has_value());
247 assert_equals(is_partial, js->is_partial);
248 assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
249}
250
251static void test_deepseek_v3_1_tool_calls() {
252 //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
253 // variant: happy path for when it works as the model card says it should
254 const std::string variant("simple");
255 common_chat_parser_params params;
256 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
257 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
258 params.reasoning_in_content = false;
259 params.thinking_forced_open = false;
260 params.parse_tool_calls = true;
261 const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
262 auto msg = common_chat_parse(input, false, params);
263 assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
264 assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
265 // JSON arguments are dumped without spaces
266 assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
267 assert_equals(variant, std::string(""), msg.content);
268 assert_equals(variant, std::string(""), msg.reasoning_content);
269
270 // variant: simple + thinking open
271 {
272 common_chat_parser_params params;
273 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
274 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
275 params.reasoning_in_content = false;
276 params.thinking_forced_open = true;
277 params.parse_tool_calls = true;
278 const std::string variant("simple_thinking");
279 const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
280 auto m = common_chat_parse(in, false, params);
281 assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
282 assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
283 assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
284 assert_equals(variant, std::string(""), m.content);
285 assert_equals(variant, std::string("REASONING"), m.reasoning_content);
286 }
287 // variant: simple + multiple tool calls
288 {
289 common_chat_parser_params params;
290 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
291 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
292 params.reasoning_in_content = false;
293 params.thinking_forced_open = false;
294 params.parse_tool_calls = true;
295 const std::string variant("simple_multiple_tool_calls");
296 const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
297 auto m = common_chat_parse(in, false, params);
298 assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
299 assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
300 assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
301 assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
302 assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
303 assert_equals(variant, std::string("CONTENT"), m.content);
304 assert_equals(variant, std::string(""), m.reasoning_content);
305 }
306
307
308 // variant: thinking forced open + tool call in reasoning content
309 {
310 common_chat_parser_params params;
311 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
312 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
313 params.reasoning_in_content = false;
314 params.thinking_forced_open = true;
315 params.parse_tool_calls = true;
316 const std::string variant("thinking_forced_open_tool_call_in_reasoning");
317 const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
318 auto m = common_chat_parse(in, false, params);
319 assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
320 assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
321 assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
322 assert_equals(variant, std::string(""), m.content);
323 assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content);
324 }
325
326 // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
327 // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
328 // to make tool calls in reasoning content according to the model card, but it does sometimes, so
329 // add the reasoning content as regular content and parse the tool calls.
330 {
331 common_chat_parser_params params;
332 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
333 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
334 params.reasoning_in_content = false;
335 params.thinking_forced_open = true;
336 params.parse_tool_calls = true;
337 const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
338 const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
339 auto m = common_chat_parse(in, false, params);
340 assert_equals(variant, std::string("REASONING"), m.content);
341 assert_equals(variant, std::string(""), m.reasoning_content);
342 assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
343 assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
344 assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
345 }
346
347 // variant: thinking forced open + tool call in reasoning content + no closing think + partial
348 {
349 common_chat_parser_params params;
350 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
351 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
352 params.reasoning_in_content = false;
353 params.thinking_forced_open = true;
354 params.parse_tool_calls = true;
355 const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
356 const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
357 auto m = common_chat_parse(in, /* is_partial= */ true, params);
358 assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content);
359 assert_equals(variant, std::string(""), m.content);
360 assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
361 }
362
363 // variant: thinking not forced open + reasoning + regular content + no tool calls
364 {
365 common_chat_parser_params params;
366 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
367 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
368 params.reasoning_in_content = false;
369 params.thinking_forced_open = true;
370 params.parse_tool_calls = true;
371 const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
372 const std::string in = "REASONING</think>CONTENT";
373 auto m = common_chat_parse(in, false, params);
374 assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
375 assert_equals(variant, std::string("CONTENT"), m.content);
376 assert_equals(variant, std::string("REASONING"), m.reasoning_content);
377 }
378 // variant: thinking not forced open + missing reasoning + no tool calls
379 {
380 common_chat_parser_params params;
381 params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
382 params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
383 params.reasoning_in_content = false;
384 params.thinking_forced_open = false;
385 params.parse_tool_calls = true;
386 const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
387 const std::string in = "CONTENT";
388 auto m = common_chat_parse(in, false, params);
389 assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
390 assert_equals(variant, std::string("CONTENT"), m.content);
391 assert_equals(variant, std::string(""), m.reasoning_content);
392 }
393}
394
395static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
396 common_chat_msg_parser builder(input, parse_as_partial, {});
397 auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
398 assert_equals(true, js.has_value());
399 assert_equals(is_partial, js->is_partial);
400 assert_equals(expected, js->value.dump());
401}
402
403static void test_json_with_dumped_args_no_args() {
404 // Normal JSON, nothing to heal, nothing to dump
405 test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
406 // Full json is args
407 test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
408
409 // If the arguments are further down, don't heal partial content.
410 for (const auto & src : barely_healable_jsons) {
411 test(src, true, {{"arguments"}}, {}, "{}");
412 }
413 // But heal content that isn't partial.
414 test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
415}
416
417static void test_json_with_dumped_args() {
418
419 // Partial content.
420 test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
421 test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
422 test("{\"content\": ", true, {}, {{"content"}}, "{}");
423
424 // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
425 test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
426 for (const auto & src : barely_healable_jsons) {
427 test(src, true, {{}}, {}, src);
428 }
429
430 // Full JSON w/ args
431 for (auto parse_as_partial : {true, false}) {
432 test_with_args(
433 R"({"name": "python", "args": {"arg1": 1}})",
434 R"({"name":"python","args":"{\"arg1\":1}"})",
435 parse_as_partial,
436 /* is_partial= */ false
437 );
438 }
439
440 // Partial JSON w/ partial args
441 test_with_args(
442 R"({"foo": "bar", "args": {")",
443 R"({"foo":"bar","args":"{\""})"
444 );
445 // Partial args broken in object key
446 test_with_args(
447 R"({"foo": "bar", "args": {"ar)",
448 R"({"foo":"bar","args":"{\"ar"})"
449 );
450 // Partial args broken after object key
451 test_with_args(
452 R"({"foo": "bar", "args": {"arg1")",
453 R"({"foo":"bar","args":"{\"arg1\""})"
454 );
455 // Partial args broken before object value
456 test_with_args(
457 R"({"foo": "bar", "args": {"arg1":)",
458 R"({"foo":"bar","args":"{\"arg1\":"})"
459 );
460 // Partial args broken before object value (space)
461 test_with_args(
462 R"({"foo": "bar", "args": {"arg1": )",
463 R"({"foo":"bar","args":"{\"arg1\":"})"
464 );
465 // Partial args broken in object value that may not be complete (int)
466 test_with_args(
467 R"({"foo": "bar", "args": {"arg1": 1)",
468 R"({"foo":"bar","args":"{\"arg1\":"})"
469 );
470 // Partial args broken in object value that is complete (int)
471 test_with_args(
472 R"({"foo": "bar", "args": {"arg1": 1 )",
473 R"({"foo":"bar","args":"{\"arg1\":1"})"
474 );
475 // Partial args broken in object value that is incomplete (string)
476 test_with_args(
477 R"({"foo": "bar", "args": {"arg1": ")",
478 R"({"foo":"bar","args":"{\"arg1\":\""})"
479 );
480 // Partial args broken in object value that is complete (string)
481 test_with_args(
482 R"({"foo": "bar", "args": {"arg1": "1")",
483 R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
484 );
485 // Partial args broken on array opening
486 test_with_args(
487 R"({"foo": "bar", "args": [)",
488 R"({"foo":"bar","args":"["})"
489 );
490 // Partial args broken on array value that is incomplete (int)
491 test_with_args(
492 R"({"foo": "bar", "args": [1)",
493 R"({"foo":"bar","args":"["})"
494 );
495 // Partial args broken on array value that is complete (int)
496 test_with_args(
497 R"({"foo": "bar", "args": [1 )",
498 R"({"foo":"bar","args":"[1"})"
499 );
500 // Partial args broken on array value that is complete (string)
501 test_with_args(
502 R"({"foo": "bar", "args": ["1")",
503 R"({"foo":"bar","args":"[\"1\""})"
504 );
505 // Partial args broken after array value
506 test_with_args(
507 R"({"foo": "bar", "args": [1,)",
508 R"({"foo":"bar","args":"[1,"})"
509 );
510 // Partial args broken on nested array
511 test_with_args(
512 R"({"foo": "bar", "args": {"arg1": [)",
513 R"({"foo":"bar","args":"{\"arg1\":["})"
514 );
515
516 // Unicode tests
517 test_with_args(
518 R"({"foo": "bar", "args": {"arg1": "\u)",
519 R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
520 );
521 test_with_args(
522 R"({"foo": "bar", "args": {"arg1": "\u0)",
523 R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
524 );
525 test_with_args(
526 R"({"foo": "bar", "args": {"arg1": "\u00)",
527 R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
528 );
529 test_with_args(
530 R"({"foo": "bar", "args": {"arg1": "\u000)",
531 R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
532 );
533 test_with_args(
534 R"({"foo": "bar", "args": {"arg1": "\u0000)",
535 R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
536 );
537 test_with_args(
538 R"({"foo": "bar", "args": {"arg1": "\ud8)",
539 R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
540 );
541 test_with_args(
542 R"({"foo": "bar", "args": {"arg1": "\ud80)",
543 R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
544 );
545 test_with_args(
546 R"({"foo": "bar", "args": {"arg1": "\ud800)",
547 R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
548 );
549 test_with_args(
550 R"({"foo": "bar", "args": {"arg1": "\ud800\)",
551 R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
552 );
553 test_with_args(
554 R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
555 R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
556 );
557 test_with_args(
558 R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
559 R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
560 );
561 test_with_args(
562 R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
563 R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
564 );
565 test_with_args(
566 R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
567 R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
568 );
569 test_with_args(
570 R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
571 R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
572 );
573}
574
575static void test_positions() {
576 {
577 common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
578 assert_equals<size_t>(0, builder.pos());
579 assert_throws([&]() { builder.move_to(100); });
580 assert_equals<size_t>(0, builder.pos());
581 assert_throws([&]() { builder.move_back(1); });
582 assert_equals<size_t>(0, builder.pos());
583
584 builder.move_to(8);
585 assert_equals<size_t>(8, builder.pos());
586 builder.move_back(1);
587 assert_equals<size_t>(7, builder.pos());
588 assert_equals("world!", builder.consume_rest());
589
590 builder.move_to(0);
591 assert_equals<size_t>(0, builder.pos());
592
593 assert_throws([&]() { builder.finish(); });
594 assert_equals<size_t>(0, builder.pos());
595
596 builder.move_to(builder.input().size());
597 builder.finish();
598 }
599 {
600 common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
601
602 builder.move_to(builder.input().size());
603 assert_equals<size_t>(builder.input().size(), builder.pos());
604 builder.finish();
605 }
606}
607
608int main() {
609 test_positions();
610 test_json_with_dumped_args_no_args();
611 test_json_with_dumped_args();
612 test_reasoning();
613 test_regex();
614 test_deepseek_v3_1_tool_calls();
615 std::cout << "All tests passed!\n";
616 return 0;
617}