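//  Tests the chat message parser (common_chat_msg_parser / common_chat_parse): reasoning extraction,
//  regex and literal consumption, healing of partial JSON with dumped arguments, cursor positioning,
//  and DeepSeek V3.1 tool-call parsing.
//
//  NOTE(review): this header previously described grammar generation tests and a CLI mode that prints a
//  Markdown summary of Jinja template formats, e.g. given Minja (http://github.com/google/minja):
//
//    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
//
//  main() in this file takes no arguments, so that text appears to describe a different test binary — confirm.
//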
#include <exception>
#include <functional>
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

#include "chat-parser.h"
#include "common.h"
#include "log.h"
#include "regex-partial.h"
 16
 17template <class T>
 18static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
 19    if (expected != actual) {
 20        std::cerr << label << std::endl;
 21        std::cerr << "Expected: " << expected << std::endl;
 22        std::cerr << "Actual: " << actual << std::endl;
 23        std::cerr << std::flush;
 24        throw std::runtime_error("Test failed");
 25    }
 26}
 27
 28template <class T>
 29static void assert_equals(const T & expected, const T & actual) {
 30    assert_equals("", expected, actual);
 31}
 32static void assert_equals(const char * expected, const std::string & actual) {
 33  return assert_equals<std::string>(expected, actual);
 34}
 35
 36static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
 37    try {
 38        fn();
 39    } catch (const std::exception & e) {
 40      if (expected_exception_pattern.empty()) {
 41          return;
 42        }
 43        std::regex expected_exception_regex(expected_exception_pattern);
 44        std::string actual_message = e.what();
 45        if (std::regex_search(actual_message, expected_exception_regex)) {
 46            return;
 47        }
 48        throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
 49        throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
 50    }
 51    throw std::runtime_error("Exception was expected but not thrown");
 52}
 53
 54static void test_reasoning() {
 55  //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
 56  {
 57    common_chat_parser_params params;
 58    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
 59    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
 60    params.reasoning_in_content = false;
 61    params.thinking_forced_open = false;
 62    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
 63    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
 64    assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
 65  }
 66  {
 67    common_chat_parser_params params;
 68    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
 69    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
 70    params.reasoning_in_content = false;
 71    params.thinking_forced_open = false;
 72    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
 73    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
 74    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
 75    assert_equals("Ergo sum", builder.consume_rest());
 76  }
 77  {
 78    common_chat_parser_params params;
 79    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
 80    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
 81    params.reasoning_in_content = false;
 82    params.thinking_forced_open = false;
 83    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
 84    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
 85    assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
 86  }
 87  {
 88    common_chat_parser_params params;
 89    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
 90    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
 91    params.reasoning_in_content = false;
 92    params.thinking_forced_open = true;
 93    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
 94    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
 95    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
 96    assert_equals("Ergo sum", builder.consume_rest());
 97  }
 98  {
 99    common_chat_parser_params params;
100    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
101    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
102    params.reasoning_in_content = true;
103    params.thinking_forced_open = true;
104    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
105    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
106    assert_equals("<think>Cogito</think>", builder.result().content);
107    assert_equals("Ergo sum", builder.consume_rest());
108  }
109  {
110    const std::string variant("content_only_inline_think");
111    common_chat_parser_params params;
112    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
113    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
114    params.reasoning_in_content = false;
115    params.thinking_forced_open = false;
116    params.parse_tool_calls = false;
117    const std::string input = "<think>Pense</think>Bonjour";
118    auto msg = common_chat_parse(input, false, params);
119    assert_equals(variant, std::string("Pense"), msg.reasoning_content);
120    assert_equals(variant, std::string("Bonjour"), msg.content);
121  }
122  {
123    const std::string variant("llama_3_inline_think");
124    common_chat_parser_params params;
125    params.format = COMMON_CHAT_FORMAT_LLAMA_3_X;
126    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
127    params.reasoning_in_content = false;
128    params.thinking_forced_open = false;
129    params.parse_tool_calls = false;
130    const std::string input = "<think>Plan</think>Réponse";
131    auto msg = common_chat_parse(input, false, params);
132    assert_equals(variant, std::string("Plan"), msg.reasoning_content);
133    assert_equals(variant, std::string("Réponse"), msg.content);
134  }
135  // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
136  {
137    common_chat_parser_params params;
138    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
139    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
140    params.reasoning_in_content = false;
141    params.thinking_forced_open = true;
142    params.parse_tool_calls = true;
143    const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
144    common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, params);
145    assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
146    assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
147    assert_equals(variant, std::string("ok"), builder.consume_rest());
148  }
149  // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
150  {
151    common_chat_parser_params params;
152    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
153    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
154    params.reasoning_in_content = false;
155    params.thinking_forced_open = true;
156    params.parse_tool_calls = true;
157    const std::string variant("deepseek_v3_1_reasoning_format_none");
158    const std::string input = "REASONING</think>ok";
159    auto msg = common_chat_parse(input, false, params);
160    assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
161    assert_equals(variant, std::string(""), msg.reasoning_content);
162  }
163}
164
165static void test_regex() {
166  auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
167    common_chat_msg_parser builder(input, /* is_partial= */ false, {});
168    assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
169  };
170
171  test_throws("Hello, world!", "abc", "^abc$");
172  test_throws("Hello, world!", "e", "^e$");
173
174  {
175    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
176    builder.consume_regex(common_regex("Hello"));
177    assert_equals(", world!", builder.consume_rest());
178  }
179
180  {
181    // When in non partial mode, we can say whether the regex was consumed or not.
182    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
183    assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
184  }
185  {
186    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
187    auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
188    assert_equals(true, res.has_value());
189    // Verify captures
190    assert_equals<size_t>(2, res->groups.size());
191    assert_equals("Hell", builder.str(res->groups[0]));
192    assert_equals("el", builder.str(res->groups[1]));
193    // Verify position is after the match
194    assert_equals<size_t>(4, builder.pos());
195    assert_equals("o,", builder.consume_rest());
196  }
197  {
198    // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
199    common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
200    assert_throws([&]() {
201      builder.try_consume_regex(common_regex("Hello, world!"));
202    }, "^Hello, world!$");
203  }
204
205  // Now regardless of the mode, we can tell these aren't a match.
206  for (const auto is_partial : {false, true}) {
207    common_chat_msg_parser builder("Hello,", is_partial, {});
208    assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
209  }
210  for (const auto is_partial : {false, true}) {
211    common_chat_msg_parser builder("Hello,", is_partial, {});
212    assert_equals(false, builder.try_consume_literal("Oh"));
213  }
214}
215
// Truncated JSON object prefixes shared by the partial-JSON tests in this file.
// Each one breaks off inside the first key or inside the value of "name".
// Written as raw string literals so the fragments read exactly as they are parsed.
const std::vector<std::string> barely_healable_jsons = {
  R"({)",
  R"({")",
  R"({"\)",
  R"({"n)",
  R"({"name")",
  R"({"name":)",
  R"({"name":")",
  R"({"name":"\)",
  R"({"name":"python)",
  R"({"name":"python\)",
  R"({",)",
  R"({":)",
  R"({"[)",
  R"({"])",
  R"({"{)",
  R"({"})",
  R"({"1)",
  R"({"name":",)",
  R"({"name":":)",
  R"({"name":"[)",
  R"({"name":"])",
  R"({"name":"{)",
  R"({"name":"})",
  R"({"name":"1)",
};
242
243static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
244  common_chat_msg_parser builder(input, is_partial, {});
245  auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
246  assert_equals(true, js.has_value());
247  assert_equals(is_partial, js->is_partial);
248  assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
249}
250
251static void test_deepseek_v3_1_tool_calls() {
252    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
253    // variant: happy path for when it works as the model card says it should
254    const std::string variant("simple");
255    common_chat_parser_params params;
256    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
257    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
258    params.reasoning_in_content = false;
259    params.thinking_forced_open = false;
260    params.parse_tool_calls = true;
261    const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
262    auto msg = common_chat_parse(input, false, params);
263    assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
264    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
265    // JSON arguments are dumped without spaces
266    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
267    assert_equals(variant, std::string(""), msg.content);
268    assert_equals(variant, std::string(""), msg.reasoning_content);
269
270    // variant: simple + thinking open
271    {
272        common_chat_parser_params params;
273        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
274        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
275        params.reasoning_in_content = false;
276        params.thinking_forced_open = true;
277        params.parse_tool_calls = true;
278        const std::string variant("simple_thinking");
279        const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
280        auto m = common_chat_parse(in, false, params);
281        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
282        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
283        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
284        assert_equals(variant, std::string(""), m.content);
285        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
286    }
287    // variant: simple + multiple tool calls
288    {
289        common_chat_parser_params params;
290        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
291        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
292        params.reasoning_in_content = false;
293        params.thinking_forced_open = false;
294        params.parse_tool_calls = true;
295        const std::string variant("simple_multiple_tool_calls");
296        const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
297        auto m = common_chat_parse(in, false, params);
298        assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
299        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
300        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
301        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
302        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
303        assert_equals(variant, std::string("CONTENT"), m.content);
304        assert_equals(variant, std::string(""), m.reasoning_content);
305    }
306
307
308    // variant: thinking forced open + tool call in reasoning content
309    {
310        common_chat_parser_params params;
311        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
312        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
313        params.reasoning_in_content = false;
314        params.thinking_forced_open = true;
315        params.parse_tool_calls = true;
316        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
317        const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
318        auto m = common_chat_parse(in, false, params);
319        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
320        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
321        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
322        assert_equals(variant, std::string(""), m.content);
323        assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content);
324    }
325
326    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
327    //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
328    //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
329    //          add the reasoning content as regular content and parse the tool calls.
330    {
331        common_chat_parser_params params;
332        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
333        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
334        params.reasoning_in_content = false;
335        params.thinking_forced_open = true;
336        params.parse_tool_calls = true;
337        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
338        const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
339        auto m = common_chat_parse(in, false, params);
340        assert_equals(variant, std::string("REASONING"), m.content);
341        assert_equals(variant, std::string(""), m.reasoning_content);
342        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
343        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
344        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
345    }
346
347    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
348    {
349        common_chat_parser_params params;
350        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
351        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
352        params.reasoning_in_content = false;
353        params.thinking_forced_open = true;
354        params.parse_tool_calls = true;
355        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
356        const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
357        auto m = common_chat_parse(in, /* is_partial= */ true, params);
358        assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content);
359        assert_equals(variant, std::string(""), m.content);
360        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
361    }
362
363    // variant: thinking not forced open + reasoning + regular content + no tool calls
364    {
365        common_chat_parser_params params;
366        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
367        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
368        params.reasoning_in_content = false;
369        params.thinking_forced_open = true;
370        params.parse_tool_calls = true;
371        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
372        const std::string in = "REASONING</think>CONTENT";
373        auto m = common_chat_parse(in, false, params);
374        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
375        assert_equals(variant, std::string("CONTENT"), m.content);
376        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
377    }
378    // variant: thinking not forced open + missing reasoning + no tool calls
379    {
380        common_chat_parser_params params;
381        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
382        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
383        params.reasoning_in_content = false;
384        params.thinking_forced_open = false;
385        params.parse_tool_calls = true;
386        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
387        const std::string in = "CONTENT";
388        auto m = common_chat_parse(in, false, params);
389        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
390        assert_equals(variant, std::string("CONTENT"), m.content);
391        assert_equals(variant, std::string(""), m.reasoning_content);
392    }
393}
394
395static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
396  common_chat_msg_parser builder(input, parse_as_partial, {});
397  auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
398  assert_equals(true, js.has_value());
399  assert_equals(is_partial, js->is_partial);
400  assert_equals(expected, js->value.dump());
401}
402
403static void test_json_with_dumped_args_no_args() {
404  // Normal JSON, nothing to heal, nothing to dump
405  test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
406  // Full json is args
407  test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
408
409  // If the arguments are further down, don't heal partial content.
410  for (const auto & src : barely_healable_jsons) {
411    test(src, true, {{"arguments"}}, {}, "{}");
412  }
413  // But heal content that isn't partial.
414  test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
415}
416
417static void test_json_with_dumped_args() {
418
419  // Partial content.
420  test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
421  test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
422  test("{\"content\": ", true, {}, {{"content"}}, "{}");
423
424  // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
425  test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
426  for (const auto & src : barely_healable_jsons) {
427    test(src, true, {{}}, {}, src);
428  }
429
430  // Full JSON w/ args
431  for (auto parse_as_partial : {true, false}) {
432    test_with_args(
433      R"({"name": "python", "args": {"arg1": 1}})",
434      R"({"name":"python","args":"{\"arg1\":1}"})",
435      parse_as_partial,
436      /* is_partial= */ false
437    );
438  }
439
440  // Partial JSON w/ partial args
441  test_with_args(
442    R"({"foo": "bar", "args": {")",
443    R"({"foo":"bar","args":"{\""})"
444  );
445  // Partial args broken in object key
446  test_with_args(
447    R"({"foo": "bar", "args": {"ar)",
448    R"({"foo":"bar","args":"{\"ar"})"
449  );
450  // Partial args broken after object key
451  test_with_args(
452    R"({"foo": "bar", "args": {"arg1")",
453    R"({"foo":"bar","args":"{\"arg1\""})"
454  );
455  // Partial args broken before object value
456  test_with_args(
457    R"({"foo": "bar", "args": {"arg1":)",
458    R"({"foo":"bar","args":"{\"arg1\":"})"
459  );
460  // Partial args broken before object value (space)
461  test_with_args(
462    R"({"foo": "bar", "args": {"arg1": )",
463    R"({"foo":"bar","args":"{\"arg1\":"})"
464  );
465  // Partial args broken in object value that may not be complete (int)
466  test_with_args(
467    R"({"foo": "bar", "args": {"arg1": 1)",
468    R"({"foo":"bar","args":"{\"arg1\":"})"
469  );
470  // Partial args broken in object value that is complete (int)
471  test_with_args(
472    R"({"foo": "bar", "args": {"arg1": 1 )",
473    R"({"foo":"bar","args":"{\"arg1\":1"})"
474  );
475  // Partial args broken in object value that is incomplete (string)
476  test_with_args(
477    R"({"foo": "bar", "args": {"arg1": ")",
478    R"({"foo":"bar","args":"{\"arg1\":\""})"
479  );
480  // Partial args broken in object value that is complete (string)
481  test_with_args(
482    R"({"foo": "bar", "args": {"arg1": "1")",
483    R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
484  );
485  // Partial args broken on array opening
486  test_with_args(
487    R"({"foo": "bar", "args": [)",
488    R"({"foo":"bar","args":"["})"
489  );
490  // Partial args broken on array value that is incomplete (int)
491  test_with_args(
492    R"({"foo": "bar", "args": [1)",
493    R"({"foo":"bar","args":"["})"
494  );
495  // Partial args broken on array value that is complete (int)
496  test_with_args(
497    R"({"foo": "bar", "args": [1 )",
498    R"({"foo":"bar","args":"[1"})"
499  );
500  // Partial args broken on array value that is complete (string)
501  test_with_args(
502    R"({"foo": "bar", "args": ["1")",
503    R"({"foo":"bar","args":"[\"1\""})"
504  );
505  // Partial args broken after array value
506  test_with_args(
507    R"({"foo": "bar", "args": [1,)",
508    R"({"foo":"bar","args":"[1,"})"
509  );
510  // Partial args broken on nested array
511  test_with_args(
512    R"({"foo": "bar", "args": {"arg1": [)",
513    R"({"foo":"bar","args":"{\"arg1\":["})"
514  );
515
516  // Unicode tests
517  test_with_args(
518    R"({"foo": "bar", "args": {"arg1": "\u)",
519    R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
520  );
521  test_with_args(
522    R"({"foo": "bar", "args": {"arg1": "\u0)",
523    R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
524  );
525  test_with_args(
526    R"({"foo": "bar", "args": {"arg1": "\u00)",
527    R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
528  );
529  test_with_args(
530    R"({"foo": "bar", "args": {"arg1": "\u000)",
531    R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
532  );
533  test_with_args(
534    R"({"foo": "bar", "args": {"arg1": "\u0000)",
535    R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
536  );
537  test_with_args(
538    R"({"foo": "bar", "args": {"arg1": "\ud8)",
539    R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
540  );
541  test_with_args(
542    R"({"foo": "bar", "args": {"arg1": "\ud80)",
543    R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
544  );
545  test_with_args(
546    R"({"foo": "bar", "args": {"arg1": "\ud800)",
547    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
548  );
549  test_with_args(
550    R"({"foo": "bar", "args": {"arg1": "\ud800\)",
551    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
552  );
553  test_with_args(
554    R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
555    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
556  );
557  test_with_args(
558    R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
559    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
560  );
561  test_with_args(
562    R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
563    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
564  );
565  test_with_args(
566    R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
567    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
568  );
569  test_with_args(
570    R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
571    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
572  );
573}
574
575static void test_positions() {
576  {
577    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
578    assert_equals<size_t>(0, builder.pos());
579    assert_throws([&]() { builder.move_to(100); });
580    assert_equals<size_t>(0, builder.pos());
581    assert_throws([&]() { builder.move_back(1); });
582    assert_equals<size_t>(0, builder.pos());
583
584    builder.move_to(8);
585    assert_equals<size_t>(8, builder.pos());
586    builder.move_back(1);
587    assert_equals<size_t>(7, builder.pos());
588    assert_equals("world!", builder.consume_rest());
589
590    builder.move_to(0);
591    assert_equals<size_t>(0, builder.pos());
592
593    assert_throws([&]() { builder.finish(); });
594    assert_equals<size_t>(0, builder.pos());
595
596    builder.move_to(builder.input().size());
597    builder.finish();
598  }
599  {
600    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
601
602    builder.move_to(builder.input().size());
603    assert_equals<size_t>(builder.input().size(), builder.pos());
604    builder.finish();
605  }
606}
607
608int main() {
609    test_positions();
610    test_json_with_dumped_args_no_args();
611    test_json_with_dumped_args();
612    test_reasoning();
613    test_regex();
614    test_deepseek_v3_1_tool_calls();
615    std::cout << "All tests passed!\n";
616    return 0;
617}