1#include <string>
  2#include <iostream>
  3#include <numeric>
  4
  5#include "chat-parser.h"
  6#include "chat-peg-parser.h"
  7#include "chat.h"
  8#include "common.h"
  9#include "json-schema-to-grammar.h"
 10#include "peg-parser.h"
 11#include "testing.h"
 12#include "peg-parser/simple-tokenize.h"
 13#include "nlohmann/json.hpp"
 14
 15using json = nlohmann::ordered_json;
 16
 17static json create_tools();
 18static void test_example_native(testing & t);
 19static void test_example_qwen3_coder(testing & t);
 20static void test_command7_parser_compare(testing & t);
 21
 22int main(int argc, char *argv[]) {
 23    testing t(std::cout);
 24    if (argc >= 2) {
 25        t.set_filter(argv[1]);
 26    }
 27
 28    const char * verbose = getenv("LLAMA_TEST_VERBOSE");
 29    if (verbose) {
 30        t.verbose = std::string(verbose) == "1";
 31    }
 32
 33    t.test("native", test_example_native);
 34    t.test("qwen3 coder", test_example_qwen3_coder);
 35    t.test("comparison", test_command7_parser_compare);
 36
 37    return t.summary();
 38}
 39
 40static json create_tools() {
 41    json tools = json::array();
 42
 43    json tool_weather = {
 44        {"type", "function"},
 45        {"function", {
 46            {"name", "get_current_weather"},
 47            {"description", "Get the current weather in a given location"},
 48            {"parameters", {
 49                {"type", "object"},
 50                {"properties", {
 51                    {"location", {
 52                        {"type", "string"},
 53                        {"description", "The city and state, e.g. San Francisco, CA"}
 54                    }},
 55                    {"unit", {
 56                        {"type", "string"},
 57                        {"enum", {"celsius", "fahrenheit"}},
 58                        {"description", "The temperature unit to use. Infer this from the users location."}
 59                    }}
 60                }},
 61                {"required", {"location", "unit"}},
 62            }},
 63        }}
 64    };
 65    tools.push_back(tool_weather);
 66
 67    json tool_forecast = {
 68        {"type", "function"},
 69        {"function", {
 70            {"name", "get_forecast"},
 71            {"description", "Get the weather forecast for a given location"},
 72            {"parameters", {
 73                {"type", "object"},
 74                {"properties", {
 75                    {"location", {
 76                        {"type", "string"},
 77                        {"description", "The city and state, e.g. San Francisco, CA"}
 78                    }},
 79                    {"unit", {
 80                        {"type", "string"},
 81                        {"enum", {"celsius", "fahrenheit"}},
 82                        {"description", "The temperature unit to use. Infer this from the users location."}
 83                    }},
 84                    {"days", {
 85                        {"type", "integer"},
 86                        {"description", "Number of days to forecast (1-10)"},
 87                        {"minimum", 1},
 88                        {"maximum", 10}
 89                    }}
 90                }},
 91                {"required", {"location", "unit"}},
 92            }},
 93        }}
 94    };
 95    tools.push_back(tool_forecast);
 96
 97    json tool_search = {
 98        {"type", "function"},
 99        {"function", {
100            {"name", "search_knowledge_base"},
101            {"description", "Search the internal technical documentation knowledge base."},
102            {"parameters", {
103                {"type", "object"},
104                {"properties", {
105                    {"query", {
106                        {"type", "string"},
107                        {"description", "The search query string."}
108                    }},
109                    {"max_results", {
110                        {"type", "integer"},
111                        {"description", "The maximum number of results to return."},
112                        {"default", 5}
113                    }},
114                    {"category", {
115                        {"type", "string"},
116                        {"enum", {"api", "troubleshooting", "billing", "general"}},
117                        {"description", "Filter search by specific category."}
118                    }}
119                }},
120                {"required", {"query", "category"}},
121                {"additionalProperties", false}
122            }},
123            {"strict", true}
124        }}
125    };
126    tools.push_back(tool_search);
127
128    return tools;
129}
130
// Describes a single argument of a tool/function definition.
// NOTE(review): not referenced anywhere in this file — possibly vestigial;
// confirm before removing.
struct tool_argument {
    std::string name;        // argument (parameter) name
    std::string type;        // JSON-schema "type" of the argument
    bool is_required;        // whether the argument appears in the schema's "required" list
    json schema;             // the argument's full JSON schema
};
137
// Describes a tool/function definition together with its parsed arguments.
// NOTE(review): not referenced anywhere in this file — possibly vestigial;
// confirm before removing.
struct tool_definition {
    std::string name;                      // function name
    std::vector<tool_argument> arguments;  // per-argument descriptions
    json schema;                           // the function's "parameters" JSON schema
};
143
// Test fictitious model output that emits arguments as JSON.
// Builds a PEG parser per test case (covering reasoning blocks, tool calls,
// response_format JSON output, and plain content), runs it over a canned model
// output, then checks the mapped common_chat_msg against expectations.
static void test_example_native(testing & t) {
    struct test_case {
        // Parameters
        std::string name;                                   // test case label
        json tools;                                         // tool definitions ({} = none)
        common_chat_tool_choice tool_choice;
        common_reasoning_format reasoning_format;
        json json_schema;                                   // response_format schema ({} = none)
        bool parallel_tool_calls;
        bool thinking_forced_open;
        std::string input;                                  // raw model output to parse

        // Expect
        std::string expect_reasoning;
        std::string expect_content;
        std::vector<common_chat_tool_call> expect_tool_calls;
    };

    // Constructs the parser for a given test case. The shape of the grammar
    // depends on whether tools, a response schema, or neither is configured.
    auto build_parser = [](const test_case & tc) {
        return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
            // With reasoning_format == NONE, <think>...</think> is left in the content.
            auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
            auto reasoning = p.eps();
            if (tc.thinking_forced_open) {
                // If thinking is forced open, expect a closing tag
                reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
            } else {
                // Otherwise, optionally accept thinking wrapped in tags
                reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
            }

            // tool calling parser
            if (tc.tools.is_array() && !tc.tools.empty()) {
                // One alternative per tool: {"name": "<tool>", "arguments": <schema-checked json>}
                auto tools = p.choice();
                for (const auto & tool : tc.tools) {
                    const auto & function = tool.at("function");
                    std::string name = function.at("name");
                    const auto & schema = function.at("parameters");

                    auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
                    auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));

                    tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}");
                };

                // Additional comma-separated calls are only accepted when
                // parallel tool calls are enabled.
                auto parallel_calls = p.eps();
                if (tc.parallel_tool_calls) {
                    parallel_calls = p.zero_or_more("," << tools);
                }

                // The whole call list is wrapped in <tool_call>[...]</tool_call>.
                auto tool_call = p.trigger_rule("tool-call",
                    p.sequence({
                        p.literal("<tool_call>["),
                        tools,
                        parallel_calls,
                        p.literal("]</tool_call>")
                    })
                );

                return p.sequence({
                    (reasoning_in_content ? p.eps() : reasoning),
                    p.content(p.until("<tool_call>")),
                    p.optional(p.space() + tool_call),
                    p.space(),
                    p.end()
                });
            }

            // response_format parser
            if (tc.json_schema.is_object() && !tc.json_schema.empty()) {
                return p.sequence({
                    (reasoning_in_content ? p.eps() : reasoning),
                    p.content(p.schema(p.json(), "response-output", tc.json_schema)),
                    p.space(),
                    p.end()
                });
            }

            // Content-only parser
            return p.sequence({
                (reasoning_in_content ? p.eps() : reasoning),
                p.content(p.rest()),
                p.end()
            });
        });
    };

    // Scenario table: reasoning handling, tool calling (single and parallel),
    // and response_format output.
    std::vector<test_case> test_cases = std::vector<test_case>{
        {
            /* .name =                 */ "content with thinking_forced_open = false",
            /* .tools =                */ {},
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
            /* .json_schema =          */ {},
            /* .parallel_tool_calls =  */ false,
            /* .thinking_forced_open = */ false,
            /* .input =                */ (
                "<think>The user said hello, I must say hello back</think>\nHello"
            ),
            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
            /* .expect_content =       */ "Hello",
            /* .expect_tool_calls =    */ {},
        },
        {
            /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
            /* .tools =                */ {},
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
            /* .json_schema =          */ {},
            /* .parallel_tool_calls =  */ false,
            /* .thinking_forced_open = */ false,
            /* .input =                */ (
                "Hello"
            ),
            /* .expect_reasoning =     */ "",
            /* .expect_content =       */ "Hello",
            /* .expect_tool_calls =    */ {},
        },
        {
            /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
            /* .tools =                */ {},
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
            /* .json_schema =          */ {},
            /* .parallel_tool_calls =  */ false,
            /* .thinking_forced_open = */ true,
            /* .input =                */ (
                "<think>The user said hello, I must say hello back</think>\nHello"
            ),
            /* .expect_reasoning =     */ "",
            /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
            /* .expect_tool_calls =    */ {},
        },
        {
            /* .name =                 */ "content with thinking_forced_open = true",
            /* .tools =                */ {},
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
            /* .json_schema =          */ {},
            /* .parallel_tool_calls =  */ false,
            /* .thinking_forced_open = */ true,
            /* .input =                */ (
                "The user said hello, I must say hello back</think>\nHello"
            ),
            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
            /* .expect_content =       */ "Hello",
            /* .expect_tool_calls =    */ {},
        },
        {
            /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
            /* .tools =                */ {},
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
            /* .json_schema =          */ {},
            /* .parallel_tool_calls =  */ false,
            /* .thinking_forced_open = */ true,
            /* .input =                */ (
                "The user said hello, I must say hello back</think>\nHello"
            ),
            /* .expect_reasoning =     */ "",
            /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
            /* .expect_tool_calls =    */ {},
        },
        {
            /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
            /* .tools =                */ create_tools(),
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
            /* .json_schema =          */ {},
            /* .parallel_tool_calls =  */ false,
            /* .thinking_forced_open = */ true,
            /* .input =                */ (
                "I must get the weather in New York</think>\n"
                "<tool_call>["
                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
                "]</tool_call>"
            ),
            /* .expect_reasoning =     */ "I must get the weather in New York",
            /* .expect_content =       */ "",
            /* .expect_tool_calls =    */ {{
                /* .name =      */ "get_current_weather",
                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
                /* .id =        */ "",
            }},
        },
        {
            /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
            /* .tools =                */ create_tools(),
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
            /* .json_schema =          */ {},
            /* .parallel_tool_calls =  */ true,
            /* .thinking_forced_open = */ true,
            /* .input =                */ (
                "I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me search that for you."
                "<tool_call>["
                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
                ", "
                R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
                ", "
                R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
                ", "
                R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
                "]</tool_call>"
            ),
            /* .expect_reasoning =     */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.",
            /* .expect_content =       */ "Let me search that for you.",
            /* .expect_tool_calls =    */ {{
                /* .name =      */ "get_current_weather",
                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
                /* .id =        */ "",
            }, {
                /* .name =      */ "get_current_weather",
                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
                /* .id =        */ "",
            }, {
                /* .name =      */ "get_forecast",
                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
                /* .id =        */ "",
            }, {
                /* .name =      */ "get_forecast",
                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
                /* .id =        */ "",
            }},
        },
        {
            /* .name =                 */ "response_format with thinking_forced_open = true",
            /* .tools =                */ {},
            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
            /* .json_schema =          */ {
                {"type", "object"},
                {"properties", {
                    {"invoice_number", {{"type", "string"}}},
                    {"amount", {{"type", "number"}}},
                    {"due_date", {{"type", "string"}}}
                }},
                {"required", {"invoice_number", "amount", "due_date"}}
            },
            /* .parallel_tool_calls =  */ false,
            /* .thinking_forced_open = */ true,
            /* .input =                */ (
                "I must produce the invoice in the requested format</think>\n"
                R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"
            ),
            /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
            /* .expect_content =       */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})",
            /* .expect_tool_calls =    */ {},
        },
    };

    for (const auto & tc : test_cases) {
        t.test(tc.name, [&](testing & t) {
            auto parser = build_parser(tc);
            // With tools and a non-required tool choice, the grammar is built lazily.
            auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
            auto grammar = build_grammar([&](const common_grammar_builder & builder) {
                for (auto const & def : tc.tools) {
                    auto function = def.at("function");
                    auto parameters = function.at("parameters");
                    builder.resolve_refs(parameters);
                };
                parser.build_grammar(builder, lazy);
            });

            t.log("Grammar:");
            for (auto const & line : string_split(grammar, "\n")) {
                t.log(line);
            }

            // Parse the full (non-partial) input and map the AST to a chat message.
            common_peg_parse_context ctx(tc.input, false);
            auto result = parser.parse(ctx);

            t.assert_true("success", result.success());

            common_chat_msg msg;
            auto mapper = common_chat_peg_native_mapper(msg);
            mapper.from_ast(ctx.ast, result);

            t.assert_equal("content equal", tc.expect_content, msg.content);
            t.assert_equal("reasoning equal", tc.expect_reasoning, msg.reasoning_content);
            t.assert_equal("number of tool calls", tc.expect_tool_calls.size(), msg.tool_calls.size());
            for (auto i = 0u; i < std::min(tc.expect_tool_calls.size(), msg.tool_calls.size()); i++) {
                t.assert_equal("tool name", tc.expect_tool_calls[i].name, msg.tool_calls[i].name);
                t.assert_equal("tool args", tc.expect_tool_calls[i].arguments, msg.tool_calls[i].arguments);
            }
        });
    }
}
432
433static void test_example_qwen3_coder(testing & t) {
434    auto tools = create_tools();
435    auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
436        auto content = p.rule("content", p.content(p.until("<tool_call>")));
437
438        std::vector<common_peg_parser> tool_parsers;
439        for (auto const & def : tools) {
440            auto function = def.at("function");
441            std::string name = function.at("name");
442            auto parameters = function.at("parameters");
443            auto properties = parameters.at("properties");
444
445            std::set<std::string> required_properties;
446            if (function.contains("required")) {
447                function.at("required").get_to(required_properties);
448            }
449
450            std::vector<common_peg_parser> arg_parsers;
451            for (const auto & [param_name, param_schema] : properties.items()) {
452                bool is_required = required_properties.find(param_name) != required_properties.end();
453                auto type = param_schema.value("type", "object");
454
455                auto arg = p.tool_arg(p.sequence({
456                    p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
457                    (type == "string" ?
458                        p.tool_arg_string_value(
459                            p.schema(
460                                p.until_one_of({
461                                    "</parameter>\n<parameter=",
462                                    "</parameter>\n</function>"
463                                }),
464                                "tool-" + name + "-arg-" + param_name + "-schema",
465                                param_schema,
466                                true
467                            )
468                        ) : p.tool_arg_json_value(
469                            p.schema(
470                                p.json(),
471                                "tool-" + name + "-arg-" + param_name + "-schema",
472                                param_schema
473                            )
474                        )
475                    ),
476                    p.tool_arg_close(
477                        "</parameter>\n" +
478                        p.peek(p.literal("<parameter=") | p.literal("</function>"))
479                    )
480                }));
481
482                arg_parsers.push_back(is_required ?
483                    p.rule("tool-" + name + "-arg-" + param_name, arg) :
484                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
485            }
486
487            tool_parsers.push_back(p.rule("tool-" + name,
488                p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
489                << p.sequence(arg_parsers)
490                << p.tool_close(p.literal("</function>"))
491            ));
492        };
493
494        auto tool_call = p.trigger_rule("tool-call",
495            "<tool_call>"
496            << p.choice(tool_parsers)
497            << "</tool_call>"
498        );
499
500        return content + p.zero_or_more(p.space() + tool_call) + p.end();
501    });
502
503    auto grammar = build_grammar([&](const common_grammar_builder & builder) {
504        for (auto const & def : tools) {
505            auto function = def.at("function");
506            auto parameters = function.at("parameters");
507            builder.resolve_refs(parameters);
508        };
509        parser.build_grammar(builder);
510    });
511
512    t.log("Grammar:");
513    for (auto const & line : string_split(grammar, "\n")) {
514        t.log(line);
515    }
516
517    t.test("incremental parsing", [&](testing &t) {
518        std::string input =
519            "Let me search the knowledge base for cat pictures."
520            "<tool_call>\n"
521            "<function=search_knowledge_base>\n"
522            "<parameter=query>cat pictures</parameter>\n"
523            "<parameter=category>general</parameter>\n"
524            "</function>\n"
525            "</tool_call>";
526
527        std::vector<std::string> tokens = simple_tokenize(input);
528
529        common_chat_msg prev;
530        for (auto it = tokens.begin(); it != tokens.end(); it++) {
531            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
532
533            common_peg_parse_context ctx(in, it + 1 < tokens.end());
534
535            auto result = parser.parse(ctx);
536            if (!t.assert_equal("not fail", false, result.fail())) {
537                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
538            }
539
540            common_chat_msg msg;
541            auto mapper = common_chat_peg_constructed_mapper(msg);
542            mapper.from_ast(ctx.ast, result);
543
544            //t.log("Input: " + input);
545            t.log("===========================================");
546            t.log("Iteration " + std::to_string(in.size()));
547            t.log("Reasoning: " + msg.reasoning_content);
548            t.log("Content  : " + msg.content);
549            for (const auto & tc : msg.tool_calls) {
550                t.log("Tool name: " + tc.name);
551                t.log("Tool args: " + tc.arguments);
552            }
553
554            try {
555                // This shouldn't emit any runtime errors
556                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
557            } catch(const std::exception & e) {
558                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
559                t.assert_true(std::string("failed with ") + e.what(), false);
560            }
561
562            prev = msg;
563        }
564    });
565}
566
567void test_command7_parser_compare(testing & t) {
568    auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) {
569        auto thinking = p.reasoning_block(
570            "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
571
572        auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>";
573
574        auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\"")));
575        auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
576        auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json()));
577
578        auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args);
579        auto tool_call = p.rule("tool-call", p.tool(
580            p.tool_open(p.literal("{"))
581            << tool_call_fields
582            << p.zero_or_more( p.literal(",") << tool_call_fields)
583            << p.tool_close(p.literal("}"))
584        ));
585
586        auto tool_calls = p.rule("tool-calls",
587            "<|START_ACTION|>"
588            << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
589            << "<|END_ACTION|>");
590
591        return p.optional(thinking) << (tool_calls | response) + p.end();
592    });
593
594    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) {
595        common_peg_parse_context ctx(input, is_partial);
596        auto result = p.parse(ctx);
597
598        common_chat_msg msg;
599        auto mapper = common_chat_peg_native_mapper(msg);
600        mapper.from_ast(ctx.ast, result);
601
602        if (print_results) {
603            std::cout << "== Parsed (new) ==\n";
604            std::cout << "=== Reasoning ===\n";
605            std::cout << msg.reasoning_content << "\n";
606            std::cout << "\n\n=== Content ===\n";
607            std::cout << msg.content << "\n";
608            std::cout << "\n\n=== Tool Calls ===\n";
609            for (const auto & tc : msg.tool_calls) {
610                std::cout << "id: " << tc.id << "\n";
611                std::cout << "name: " << tc.name << "\n";
612                std::cout << "args: " << tc.arguments << "\n";
613            }
614        }
615    };
616
617    auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) {
618        // Original common_chat_combinator_parser taken from chat.cpp
619        common_chat_parser_params params;
620        params.format = COMMON_CHAT_FORMAT_GENERIC;
621        params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
622        params.reasoning_in_content = false;
623        params.thinking_forced_open = false;
624        common_chat_msg_parser builder(
625            input,
626            /* .is_partial = */ need_more_input,
627            params
628        );
629
630        builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
631
632        static const common_regex start_action_regex("<\\|START_ACTION\\|>");
633        static const common_regex end_action_regex("<\\|END_ACTION\\|>");
634        static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
635        static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
636
637        if (auto res = builder.try_find_regex(start_action_regex)) {
638            // If we didn't extract thoughts, prelude includes them.
639            auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } });
640            for (const auto & tool_call : tool_calls.value) {
641                std::string name      = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
642                std::string id        = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
643                std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
644                if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
645                    throw common_chat_msg_partial_exception("incomplete tool call");
646                }
647            }
648            if (tool_calls.is_partial) {
649                throw common_chat_msg_partial_exception("incomplete tool call");
650            }
651            builder.consume_regex(end_action_regex);
652        } else if (auto res = builder.try_find_regex(start_response_regex)) {
653            if (!builder.try_find_regex(end_response_regex)) {
654                builder.add_content(builder.consume_rest());
655                throw common_chat_msg_partial_exception(end_response_regex.str());
656            }
657        } else {
658            builder.add_content(builder.consume_rest());
659        }
660
661        if (print_results) {
662            std::cout << "== Parsed (legacy) ==\n";
663            std::cout << "=== Reasoning ===\n";
664            std::cout << builder.result().reasoning_content << "\n";
665            std::cout << "\n\n=== Content ===\n";
666            std::cout << builder.result().content << "\n";
667            std::cout << "\n\n=== Tool Calls ===\n";
668            for (const auto & tc : builder.result().tool_calls) {
669                std::cout << "id: " << tc.id << "\n";
670                std::cout << "name: " << tc.name << "\n";
671                std::cout << "args: " << tc.arguments << "\n";
672            }
673        }
674    };
675
676    std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
677            "budget of $4000 for a two-week stay, we need to:\n\n"
678            "1. Identify key historical sites and modern attractions in Japan.\n"
679            "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
680            "3. Determine the best modes of transportation for getting around Japan.\n"
681            "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
682            "overspending.\n"
683            "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
684            "to attractions.";
685
686    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {{
687        "call_0",
688        "plan_trip",
689        nlohmann::json::parse(R"({
690            "destination": "Japan",
691            "duration": 14,
692            "budget": 4000,
693            "interests": ["historical sites", "modern attractions"],
694            "accommodation_preferences": "affordable",
695            "transportation_preferences": "efficient",
696            "meal_preferences": "local cuisine"
697        })")
698    }};
699
700    std::vector<std::string> tokens;
701
702    // Build tokens
703    if (!reasoning.empty()) {
704        auto tokenized = simple_tokenize(reasoning);
705        tokens.emplace_back("<|START_THINKING|>");
706        tokens.insert(tokens.end(), tokenized.begin(), tokenized.end());
707        tokens.emplace_back("<|END_THINKING|>");
708    }
709
710    if (!tool_calls.empty()) {
711        tokens.emplace_back("<|START_ACTION|>");
712
713        auto json = nlohmann::json::array();
714        for (const auto & tc : tool_calls) {
715            auto tc_json = nlohmann::json::object();
716            tc_json["tool_call_id"] = std::get<0>(tc);
717            tc_json["tool_name"] = std::get<1>(tc);
718            tc_json["parameters"] = std::get<2>(tc);
719            json.push_back(tc_json);
720        }
721
722        auto tokenized = simple_tokenize(json.dump(-1, ' ', true));
723        tokens.insert(tokens.end(), tokenized.begin(), tokenized.end());
724
725        tokens.emplace_back("<|END_ACTION|>");
726    }
727
728    std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string());
729
730    // Run tests
731    t.test("legacy_parse", [&](testing & /* t */) {
732        test_legacy(input, false, false);
733    });
734
735    t.test("current_parse", [&](testing & /* t */) {
736        test_current(parser, input, false, false);
737    });
738
739    // Run benchmarks
740    t.bench("legacy_parse_benchmark complete", [&]() {
741        test_legacy(input, false, false);
742    });
743
744    t.bench("legacy_parse_benchmark incremental", [&]() {
745        std::string in;
746        for (auto i = 0u; i < tokens.size(); i++) {
747            in += tokens[i];
748
749            try {
750                test_legacy(in, i + 1 < tokens.size(), false);
751            } catch (common_chat_msg_partial_exception & /* e */) {
752                // Do nothing, this is expected
753            }
754        }
755    }, 20);
756
757    t.bench("current_parse_benchmark complete", [&]() {
758        test_current(parser, input, false, false);
759    }, 100);
760
761    t.bench("current_parse_benchmark incremental", [&]() {
762        std::string in;
763        for (auto i = 0u; i < tokens.size(); i++) {
764            in += tokens[i];
765            test_current(parser, in, i + 1 < tokens.size(), false);
766        }
767    }, 20);
768}