1#include "chat-parser.h"
2#include "chat-peg-parser.h"
3#include "common.h"
4#include "log.h"
5#include "peg-parser.h"
6#include "regex-partial.h"
7
8#include <algorithm>
9#include <cctype>
10#include <optional>
11#include <stdexcept>
12#include <string>
13#include <string_view>
14#include <vector>
15
16using json = nlohmann::ordered_json;
17
18static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
19 const common_regex & prefix,
20 size_t rstrip_prefix = 0) {
21 static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };
22 if (auto res = builder.try_find_regex(prefix)) {
23 builder.move_back(rstrip_prefix);
24 auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
25 if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
26 throw common_chat_msg_partial_exception("incomplete tool call array");
27 }
28 } else {
29 builder.add_content(builder.consume_rest());
30 }
31}
32
33static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
34 std::string arguments;
35 if (builder.is_partial()) {
36 arguments = (json{
37 { "code", code + builder.healing_marker() }
38 })
39 .dump();
40 auto idx = arguments.find(builder.healing_marker());
41 if (idx != std::string::npos) {
42 arguments.resize(idx);
43 }
44 } else {
45 arguments = (json{
46 { "code", code }
47 })
48 .dump();
49 }
50 return arguments;
51}
52
53/**
54 * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
55 * Aggregates the prefix, suffix and in-between text into the content.
56 */
57static void parse_json_tool_calls(
58 common_chat_msg_parser & builder,
59 const std::optional<common_regex> & block_open,
60 const std::optional<common_regex> & function_regex_start_only,
61 const std::optional<common_regex> & function_regex,
62 const common_regex & close_regex,
63 const std::optional<common_regex> & block_close,
64 bool allow_raw_python = false,
65 const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
66 nullptr) {
67 auto parse_tool_calls = [&]() {
68 size_t from = std::string::npos;
69 auto first = true;
70 while (true) {
71 auto start_pos = builder.pos();
72 auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
73 function_regex ? builder.try_find_regex(*function_regex, from) :
74 std::nullopt;
75
76 if (res) {
77 std::string name;
78 if (get_function_name) {
79 name = get_function_name(*res);
80 } else {
81 GGML_ASSERT(res->groups.size() == 2);
82 name = builder.str(res->groups[1]);
83 }
84 first = false;
85 if (name.empty()) {
86 // get_function_name signalled us that we should skip this match and treat it as content.
87 from = res->groups[0].begin + 1;
88 continue;
89 }
90 from = std::string::npos;
91
92 auto maybe_raw_python = name == "python" && allow_raw_python;
93 if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
94 if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
95 if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
96 throw common_chat_msg_partial_exception("incomplete tool call");
97 }
98 builder.consume_regex(close_regex);
99 }
100 continue;
101 }
102 if (maybe_raw_python) {
103 auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
104 if (!builder.add_tool_call(name, "", arguments)) {
105 throw common_chat_msg_partial_exception("incomplete tool call");
106 }
107 return;
108 }
109 throw common_chat_msg_partial_exception("incomplete tool call");
110 } else {
111 builder.move_to(start_pos);
112 }
113 break;
114 }
115 if (block_close) {
116 builder.consume_regex(*block_close);
117 }
118 builder.consume_spaces();
119 builder.add_content(builder.consume_rest());
120 };
121 if (block_open) {
122 if (auto res = builder.try_find_regex(*block_open)) {
123 parse_tool_calls();
124 } else {
125 builder.add_content(builder.consume_rest());
126 }
127 } else {
128 parse_tool_calls();
129 }
130}
131
132common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
133 : input_(input), is_partial_(is_partial), syntax_(syntax)
134{
135 result_.role = "assistant";
136
137 while (true) {
138 std::string id = std::to_string(std::rand());
139 if (input.find(id) == std::string::npos) {
140 healing_marker_ = id;
141 break;
142 }
143 }
144}
145
146std::string common_chat_msg_parser::str(const common_string_range & rng) const {
147 GGML_ASSERT(rng.begin <= rng.end);
148 return input_.substr(rng.begin, rng.end - rng.begin);
149}
150
151void common_chat_msg_parser::add_content(const std::string &content) {
152 result_.content += content;
153}
154
155void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
156 result_.reasoning_content += reasoning_content;
157}
158
159bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
160 if (name.empty()) {
161 return false;
162 }
163
164 common_chat_tool_call tool_call;
165 tool_call.name = name;
166 tool_call.arguments = arguments;
167 tool_call.id = id;
168
169 // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
170 result_.tool_calls.emplace_back(tool_call);
171
172 return true;
173}
174bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
175 std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
176 std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
177 std::string arguments = "";
178 if (tool_call.contains("arguments")) {
179 if (tool_call.at("arguments").is_object()) {
180 arguments = tool_call.at("arguments").dump();
181 } else {
182 arguments = tool_call.at("arguments");
183 }
184 }
185
186 return add_tool_call(name, id, arguments);
187}
188
189bool common_chat_msg_parser::add_tool_calls(const json & arr) {
190 for (const auto & item : arr) {
191 if (!add_tool_call(item)) {
192 return false;
193 }
194 }
195 return true;
196}
197
198bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
199 if (!tool_call.is_object() || tool_call.size() != 1) {
200 return false;
201 }
202
203 // Get the tool name (the single key in the object)
204 auto it = tool_call.begin();
205 std::string name = it.key();
206
207 if (name.empty()) {
208 return false;
209 }
210
211 // Get the arguments (the nested object)
212 const json & args_json = it.value();
213 std::string arguments = "";
214
215 if (args_json.is_object()) {
216 arguments = args_json.dump();
217 } else if (args_json.is_string()) {
218 arguments = args_json;
219 } else if (!args_json.is_null()) {
220 // For other types, convert to string representation
221 arguments = args_json.dump();
222 }
223
224 return add_tool_call(name, "", arguments);
225}
226void common_chat_msg_parser::finish() {
227 if (!is_partial_ && pos_ != input_.size()) {
228 throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
229 }
230}
231
232bool common_chat_msg_parser::consume_spaces() {
233 const auto length = input_.size();
234 auto consumed = false;
235 while (pos_ < length && std::isspace(input_[pos_])) {
236 ++pos_;
237 consumed = true;
238 }
239 return consumed;
240}
241
242bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
243 auto pos = pos_;
244 for (auto i = 0u; i < literal.size(); ++i) {
245 if (pos >= input_.size()) {
246 return false;
247 }
248 if (input_[pos] != literal[i]) {
249 return false;
250 }
251 ++pos;
252 }
253 pos_ = pos;
254 return true;
255}
256
257std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_literal(const std::string & literal) {
258 auto idx = input_.find(literal, pos_);
259 if (idx != std::string::npos) {
260 find_regex_result res;
261 res.prelude = input_.substr(pos_, idx - pos_);
262 auto end = idx + literal.size();
263 res.groups.emplace_back(common_string_range{idx, end});
264 move_to(end);
265 return res;
266 }
267 if (is_partial_) {
268 idx = string_find_partial_stop(input_, literal);
269 if (idx != std::string::npos && idx >= pos_) {
270 find_regex_result res;
271 res.prelude = input_.substr(pos_, idx - pos_);
272 auto end = input_.size();
273 res.groups.emplace_back(common_string_range{idx, end});
274 move_to(end);
275 return res;
276 }
277 }
278 return std::nullopt;
279}
280
281void common_chat_msg_parser::consume_literal(const std::string & literal) {
282 if (!try_consume_literal(literal)) {
283 throw common_chat_msg_partial_exception(literal);
284 }
285}
286
287bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
288 std::string pending_reasoning_prefix;
289
290 if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
291 return false;
292 }
293
294 auto set_reasoning_prefix = [&](size_t prefix_pos) {
295 if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
296 return;
297 }
298 if (prefix_pos + start_think.size() > input_.size()) {
299 pending_reasoning_prefix.clear();
300 return;
301 }
302 // Capture the exact literal that opened the reasoning section so we can
303 // surface it back to callers. This ensures formats that force the
304 // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
305 // instead of dropping it during parsing.
306 pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
307 };
308
309 auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
310 auto stripped_reasoning = string_strip(reasoning);
311 if (stripped_reasoning.empty()) {
312 return;
313 }
314 if (syntax_.reasoning_in_content) {
315 add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
316 add_content(stripped_reasoning);
317 if (closed) {
318 add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
319 }
320 } else {
321 if (!pending_reasoning_prefix.empty()) {
322 add_reasoning_content(pending_reasoning_prefix);
323 pending_reasoning_prefix.clear();
324 }
325 add_reasoning_content(stripped_reasoning);
326 }
327 };
328
329 const size_t saved_pos = pos_;
330 const size_t saved_content_size = result_.content.size();
331 const size_t saved_reasoning_size = result_.reasoning_content.size();
332
333 auto restore_state = [&]() {
334 move_to(saved_pos);
335 result_.content.resize(saved_content_size);
336 result_.reasoning_content.resize(saved_reasoning_size);
337 };
338
339 // Allow leading whitespace to be preserved as content when reasoning is present at the start
340 size_t cursor = pos_;
341 size_t whitespace_end = cursor;
342 while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
343 ++whitespace_end;
344 }
345
346 if (whitespace_end >= input_.size()) {
347 restore_state();
348 if (syntax_.thinking_forced_open) {
349 auto rest = input_.substr(saved_pos);
350 if (!rest.empty()) {
351 handle_reasoning(rest, /* closed */ !is_partial());
352 }
353 move_to(input_.size());
354 return true;
355 }
356 return false;
357 }
358
359 cursor = whitespace_end;
360 const size_t remaining = input_.size() - cursor;
361 const size_t start_prefix = std::min(start_think.size(), remaining);
362 const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;
363
364 if (has_start_tag && start_prefix < start_think.size()) {
365 move_to(input_.size());
366 return true;
367 }
368
369 if (has_start_tag) {
370 if (whitespace_end > pos_) {
371 add_content(input_.substr(pos_, whitespace_end - pos_));
372 }
373 set_reasoning_prefix(cursor);
374 cursor += start_think.size();
375 } else if (syntax_.thinking_forced_open) {
376 cursor = whitespace_end;
377 } else {
378 restore_state();
379 return false;
380 }
381 while (true) {
382 if (cursor >= input_.size()) {
383 move_to(input_.size());
384 return true;
385 }
386
387 size_t end_pos = input_.find(end_think, cursor);
388 if (end_pos == std::string::npos) {
389 std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
390 size_t partial_off = string_find_partial_stop(remaining_view, end_think);
391 size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
392 if (reasoning_end > cursor) {
393 handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
394 }
395 move_to(input_.size());
396 return true;
397 }
398
399 if (end_pos > cursor) {
400 handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
401 } else {
402 handle_reasoning("", /* closed */ true);
403 }
404
405 cursor = end_pos + end_think.size();
406
407 while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
408 ++cursor;
409 }
410
411 const size_t next_remaining = input_.size() - cursor;
412 if (next_remaining == 0) {
413 move_to(cursor);
414 return true;
415 }
416
417 const size_t next_prefix = std::min(start_think.size(), next_remaining);
418 if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
419 if (next_prefix < start_think.size()) {
420 move_to(input_.size());
421 return true;
422 }
423 set_reasoning_prefix(cursor);
424 cursor += start_think.size();
425 continue;
426 }
427
428 move_to(cursor);
429 return true;
430 }
431}
432
433std::string common_chat_msg_parser::consume_rest() {
434 auto rest = input_.substr(pos_);
435 pos_ = input_.size();
436 return rest;
437}
438
439// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
440std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
441 auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
442 if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
443 return std::nullopt;
444 }
445 auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
446 pos_ = m.groups[0].end;
447
448 if (add_prelude_to_content) {
449 add_content(prelude);
450 }
451 if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
452 if (is_partial()) {
453 throw common_chat_msg_partial_exception(regex.str());
454 }
455 return std::nullopt;
456 }
457 return find_regex_result{prelude, m.groups};
458}
459
460common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
461 if (auto result = try_consume_regex(regex)) {
462 return *result;
463 }
464 throw common_chat_msg_partial_exception(regex.str());
465}
466
467std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
468 auto m = regex.search(input_, pos_);
469 if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
470 return std::nullopt;
471 }
472 if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
473 if (is_partial()) {
474 throw common_chat_msg_partial_exception(regex.str());
475 }
476 return std::nullopt;
477 }
478 if (m.groups[0].begin != pos_) {
479 // Didn't match at the current position.
480 return std::nullopt;
481 }
482 pos_ = m.groups[0].end;
483
484 return find_regex_result {
485 /* .prelude = */ "",
486 m.groups,
487 };
488}
489
490std::optional<common_json> common_chat_msg_parser::try_consume_json() {
491 auto it = input_.cbegin() + pos_;
492 const auto end = input_.cend();
493 common_json result;
494 if (!common_json_parse(it, end, healing_marker_, result)) {
495 return std::nullopt;
496 }
497 pos_ = std::distance(input_.cbegin(), it);
498 if (result.healing_marker.marker.empty()) {
499 // No healing marker, just return the parsed json
500 return result;
501 }
502 if (!is_partial()) {
503 throw common_chat_msg_partial_exception("JSON");
504 }
505 return result;
506}
507
508common_json common_chat_msg_parser::consume_json() {
509 if (auto result = try_consume_json()) {
510 return *result;
511 }
512 throw common_chat_msg_partial_exception("JSON");
513}
514
515common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
516 const std::vector<std::vector<std::string>> & args_paths,
517 const std::vector<std::vector<std::string>> & content_paths
518) {
519 if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) {
520 return *result;
521 }
522 throw common_chat_msg_partial_exception("JSON");
523}
524
525std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
526 const std::vector<std::vector<std::string>> & args_paths,
527 const std::vector<std::vector<std::string>> & content_paths
528) {
529 auto partial = try_consume_json();
530 if (!partial) {
531 return std::nullopt;
532 }
533 auto is_arguments_path = [&](const std::vector<std::string> & path) {
534 return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
535 };
536 auto is_content_path = [&](const std::vector<std::string> & path) {
537 return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
538 };
539
540 if (partial->healing_marker.marker.empty()) {
541 if (args_paths.empty()) {
542 // No arguments to dump, and JSON was parsed fully.
543 return consume_json_result {
544 partial->json,
545 /* .is_partial = */ false,
546 };
547 }
548 if (is_arguments_path({})) {
549 // Entire JSON is the arguments and was parsed fully.
550 return consume_json_result {
551 partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
552 /* .is_partial = */ false,
553 };
554 }
555 }
556
557 LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
558
559 auto found_healing_marker = false;
560 std::vector<std::string> path;
561 std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
562 if (is_arguments_path(path)) {
563 auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
564 if (is_partial() && !partial->healing_marker.marker.empty()) {
565 auto idx = arguments.find(partial->healing_marker.json_dump_marker);
566 if (idx != std::string::npos) {
567 arguments.resize(idx);
568 found_healing_marker = true;
569 }
570 if (arguments == "\"") {
571 // This happens because of completing `:"$magic` after `"arguments"`
572 arguments = "";
573 }
574 }
575 return arguments;
576 }
577 if (is_content_path(path)) {
578 if (!j.is_string()) {
579 throw std::runtime_error("Content path must be a string");
580 }
581 std::string str = j;
582 auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
583 if (idx != std::string::npos) {
584 str.resize(idx);
585 found_healing_marker = true;
586 }
587 return str;
588 }
589 if (j.is_object()) {
590 auto obj = json::object();
591 for (const auto & p : j.items()) {
592 const auto & key = p.key();
593 const auto & value = p.value();
594 const std::string key_str = key; // NOLINT
595 auto idx = key_str.find(healing_marker_);
596 if (idx != std::string::npos) {
597 found_healing_marker = true;
598 break;
599 }
600 path.push_back(key_str);
601 if (value.is_string()) {
602 const std::string value_str = value;
603 if (value_str.find(healing_marker_) != std::string::npos) {
604 found_healing_marker = true;
605 if (is_content_path(path)) {
606 if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
607 // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
608 obj[key] = remove_unsupported_healings_and_dump_args(value);
609 }
610 }
611 break;
612 }
613 obj[key] = value;
614 } else {
615 obj[key] = remove_unsupported_healings_and_dump_args(value);
616 }
617 path.pop_back();
618 }
619 return obj;
620 }
621 if (j.is_array()) {
622 auto arr = json::array();
623 for (const auto & value : j) {
624 if (value.is_string()) {
625 std::string str = value;
626 auto idx = str.find(healing_marker_);
627 if (idx != std::string::npos) {
628 // Don't heal array values that aren't in the arguments.
629 found_healing_marker = true;
630 break;
631 }
632 }
633 arr.push_back(remove_unsupported_healings_and_dump_args(value));
634 }
635 return arr;
636 }
637 return j;
638 };
639
640 auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
641 LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
642 return consume_json_result {
643 cleaned,
644 /* .is_partial = */ found_healing_marker,
645 };
646}
647
648void common_chat_msg_parser::clear_tools() {
649 result_.tool_calls.clear();
650}
651
652/**
653 * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
654 * to reduce incremental compile time for parser changes.
655 */
656static void common_chat_parse_generic(common_chat_msg_parser & builder) {
657 if (!builder.syntax().parse_tool_calls) {
658 builder.add_content(builder.consume_rest());
659 return;
660 }
661 static const std::vector<std::vector<std::string>> content_paths = {
662 {"response"},
663 };
664 static const std::vector<std::vector<std::string>> args_paths = {
665 {"tool_call", "arguments"},
666 {"tool_calls", "arguments"},
667 };
668 auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
669 if (data.value.contains("tool_calls")) {
670 if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
671 throw common_chat_msg_partial_exception("incomplete tool calls");
672 }
673 } else if (data.value.contains("tool_call")) {
674 if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
675 throw common_chat_msg_partial_exception("incomplete tool call");
676 }
677 } else if (data.value.contains("response")) {
678 const auto & response = data.value.at("response");
679 builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
680 if (data.is_partial) {
681 throw common_chat_msg_partial_exception("incomplete response");
682 }
683 } else {
684 throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
685 }
686}
687
688static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
689 if (!builder.syntax().parse_tool_calls) {
690 builder.add_content(builder.consume_rest());
691 return;
692 }
693
694 static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
695 parse_prefixed_json_tool_call_array(builder, prefix);
696}
697
698static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
699 builder.try_parse_reasoning("[THINK]", "[/THINK]");
700
701 if (!builder.syntax().parse_tool_calls) {
702 builder.add_content(builder.consume_rest());
703 return;
704 }
705
706 static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
707 parse_prefixed_json_tool_call_array(builder, prefix);
708}
709
710static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
711 builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
712
713 static const common_regex start_action_regex("<\\|START_ACTION\\|>");
714 static const common_regex end_action_regex("<\\|END_ACTION\\|>");
715 static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
716 static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
717
718 if (auto res = builder.try_find_regex(start_action_regex)) {
719 // If we didn't extract thoughts, prelude includes them.
720 auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
721 for (const auto & tool_call : tool_calls.value) {
722 std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
723 std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
724 std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
725 if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
726 throw common_chat_msg_partial_exception("incomplete tool call");
727 }
728 }
729 if (tool_calls.is_partial) {
730 throw common_chat_msg_partial_exception("incomplete tool call");
731 }
732 builder.consume_regex(end_action_regex);
733 } else if (auto res = builder.try_find_regex(start_response_regex)) {
734 if (!builder.try_find_regex(end_response_regex)) {
735 builder.add_content(builder.consume_rest());
736 throw common_chat_msg_partial_exception(end_response_regex.str());
737 }
738 } else {
739 builder.add_content(builder.consume_rest());
740 }
741}
742
743static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
744 builder.try_parse_reasoning("<think>", "</think>");
745
746 if (!builder.syntax().parse_tool_calls) {
747 builder.add_content(builder.consume_rest());
748 return;
749 }
750
751 static const common_regex function_regex(
752 "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
753 static const common_regex close_regex("\\}\\s*");
754
755 static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
756 static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
757
758 if (with_builtin_tools) {
759 static const common_regex builtin_call_regex("<\\|python_tag\\|>");
760 if (auto res = builder.try_find_regex(builtin_call_regex)) {
761 auto fun_res = builder.consume_regex(function_name_regex);
762 auto function_name = builder.str(fun_res.groups[1]);
763
764 common_healing_marker healing_marker;
765 json args = json::object();
766 while (true) {
767 if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
768 auto arg_name = builder.str(arg_res->groups[1]);
769 auto partial = builder.consume_json();
770 args[arg_name] = partial.json;
771 healing_marker.marker = partial.healing_marker.marker;
772 healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
773 builder.consume_spaces();
774 if (!builder.try_consume_literal(",")) {
775 break;
776 }
777 } else {
778 break;
779 }
780 }
781 builder.consume_literal(")");
782 builder.consume_spaces();
783
784 auto arguments = args.dump();
785 if (!builder.add_tool_call(function_name, "", arguments)) {
786 throw common_chat_msg_partial_exception("Incomplete tool call");
787 }
788 return;
789 }
790 }
791 parse_json_tool_calls(
792 builder,
793 /* block_open= */ std::nullopt,
794 /* function_regex_start_only= */ function_regex,
795 /* function_regex= */ std::nullopt,
796 close_regex,
797 std::nullopt);
798
799}
800
801static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
802 builder.try_parse_reasoning("<think>", "</think>");
803 if (!builder.syntax().parse_tool_calls) {
804 builder.add_content(builder.consume_rest());
805 return;
806 }
807
808 static const common_regex tool_calls_begin("(?:<๏ฝtoolโcallsโbegin๏ฝ>|<๏ฝtool_calls_begin๏ฝ>|<๏ฝtool calls begin๏ฝ>|<๏ฝtool\\\\_calls\\\\_begin๏ฝ>|<๏ฝtoolโcalls๏ฝ>)");
809 static const common_regex tool_calls_end("<๏ฝtoolโcallsโend๏ฝ>");
810 static const common_regex function_regex("(?:<๏ฝtoolโcallโbegin๏ฝ>)?function<๏ฝtoolโsep๏ฝ>([^\n]+)\n```json\n");
811 static const common_regex close_regex("```[\\s\\r\\n]*<๏ฝtoolโcallโend๏ฝ>");
812
813 parse_json_tool_calls(
814 builder,
815 /* block_open= */ tool_calls_begin,
816 /* function_regex_start_only= */ std::nullopt,
817 function_regex,
818 close_regex,
819 tool_calls_end);
820}
821
822static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
823 static const common_regex function_regex("(?:<๏ฝtoolโcallโbegin๏ฝ>)?([^\\n<]+)(?:<๏ฝtoolโsep๏ฝ>)");
824
825 static const common_regex close_regex("(?:[\\s]*)?<๏ฝtoolโcallโend๏ฝ>");
826 static const common_regex tool_calls_begin("(?:<๏ฝtoolโcallsโbegin๏ฝ>|<๏ฝtool_calls_begin๏ฝ>|<๏ฝtool calls begin๏ฝ>|<๏ฝtool\\\\_calls\\\\_begin๏ฝ>|<๏ฝtoolโcalls๏ฝ>)");
827 static const common_regex tool_calls_end("<๏ฝtoolโcallsโend๏ฝ>");
828
829 if (!builder.syntax().parse_tool_calls) {
830 LOG_DBG("%s: not parse_tool_calls\n", __func__);
831 builder.add_content(builder.consume_rest());
832 return;
833 }
834
835 LOG_DBG("%s: parse_tool_calls\n", __func__);
836
837 parse_json_tool_calls(
838 builder,
839 /* block_open= */ tool_calls_begin,
840 /* function_regex_start_only= */ std::nullopt,
841 function_regex,
842 close_regex,
843 tool_calls_end);
844}
845
846static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
847 // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
848 // First try to parse using the standard reasoning parsing method
849 LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
850
851 auto start_pos = builder.pos();
852 auto found_end_think = builder.try_find_literal("</think>");
853 builder.move_to(start_pos);
854
855 if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
856 LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
857 common_chat_parse_deepseek_v3_1_content(builder);
858 } else if (builder.try_parse_reasoning("<think>", "</think>")) {
859 // If reasoning was parsed successfully, the remaining content is regular content
860 LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
861 // </think><๏ฝtoolโcallsโbegin๏ฝ><๏ฝtoolโcallโbegin๏ฝ>function<๏ฝtoolโsep๏ฝ>NAME\n```json\nJSON\n```<๏ฝtoolโcallโend๏ฝ><๏ฝtoolโcallsโend๏ฝ>
862 common_chat_parse_deepseek_v3_1_content(builder);
863 } else {
864 if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
865 LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
866 common_chat_parse_deepseek_v3_1_content(builder);
867 return;
868 }
869 // If no reasoning tags found, check if we should treat everything as reasoning
870 if (builder.syntax().thinking_forced_open) {
871 // If thinking is forced open but no tags found, treat everything as reasoning
872 LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
873 builder.add_reasoning_content(builder.consume_rest());
874 } else {
875 LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
876 // <๏ฝtoolโcallโbegin๏ฝ>NAME<๏ฝtoolโsep๏ฝ>JSON<๏ฝtoolโcallโend๏ฝ>
877 common_chat_parse_deepseek_v3_1_content(builder);
878 }
879 }
880}
881
882static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
883 static const xml_tool_call_format form {
884 /* form.scope_start = */ "<minimax:tool_call>",
885 /* form.tool_start = */ "<invoke name=\"",
886 /* form.tool_sep = */ "\">",
887 /* form.key_start = */ "<parameter name=\"",
888 /* form.key_val_sep = */ "\">",
889 /* form.val_end = */ "</parameter>",
890 /* form.tool_end = */ "</invoke>",
891 /* form.scope_end = */ "</minimax:tool_call>",
892 };
893 builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
894}
895
896static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
897 static const xml_tool_call_format form = ([]() {
898 xml_tool_call_format form {};
899 form.scope_start = "<tool_call>";
900 form.tool_start = "<function=";
901 form.tool_sep = ">";
902 form.key_start = "<parameter=";
903 form.key_val_sep = ">";
904 form.val_end = "</parameter>";
905 form.tool_end = "</function>";
906 form.scope_end = "</tool_call>";
907 form.trim_raw_argval = true;
908 return form;
909 })();
910 builder.consume_reasoning_with_xml_tool_calls(form);
911}
912
913static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
914 static const xml_tool_call_format form = ([]() {
915 xml_tool_call_format form {};
916 form.scope_start = "<|tool_calls_section_begin|>";
917 form.tool_start = "<|tool_call_begin|>";
918 form.tool_sep = "<|tool_call_argument_begin|>{";
919 form.key_start = "\"";
920 form.key_val_sep = "\":";
921 form.val_end = ",";
922 form.tool_end = "}<|tool_call_end|>";
923 form.scope_end = "<|tool_calls_section_end|>";
924 form.raw_argval = false;
925 form.last_val_end = "";
926 form.allow_toolcall_in_think = true;
927 return form;
928 })();
929 builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
930}
931
932static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
933 static const xml_tool_call_format form = ([]() {
934 xml_tool_call_format form {};
935 form.scope_start = "<tool_calls>[";
936 form.tool_start = "{\"name\": \"";
937 form.tool_sep = "\", \"arguments\": {";
938 form.key_start = "\"";
939 form.key_val_sep = "\": ";
940 form.val_end = ", ";
941 form.tool_end = "}, ";
942 form.scope_end = "]</tool_calls>";
943 form.raw_argval = false;
944 form.last_val_end = "";
945 form.last_tool_end = "}";
946 return form;
947 })();
948 builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
949}
950
951static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
952 static const xml_tool_call_format form = ([]() {
953 xml_tool_call_format form {};
954 form.scope_start = "";
955 form.tool_start = "<tool_call>\n{\"name\": \"";
956 form.tool_sep = "\", \"arguments\": {";
957 form.key_start = "\"";
958 form.key_val_sep = "\": ";
959 form.val_end = ", ";
960 form.tool_end = "}\n</tool_call>";
961 form.scope_end = "";
962 form.raw_argval = false;
963 form.last_val_end = "";
964 return form;
965 })();
966 builder.consume_reasoning_with_xml_tool_calls(form);
967}
968
969static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
970 static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
971 static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
972
973 static const common_regex start_regex("<\\|start\\|>assistant");
974 static const common_regex analysis_regex("<\\|channel\\|>analysis");
975 static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
976 static const common_regex preamble_regex("<\\|channel\\|>commentary");
977 static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
978 static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
979
980 auto consume_end = [&](bool include_end = false) {
981 if (auto res = builder.try_find_literal("<|end|>")) {
982 return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
983 }
984 return builder.consume_rest();
985 };
986
987 auto handle_tool_call = [&](const std::string & name) {
988 if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
989 if (builder.syntax().parse_tool_calls) {
990 if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
991 throw common_chat_msg_partial_exception("incomplete tool call");
992 }
993 } else if (args->is_partial) {
994 throw common_chat_msg_partial_exception("incomplete tool call");
995 }
996 }
997 };
998
999 auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
1000 auto match = regex.search(input, 0, true);
1001 if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
1002 return match;
1003 }
1004 return std::nullopt;
1005 };
1006
1007 do {
1008 auto header_start_pos = builder.pos();
1009 auto content_start = builder.try_find_literal("<|message|>");
1010 if (!content_start) {
1011 throw common_chat_msg_partial_exception("incomplete header");
1012 }
1013
1014 auto header = content_start->prelude;
1015
1016 if (auto match = regex_match(tool_call1_regex, header)) {
1017 auto group = match->groups[1];
1018 auto name = header.substr(group.begin, group.end - group.begin);
1019 handle_tool_call(name);
1020 continue;
1021 }
1022
1023 if (auto match = regex_match(tool_call2_regex, header)) {
1024 auto group = match->groups[2];
1025 auto name = header.substr(group.begin, group.end - group.begin);
1026 handle_tool_call(name);
1027 continue;
1028 }
1029
1030 if (regex_match(analysis_regex, header)) {
1031 builder.move_to(header_start_pos);
1032 if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
1033 builder.add_content(consume_end(true));
1034 } else {
1035 builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
1036 }
1037 continue;
1038 }
1039
1040 if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
1041 builder.add_content(consume_end());
1042 continue;
1043 }
1044
1045 // Possibly a malformed message, attempt to recover by rolling
1046 // back to pick up the next <|start|>
1047 LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
1048 builder.move_to(header_start_pos);
1049 } while (builder.try_find_regex(start_regex, std::string::npos, false));
1050
1051 auto remaining = builder.consume_rest();
1052 if (!remaining.empty()) {
1053 LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
1054 }
1055}
1056
1057static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
1058 static const xml_tool_call_format form {
1059 /* form.scope_start = */ "",
1060 /* form.tool_start = */ "<tool_call>",
1061 /* form.tool_sep = */ "",
1062 /* form.key_start = */ "<arg_key>",
1063 /* form.key_val_sep = */ "</arg_key>",
1064 /* form.val_end = */ "</arg_value>",
1065 /* form.tool_end = */ "</tool_call>",
1066 /* form.scope_end = */ "",
1067 /* form.key_val_sep2 = */ "<arg_value>",
1068 };
1069 builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
1070}
1071
1072static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
1073 if (!builder.syntax().parse_tool_calls) {
1074 builder.add_content(builder.consume_rest());
1075 return;
1076 }
1077 static const common_regex prefix(regex_escape(" functools["));
1078 parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
1079}
1080
1081static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
1082 static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
1083 static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
1084 static const common_regex close_regex(R"(\s*)");
1085
1086 parse_json_tool_calls(
1087 builder,
1088 std::nullopt,
1089 function_regex_start_only,
1090 function_regex,
1091 close_regex,
1092 std::nullopt,
1093 /* allow_raw_python= */ true,
1094 /* get_function_name= */ [&](const auto & res) -> std::string {
1095 auto at_start = res.groups[0].begin == 0;
1096 auto name = builder.str(res.groups[1]);
1097 if (!name.empty() && name.back() == '{') {
1098 // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
1099 builder.move_back(1);
1100 }
1101 auto idx = name.find_last_not_of("\n{");
1102 name = name.substr(0, idx + 1);
1103 if (at_start && name == "all") {
1104 return "";
1105 }
1106 return name;
1107 });
1108}
1109
1110static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
1111 if (!builder.syntax().parse_tool_calls) {
1112 builder.add_content(builder.consume_rest());
1113 return;
1114 }
1115 // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
1116 static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
1117
1118 static const common_regex function_regex(R"(<function=(\w+)>)");
1119 static const common_regex close_regex(R"(</function>)");
1120
1121 parse_json_tool_calls(
1122 builder,
1123 /* block_open= */ std::nullopt,
1124 /* function_regex_start_only= */ std::nullopt,
1125 function_regex,
1126 close_regex,
1127 std::nullopt);
1128
1129 if (auto res = builder.try_find_regex(python_tag_regex)) {
1130 auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
1131 builder.add_tool_call("python", "", arguments);
1132 return;
1133 }
1134}
1135
1136static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
1137 builder.try_parse_reasoning("<think>", "</think>");
1138 if (!builder.syntax().parse_tool_calls) {
1139 builder.add_content(builder.consume_rest());
1140 return;
1141 }
1142
1143 static const common_regex open_regex(
1144 "(?:"
1145 "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
1146 "(" // match 2 (open_tag)
1147 "<tool_call>"
1148 "|<function_call>"
1149 "|<tool>"
1150 "|<tools>"
1151 "|<response>"
1152 "|<json>"
1153 "|<xml>"
1154 "|<JSON>"
1155 ")?"
1156 "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
1157 ")"
1158 "|<function=([^>]+)>" // match 4 (function name)
1159 "|<function name=\"([^\"]+)\">" // match 5 (function name again)
1160 );
1161
1162 while (auto res = builder.try_find_regex(open_regex)) {
1163 const auto & block_start = res->groups[1];
1164 std::string block_end = block_start.empty() ? "" : "```";
1165
1166 const auto & open_tag = res->groups[2];
1167 std::string close_tag;
1168
1169 if (!res->groups[3].empty()) {
1170 builder.move_to(res->groups[3].begin);
1171 close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
1172
1173 if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
1174 if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
1175 throw common_chat_msg_partial_exception("incomplete tool call");
1176 }
1177 builder.consume_spaces();
1178 builder.consume_literal(close_tag);
1179 builder.consume_spaces();
1180 if (!block_end.empty()) {
1181 builder.consume_literal(block_end);
1182 builder.consume_spaces();
1183 }
1184 } else {
1185 throw common_chat_msg_partial_exception("failed to parse tool call");
1186 }
1187 } else {
1188 auto function_name = builder.str(res->groups[4]);
1189 if (function_name.empty()) {
1190 function_name = builder.str(res->groups[5]);
1191 }
1192 GGML_ASSERT(!function_name.empty());
1193
1194 close_tag = "</function>";
1195
1196 if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
1197 if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
1198 throw common_chat_msg_partial_exception("incomplete tool call");
1199 }
1200 builder.consume_spaces();
1201 builder.consume_literal(close_tag);
1202 builder.consume_spaces();
1203 if (!block_end.empty()) {
1204 builder.consume_literal(block_end);
1205 builder.consume_spaces();
1206 }
1207 }
1208 }
1209 }
1210
1211 builder.add_content(builder.consume_rest());
1212}
1213
1214static void common_chat_parse_granite(common_chat_msg_parser & builder) {
1215 // Parse thinking tags
1216 static const common_regex start_think_regex(regex_escape("<think>"));
1217 static const common_regex end_think_regex(regex_escape("</think>"));
1218 // Granite models output partial tokens such as "<" and "<think".
1219 // By leveraging try_consume_regex()/try_find_regex() throwing
1220 // common_chat_msg_partial_exception for these partial tokens,
1221 // processing is interrupted and the tokens are not passed to add_content().
1222 if (auto res = builder.try_consume_regex(start_think_regex)) {
1223 // Restore position for try_parse_reasoning()
1224 builder.move_to(res->groups[0].begin);
1225 builder.try_find_regex(end_think_regex, std::string::npos, false);
1226 // Restore position for try_parse_reasoning()
1227 builder.move_to(res->groups[0].begin);
1228 }
1229 builder.try_parse_reasoning("<think>", "</think>");
1230
1231 // Parse response tags
1232 static const common_regex start_response_regex(regex_escape("<response>"));
1233 static const common_regex end_response_regex(regex_escape("</response>"));
1234 // Granite models output partial tokens such as "<" and "<response".
1235 // Same hack as reasoning parsing.
1236 if (builder.try_consume_regex(start_response_regex)) {
1237 builder.try_find_regex(end_response_regex);
1238 }
1239
1240 if (!builder.syntax().parse_tool_calls) {
1241 builder.add_content(builder.consume_rest());
1242 return;
1243 }
1244
1245 // Look for tool calls
1246 static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
1247 if (auto res = builder.try_find_regex(tool_call_regex)) {
1248 builder.move_to(res->groups[0].end);
1249
1250 // Expect JSON array of tool calls
1251 if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
1252 if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
1253 throw common_chat_msg_partial_exception("incomplete tool call");
1254 }
1255 }
1256 } else {
1257 builder.add_content(builder.consume_rest());
1258 }
1259}
1260
1261static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
1262 // Parse thinking tags
1263 builder.try_parse_reasoning("<think>", "</think>");
1264 if (!builder.syntax().parse_tool_calls) {
1265 builder.add_content(builder.consume_rest());
1266 return;
1267 }
1268
1269 // Look for tool calls
1270 static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
1271 if (auto res = builder.try_find_regex(tool_call_regex)) {
1272 builder.move_to(res->groups[0].end);
1273
1274 // Expect JSON array of tool calls
1275 auto tool_calls_data = builder.consume_json();
1276 if (tool_calls_data.json.is_array()) {
1277 if (!builder.try_consume_literal("</TOOLCALL>")) {
1278 throw common_chat_msg_partial_exception("Incomplete tool call");
1279 }
1280 builder.add_tool_calls(tool_calls_data.json);
1281 } else {
1282 throw common_chat_msg_partial_exception("Incomplete tool call");
1283 }
1284 }
1285 builder.add_content(builder.consume_rest());
1286}
1287
1288static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
1289 // Parse thinking tags
1290 builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
1291 if (!builder.syntax().parse_tool_calls) {
1292 builder.add_content(builder.consume_rest());
1293 return;
1294 }
1295
1296 // Look for tool calls
1297 static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
1298 if (auto res = builder.try_find_regex(tool_call_regex)) {
1299 builder.move_to(res->groups[0].end);
1300
1301 auto tool_calls_data = builder.consume_json();
1302 if (tool_calls_data.json.is_array()) {
1303 builder.consume_spaces();
1304 if (!builder.try_consume_literal("<|tools_suffix|>")) {
1305 throw common_chat_msg_partial_exception("Incomplete tool call");
1306 }
1307 for (const auto & value : tool_calls_data.json) {
1308 if (value.is_object()) {
1309 builder.add_tool_call_short_form(value);
1310 }
1311 }
1312 } else {
1313 throw common_chat_msg_partial_exception("Incomplete tool call");
1314 }
1315 }
1316 builder.add_content(builder.consume_rest());
1317}
1318
1319
1320static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
1321 if (!builder.syntax().parse_tool_calls) {
1322 builder.add_content(builder.consume_rest());
1323 return;
1324 }
1325
1326 // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
1327 static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
1328 static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
1329
1330 // Loop through all tool calls
1331 while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
1332 builder.move_to(res->groups[0].end);
1333
1334 // Parse JSON array format: [{"name": "...", "arguments": {...}}]
1335 auto tool_calls_data = builder.consume_json();
1336
1337 // Consume end marker
1338 builder.consume_spaces();
1339 if (!builder.try_consume_regex(tool_call_end_regex)) {
1340 throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
1341 }
1342
1343 // Process each tool call in the array
1344 if (tool_calls_data.json.is_array()) {
1345 for (const auto & tool_call : tool_calls_data.json) {
1346 if (!tool_call.is_object()) {
1347 throw common_chat_msg_partial_exception("Tool call must be an object");
1348 }
1349
1350 if (!tool_call.contains("name")) {
1351 throw common_chat_msg_partial_exception("Tool call missing 'name' field");
1352 }
1353
1354 std::string function_name = tool_call.at("name");
1355 std::string arguments = "{}";
1356
1357 if (tool_call.contains("arguments")) {
1358 if (tool_call.at("arguments").is_object()) {
1359 arguments = tool_call.at("arguments").dump();
1360 } else if (tool_call.at("arguments").is_string()) {
1361 arguments = tool_call.at("arguments");
1362 }
1363 }
1364
1365 if (!builder.add_tool_call(function_name, "", arguments)) {
1366 throw common_chat_msg_partial_exception("Incomplete tool call");
1367 }
1368 }
1369 } else {
1370 throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
1371 }
1372
1373 // Consume any trailing whitespace after this tool call
1374 builder.consume_spaces();
1375 }
1376
1377 // Consume any remaining content after all tool calls
1378 auto remaining = builder.consume_rest();
1379 if (!string_strip(remaining).empty()) {
1380 builder.add_content(remaining);
1381 }
1382}
1383
1384static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
1385 static const xml_tool_call_format form {
1386 /* form.scope_start = */ "<seed:tool_call>",
1387 /* form.tool_start = */ "<function=",
1388 /* form.tool_sep = */ ">",
1389 /* form.key_start = */ "<parameter=",
1390 /* form.key_val_sep = */ ">",
1391 /* form.val_end = */ "</parameter>",
1392 /* form.tool_end = */ "</function>",
1393 /* form.scope_end = */ "</seed:tool_call>",
1394 };
1395 builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
1396}
1397
1398static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
1399 builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
1400
1401 // TODO: Tool calling
1402
1403 builder.add_content(builder.consume_rest());
1404}
1405
1406static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) {
1407 // 1) <tool_call>{ "name": "...", "arguments": {...} }</tool_call>
1408 // 2) <tool_call>{ "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } }</tool_call>
1409 static const common_regex tool_call_open(R"(<tool_call[^>]*>)");
1410
1411 if (!builder.syntax().parse_tool_calls) {
1412 LOG_DBG("%s: not parse_tool_calls\n", __func__);
1413 builder.add_content(builder.consume_rest());
1414 return;
1415 }
1416
1417 LOG_DBG("%s: parse_tool_calls\n", __func__);
1418
1419 // Find all <tool_call></tool_call> blocks
1420 while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) {
1421 builder.move_to(first->groups[0].end);
1422 builder.consume_spaces();
1423
1424 builder.try_consume_literal("```json");
1425 builder.try_consume_literal("```");
1426 builder.consume_spaces();
1427
1428 // Consume JSON object
1429 auto data = builder.consume_json();
1430
1431 builder.consume_spaces();
1432 builder.try_consume_literal("```");
1433 builder.consume_spaces();
1434
1435 if (!builder.try_consume_literal("</tool_call>")) {
1436 throw common_chat_msg_partial_exception("incomplete tool call");
1437 }
1438 builder.consume_spaces();
1439
1440 // Extract name and arguments
1441 std::string name;
1442 std::string id;
1443 nlohmann::ordered_json arguments;
1444
1445 const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool {
1446 if (!obj.contains("name") || !obj.contains("arguments")) {
1447 return false;
1448 }
1449 name = obj.at("name").get<std::string>();
1450 arguments = obj.at("arguments");
1451 if (obj.contains("id") && obj.at("id").is_string()) {
1452 id = obj.at("id").get<std::string>();
1453 }
1454 return true;
1455 };
1456
1457 if (!extract_args(data.json)) {
1458 if (data.json.contains("function") && data.json.at("function").is_object()) {
1459 auto fn = data.json.at("function");
1460 extract_args(fn);
1461 if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) {
1462 id = data.json.at("id").get<std::string>();
1463 }
1464 }
1465 }
1466
1467 // If name is empty, treat the JSON object as content
1468 if (name.empty()) {
1469 LOG_DBG("%s: tool call missing name, treating as content\n", __func__);
1470 builder.add_content(data.json.dump());
1471 continue;
1472 }
1473
1474 std::string args_str = arguments.dump();
1475 if (!builder.add_tool_call(name, id, args_str)) {
1476 throw common_chat_msg_partial_exception("incomplete tool call");
1477 }
1478 }
1479
1480 builder.add_content(builder.consume_rest());
1481}
1482
1483static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
1484 LOG_DBG("%s: parsing exaone_moe\n", __func__);
1485 // EXAONE MoE outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
1486 // First try to parse using the standard reasoning parsing method
1487 LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
1488
1489 auto start_pos = builder.pos();
1490 auto found_end_think = builder.try_find_literal("</think>");
1491 builder.move_to(start_pos);
1492
1493 if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
1494 LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
1495 common_chat_parse_exaone_moe_content(builder);
1496 } else if (builder.try_parse_reasoning("<think>", "</think>")) {
1497 // If reasoning was parsed successfully, the remaining content is regular content
1498 LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
1499 common_chat_parse_exaone_moe_content(builder);
1500 } else {
1501 if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
1502 LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
1503 common_chat_parse_exaone_moe_content(builder);
1504 return;
1505 }
1506 // If no reasoning tags found, check if we should treat everything as reasoning
1507 if (builder.syntax().thinking_forced_open) {
1508 // If thinking is forced open but no tags found, treat everything as reasoning
1509 LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
1510 builder.add_reasoning_content(builder.consume_rest());
1511 } else {
1512 LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
1513 common_chat_parse_exaone_moe_content(builder);
1514 }
1515 }
1516}
1517
1518static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
1519 builder.try_parse_reasoning("<think>", "</think>");
1520 builder.add_content(builder.consume_rest());
1521}
1522
1523static void common_chat_parse(common_chat_msg_parser & builder) {
1524 LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
1525
1526 switch (builder.syntax().format) {
1527 case COMMON_CHAT_FORMAT_CONTENT_ONLY:
1528 common_chat_parse_content_only(builder);
1529 break;
1530 case COMMON_CHAT_FORMAT_GENERIC:
1531 common_chat_parse_generic(builder);
1532 break;
1533 case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
1534 common_chat_parse_mistral_nemo(builder);
1535 break;
1536 case COMMON_CHAT_FORMAT_MAGISTRAL:
1537 common_chat_parse_magistral(builder);
1538 break;
1539 case COMMON_CHAT_FORMAT_LLAMA_3_X:
1540 common_chat_parse_llama_3_1(builder);
1541 break;
1542 case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
1543 common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
1544 break;
1545 case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
1546 common_chat_parse_deepseek_r1(builder);
1547 break;
1548 case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
1549 common_chat_parse_deepseek_v3_1(builder);
1550 break;
1551 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
1552 common_chat_parse_functionary_v3_2(builder);
1553 break;
1554 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
1555 common_chat_parse_functionary_v3_1_llama_3_1(builder);
1556 break;
1557 case COMMON_CHAT_FORMAT_HERMES_2_PRO:
1558 common_chat_parse_hermes_2_pro(builder);
1559 break;
1560 case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
1561 common_chat_parse_firefunction_v2(builder);
1562 break;
1563 case COMMON_CHAT_FORMAT_COMMAND_R7B:
1564 common_chat_parse_command_r7b(builder);
1565 break;
1566 case COMMON_CHAT_FORMAT_GRANITE:
1567 common_chat_parse_granite(builder);
1568 break;
1569 case COMMON_CHAT_FORMAT_GPT_OSS:
1570 common_chat_parse_gpt_oss(builder);
1571 break;
1572 case COMMON_CHAT_FORMAT_SEED_OSS:
1573 common_chat_parse_seed_oss(builder);
1574 break;
1575 case COMMON_CHAT_FORMAT_NEMOTRON_V2:
1576 common_chat_parse_nemotron_v2(builder);
1577 break;
1578 case COMMON_CHAT_FORMAT_APERTUS:
1579 common_chat_parse_apertus(builder);
1580 break;
1581 case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
1582 common_chat_parse_lfm2(builder);
1583 break;
1584 case COMMON_CHAT_FORMAT_MINIMAX_M2:
1585 common_chat_parse_minimax_m2(builder);
1586 break;
1587 case COMMON_CHAT_FORMAT_GLM_4_5:
1588 common_chat_parse_glm_4_5(builder);
1589 break;
1590 case COMMON_CHAT_FORMAT_KIMI_K2:
1591 common_chat_parse_kimi_k2(builder);
1592 break;
1593 case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
1594 common_chat_parse_qwen3_coder_xml(builder);
1595 break;
1596 case COMMON_CHAT_FORMAT_APRIEL_1_5:
1597 common_chat_parse_apriel_1_5(builder);
1598 break;
1599 case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
1600 common_chat_parse_xiaomi_mimo(builder);
1601 break;
1602 case COMMON_CHAT_FORMAT_SOLAR_OPEN:
1603 common_chat_parse_solar_open(builder);
1604 break;
1605 case COMMON_CHAT_FORMAT_EXAONE_MOE:
1606 common_chat_parse_exaone_moe(builder);
1607 break;
1608 default:
1609 throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
1610 }
1611 builder.finish();
1612}
1613
1614common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
1615 if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
1616 syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
1617 syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
1618 return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
1619 }
1620 common_chat_msg_parser builder(input, is_partial, syntax);
1621 try {
1622 common_chat_parse(builder);
1623 } catch (const common_chat_msg_partial_exception & ex) {
1624 LOG_DBG("Partial parse: %s\n", ex.what());
1625 if (!is_partial) {
1626 builder.clear_tools();
1627 builder.move_to(0);
1628 common_chat_parse_content_only(builder);
1629 }
1630 }
1631 auto msg = builder.result();
1632 if (!is_partial) {
1633 LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1634 }
1635 return msg;
1636}
1637
1638common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
1639 if (parser.empty()) {
1640 throw std::runtime_error("Failed to parse due to missing parser definition.");
1641 }
1642
1643 LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
1644
1645 common_peg_parse_context ctx(input, is_partial);
1646 auto result = parser.parse(ctx);
1647 if (result.fail()) {
1648 throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
1649 }
1650
1651 common_chat_msg msg;
1652 msg.role = "assistant";
1653
1654 if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
1655 auto mapper = common_chat_peg_native_mapper(msg);
1656 mapper.from_ast(ctx.ast, result);
1657 } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
1658 auto mapper = common_chat_peg_constructed_mapper(msg);
1659 mapper.from_ast(ctx.ast, result);
1660 } else {
1661 // Generic mapper
1662 auto mapper = common_chat_peg_mapper(msg);
1663 mapper.from_ast(ctx.ast, result);
1664 }
1665 if (!is_partial) {
1666 LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1667 }
1668 return msg;
1669}