aboutsummaryrefslogtreecommitdiff
path: root/llama.cpp/tests/test-chat-template.cpp
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/tests/test-chat-template.cpp
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/tests/test-chat-template.cpp')
-rw-r--r--llama.cpp/tests/test-chat-template.cpp680
1 files changed, 680 insertions, 0 deletions
diff --git a/llama.cpp/tests/test-chat-template.cpp b/llama.cpp/tests/test-chat-template.cpp
new file mode 100644
index 0000000..27b537a
--- /dev/null
+++ b/llama.cpp/tests/test-chat-template.cpp
@@ -0,0 +1,680 @@
1#include <string>
2#include <vector>
3#include <sstream>
4#include <regex>
5#include <iostream>
6#include <fstream>
7#include <filesystem>
8
9#include <nlohmann/json.hpp>
10
11#undef NDEBUG
12#include <cassert>
13
14#include "llama.h"
15#include "common.h"
16#include "chat.h"
17#include "jinja/runtime.h"
18#include "jinja/parser.h"
19#include "jinja/lexer.h"
20#include "jinja/caps.h"
21
22using json = nlohmann::ordered_json;
23
// Built-in test suite; main() calls this when no template path is supplied.
24int main_automated_tests(void);
25
// Runs every regular *.jinja file found in dir_path through run_single() and prints a summary.
26void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false);
// Renders one template (given as its text) against `input`; optionally writes the result to output_path.
27void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = "");
28
29
30
// Usage text printed by main() for -h/--help and after an unknown-argument error.
31std::string HELP = R"(
32Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE
33Options:
34 -h, --help Show this help message and exit.
35 --json <path> Path to the JSON input file.
36 --stop-on-first-fail Stop testing on the first failure (default: false).
37 --no-common Use direct Jinja engine instead of common chat templates (default: use common).
38 --output <path> Path to output results (only for single template runs).
39If PATH_TO_TEMPLATE is a file, runs that single template.
40If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory.
41If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode).
42)";
43
// Template input that main() parses when no --json file is given:
// a two-message conversation plus bos/eos tokens and add_generation_prompt.
44std::string DEFAULT_JSON = R"({
45 "messages": [
46 {
47 "role": "user",
48 "content": "Hello, how are you?"
49 },
50 {
51 "role": "assistant",
52 "content": "I am fine, thank you!"
53 }
54 ],
55 "bos_token": "<s>",
56 "eos_token": "</s>",
57 "add_generation_prompt": true
58})";
59
60int main(int argc, char ** argv) {
61 std::vector<std::string> args(argv, argv + argc);
62
63 std::string tmpl_path;
64 std::string json_path;
65 std::string output_path;
66 bool stop_on_first_fail = false;
67 bool use_common = true;
68
69 for (size_t i = 1; i < args.size(); i++) {
70 if (args[i] == "--help" || args[i] == "-h") {
71 std::cout << HELP << "\n";
72 return 0;
73 } else if (args[i] == "--json" && i + 1 < args.size()) {
74 json_path = args[i + 1];
75 i++;
76 } else if (args[i] == "--stop-on-first-fail") {
77 stop_on_first_fail = true;
78 } else if (args[i] == "--output" && i + 1 < args.size()) {
79 output_path = args[i + 1];
80 i++;
81 } else if (args[i] == "--no-common") {
82 use_common = true;
83 } else if (tmpl_path.empty()) {
84 tmpl_path = args[i];
85 } else {
86 std::cerr << "Unknown argument: " << args[i] << "\n";
87 std::cout << HELP << "\n";
88 return 1;
89 }
90 }
91
92 if (tmpl_path.empty()) {
93 return main_automated_tests();
94 }
95
96 json input_json;
97 if (!json_path.empty()) {
98 std::ifstream json_file(json_path);
99 if (!json_file) {
100 std::cerr << "Error: Could not open JSON file: " << json_path << "\n";
101 return 1;
102 }
103 std::string content = std::string(
104 std::istreambuf_iterator<char>(json_file),
105 std::istreambuf_iterator<char>());
106 input_json = json::parse(content);
107 } else {
108 input_json = json::parse(DEFAULT_JSON);
109 }
110
111 std::filesystem::path p(tmpl_path);
112 if (std::filesystem::is_directory(p)) {
113 run_multiple(tmpl_path, stop_on_first_fail, input_json, use_common);
114 } else if (std::filesystem::is_regular_file(p)) {
115 std::ifstream infile(tmpl_path);
116 std::string contents = std::string(
117 std::istreambuf_iterator<char>(infile),
118 std::istreambuf_iterator<char>());
119 run_single(contents, input_json, use_common, output_path);
120 } else {
121 std::cerr << "Error: PATH_TO_TEMPLATE is not a valid file or directory: " << tmpl_path << "\n";
122 return 1;
123 }
124
125 return 0;
126}
127
128void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) {
129 std::vector<std::string> failed_tests;
130
131 // list all files in models/templates/ and run each
132 size_t test_count = 0;
133
134 for (const auto & entry : std::filesystem::directory_iterator(dir_path)) {
135 // only process .jinja files
136 if (entry.path().extension() == ".jinja" && entry.is_regular_file()) {
137 test_count++;
138 std::cout << "\n\n=== RUNNING TEMPLATE FILE: " << entry.path().string() << " ===\n";
139 std::ifstream infile(entry.path());
140 std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
141 try {
142 run_single(contents, input, use_common);
143 } catch (const std::exception & e) {
144 std::cout << "Exception: " << e.what() << "\n";
145 std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n";
146 failed_tests.push_back(entry.path().string());
147 if (stop_on_first_fail) {
148 break;
149 }
150 }
151 }
152 }
153
154 std::cout << "\n\n=== TEST SUMMARY ===\n";
155 std::cout << "Total tests run: " << test_count << "\n";
156 std::cout << "Total failed tests: " << failed_tests.size() << "\n";
157 for (const auto & test : failed_tests) {
158 std::cout << "FAILED TEST: " << test << "\n";
159 }
160}
161
162
// Returns a copy of `s`. On Windows builds every "\r\n" pair is collapsed to
// "\n"; on all other platforms the string is returned unchanged.
static std::string normalize_newlines(const std::string & s) {
#ifndef _WIN32
    return s;
#else
    std::string out;
    out.reserve(s.size());
    for (size_t i = 0; i < s.size(); ++i) {
        // drop a '\r' only when it is immediately followed by '\n'
        if (s[i] == '\r' && i + 1 < s.size() && s[i + 1] == '\n') {
            continue;
        }
        out.push_back(s[i]);
    }
    return out;
#endif
}
171
172
173static std::string format_using_common(
174 const std::string & template_str,
175 const std::string & bos_token,
176 const std::string & eos_token,
177 std::vector<common_chat_msg> & messages,
178 std::vector<common_chat_tool> tools = {}) {
179 auto tmpls = common_chat_templates_init(/* model= */ nullptr, template_str, bos_token, eos_token);
180 common_chat_templates_inputs inputs;
181 inputs.use_jinja = true;
182 inputs.messages = messages;
183 inputs.tools = tools;
184 inputs.add_generation_prompt = true;
185 auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
186 output = normalize_newlines(output);
187 return output;
188}
189
190
191// skip libcommon, use direct jinja engine
192static jinja::value_string format_using_direct_engine(
193 const std::string & template_str,
194 json & input) {
195 // lexing
196 jinja::lexer lexer;
197 auto lexer_res = lexer.tokenize(template_str);
198
199 // compile to AST
200 jinja::program ast = jinja::parse_from_tokens(lexer_res);
201
202 // check caps for workarounds
203 jinja::caps_get(ast);
204
205 std::cout << "\n=== RUN ===\n";
206 jinja::context ctx(template_str);
207
208 jinja::global_from_json(ctx, input, true);
209
210 jinja::runtime runtime(ctx);
211 const jinja::value results = runtime.execute(ast);
212 auto parts = runtime.gather_string_parts(results);
213
214 std::cout << "\n=== RESULTS ===\n";
215 for (const auto & part : parts->as_string().parts) {
216 std::cout << (part.is_input ? "DATA" : "TMPL") << ": " << part.val << "\n";
217 }
218
219 return parts;
220}
221
222
223void run_single(std::string contents, json input, bool use_common, const std::string & output_path) {
224 jinja::enable_debug(true);
225
226 jinja::value_string output_parts;
227
228 if (use_common) {
229 std::string bos_token = "<s>";
230 std::string eos_token = "</s>";
231 if (input.contains("bos_token")) {
232 bos_token = input["bos_token"].get<std::string>();
233 }
234 if (input.contains("eos_token")) {
235 eos_token = input["eos_token"].get<std::string>();
236 }
237 nlohmann::ordered_json msgs_json = input["messages"];
238 nlohmann::ordered_json tools_json = input["tools"];
239 auto messages = common_chat_msgs_parse_oaicompat(msgs_json);
240 auto tools = common_chat_tools_parse_oaicompat(tools_json);
241 auto output = format_using_common(contents, bos_token, eos_token, messages, tools);
242 std::cout << "\n=== OUTPUT ===\n";
243 std::cout << output << "\n";
244 output_parts = jinja::mk_val<jinja::value_string>(output);
245
246 } else {
247 output_parts = format_using_direct_engine(contents, input);
248 std::cout << "\n=== OUTPUT ===\n";
249 std::cout << output_parts->as_string().str() << "\n";
250 }
251
252 if (!output_path.empty()) {
253 std::ofstream outfile(output_path);
254 if (!outfile) {
255 throw std::runtime_error("Could not open output file: " + output_path);
256 }
257 outfile << output_parts->as_string().str();
258 outfile.close();
259 std::cout << "\n=== OUTPUT WRITTEN TO " << output_path << " ===\n";
260 }
261}
262
263
264
265
266
267//
268// Automated tests for chat templates
269//
270
// Pastes the u8 prefix onto a string literal and casts the result to const char*.
// NOTE(review): presumably needed so UTF-8 literals still initialise std::string
// where u8"" literals are not plain char arrays (e.g. C++20 char8_t) - confirm.
271#define U8C(x) (const char*)(u8##x)
272
273static common_chat_msg simple_msg(const std::string & role, const std::string & content) {
274 common_chat_msg msg;
275 msg.role = role;
276 msg.content = content;
277 return msg;
278}
279
280int main_automated_tests(void) {
281 // jinja::enable_debug(true);
282
283 std::vector<llama_chat_message> conversation {
284 {"system", "You are a helpful assistant"},
285 {"user", "Hello"},
286 {"assistant", "Hi there"},
287 {"user", "Who are you"},
288 {"assistant", " I am an assistant "},
289 {"user", "Another question"},
290 };
291
292 // std::string wrong = /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}";
293 struct TestCase {
294 std::string name;
295 std::string template_str;
296 std::string expected_output;
297 std::string expected_output_jinja;
298 std::string bos_token = "";
299 std::string eos_token = "";
300 bool supported_with_jinja = true;
301 };
302 std::vector<TestCase> test_cases {
303 {
304 /* .name= */ "teknium/OpenHermes-2.5-Mistral-7B",
305 /* .template_str= */ "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
306 /* .expected_output= */ "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\nHi there<|im_end|>\n<|im_start|>user\nWho are you<|im_end|>\n<|im_start|>assistant\n I am an assistant <|im_end|>\n<|im_start|>user\nAnother question<|im_end|>\n<|im_start|>assistant\n",
307 /* .expected_output_jinja= */ "",
308 /* .bos_token= */ "",
309 /* .eos_token= */ "",
310 },
311 {
312 /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (NOTE: Old pre-v1 without a system prompt)",
313 /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
314 /* .expected_output= */ "[INST] You are a helpful assistant\nHello [/INST]Hi there</s>[INST] Who are you [/INST] I am an assistant </s>[INST] Another question [/INST]",
315 /* .expected_output_jinja= */ "",
316 /* .bos_token= */ "",
317 /* .eos_token= */ "</s>",
318 },
319 {
320 /* .name= */ "TheBloke/FusionNet_34Bx2_MoE-AWQ",
321 /* .template_str= */ "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' ' + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
322 /* .expected_output= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST]Hi there</s><s>[INST] Who are you [/INST] I am an assistant </s><s>[INST] Another question [/INST]",
323 /* .expected_output_jinja= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST] Hi there </s><s>[INST] Who are you [/INST] I am an assistant </s><s>[INST] Another question [/INST]",
324 /* .bos_token= */ "<s>",
325 /* .eos_token= */ "</s>",
326 },
327 {
328 /* .name= */ "bofenghuang/vigogne-2-70b-chat",
329 /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'Vous êtes Vigogne, un assistant IA créé par Zaion Lab. Vous suivez extrêmement bien les instructions. Aidez autant que vous le pouvez.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
330 /* .expected_output= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST]Hi there</s>[INST] Who are you [/INST]I am an assistant</s>[INST] Another question [/INST]",
331 /* .expected_output_jinja= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST] Hi there </s>[INST] Who are you [/INST] I am an assistant </s>[INST] Another question [/INST]",
332 /* .bos_token= */ "",
333 /* .eos_token= */ "</s>",
334 },
335 {
336 /* .name= */ "mlabonne/AlphaMonarch-7B",
337 /* .template_str= */ "{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}",
338 /* .expected_output= */ "system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n I am an assistant </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
339 /* .expected_output_jinja= */ "<s>system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n I am an assistant </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
340 /* .bos_token= */ "<s>",
341 /* .eos_token= */ "</s>",
342 },
343 {
344 /* .name= */ "google/gemma-7b-it",
345 /* .template_str= */ "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
346 /* .expected_output= */ "<start_of_turn>user\nYou are a helpful assistant\n\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
347 /* .expected_output_jinja= */ "<start_of_turn>user\nYou are a helpful assistant\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
348 },
349 {
350 /* .name= */ "OrionStarAI/Orion-14B-Chat",
351 /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
352 /* .expected_output= */ "Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s> I am an assistant </s>Human: Another question\n\nAssistant: </s>",
353 /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s> I am an assistant </s>Human: Another question\n\nAssistant: </s>",
354 /* .bos_token= */ "",
355 /* .eos_token= */ "</s>",
356 },
357 {
358 /* .name= */ "openchat/openchat-3.5-0106",
359 // The included chat_template differs from the author's suggestions here: https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d
360 // So we match against the included template but implement the suggested version.
361 /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
362 /* .expected_output= */ "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
363 /* .expected_output_jinja= */ "GPT4 Correct System: You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
364 },
365 {
366 /* .name= */ "deepseek-ai/deepseek-coder-33b-instruct",
367 /* .template_str= */ "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
368 /* .expected_output= */ "You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n",
369 /* .expected_output_jinja= */ "",
370 },
371 {
372 /* .name= */ "eachadea/vicuna-13b-1.1",
373 // No template included in tokenizer_config.json, so this template likely needs to be manually set.
374 /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
375 /* .expected_output= */ "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT: I am an assistant </s>\nUSER: Another question\nASSISTANT:",
376 /* .expected_output_jinja= */ "",
377 /* .bos_token= */ "",
378 /* .eos_token= */ "",
379 },
380 {
381 /* .name= */ "Orca-Vicuna",
382 // No template included in tokenizer_config.json, so this template likely needs to be manually set.
383 /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
384 /* .expected_output= */ "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT: I am an assistant </s>\nUSER: Another question\nASSISTANT:",
385 /* .expected_output_jinja= */ "",
386 /* .bos_token= */ "",
387 /* .eos_token= */ "",
388 },
389 {
390 /* .name= */ "CohereForAI/c4ai-command-r-plus",
391 /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
392 /* .expected_output= */ "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
393 /* .expected_output_jinja= */ "",
394 },
395 {
396 /* .name= */ "Llama-3",
397 /* .template_str= */ "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
398 /* .expected_output= */ "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
399 /* .expected_output_jinja= */ "",
400 },
401 {
402 /* .name= */ "Phi-3-mini",
403 /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
404 /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
405 /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
406 },
407 {
408 /* .name= */ "Phi-3-small",
409 /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
410 /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
411 /* .expected_output_jinja= */ "",
412 },
413 {
414 /* .name= */ "Phi-3-medium",
415 /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
416 /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
417 /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
418 },
419 {
420 /* .name= */ "Phi-3-vision",
421 /* .template_str= */ "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}",
422 /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
423 /* .expected_output_jinja= */ "",
424 /* .bos_token= */ "",
425 /* .eos_token= */ "",
426 },
427 {
428 /* .name= */ "ChatGLM3",
429 /* .template_str= */ "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
430 /* .expected_output= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n I am an assistant <|user|>\n Another question<|assistant|>",
431 /* .expected_output_jinja= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n I am an assistant <|user|>\n Another question<|assistant|>",
432 },
433 {
434 /* .name= */ "ChatGLM4",
435 /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
436 /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n",
437 /* .expected_output_jinja= */ "",
438 /* .bos_token= */ "",
439 /* .eos_token= */ "",
440 },
441 {
442 /* .name= */ "GLMEdge",
443 /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
444 /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
445 /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
446 /* .bos_token= */ "",
447 /* .eos_token= */ "",
448 },
449 {
450 /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
451 /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
452 /* .expected_output= */ U8C("You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>"),
453 /* .expected_output_jinja= */ "",
454 /* .bos_token= */ "",
455 /* .eos_token= */ "",
456 },
457 {
458 /* .name= */ "DeepSeek-V2",
459 /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
460 /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:"),
461 /* .expected_output_jinja= */ "",
462 /* .bos_token= */ "",
463 /* .eos_token= */ "<|end▁of▁sentence|>",
464 },
465 {
466 /* .name= */ "ibm-granite/granite-3.0-8b-instruct",
467 /* .template_str= */ "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'user' %}\n {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant' %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}",
468 /* .expected_output= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>",
469 /* .expected_output_jinja= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>",
470 },
471 {
472 /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (mistralai 'v1' template with a system prompt)",
473 /* .template_str= */ "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
474 /* .expected_output= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there</s> [INST] Who are you [/INST] I am an assistant </s> [INST] Another question [/INST]",
475 /* .expected_output_jinja= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there</s> [INST] Who are you [/INST] I am an assistant </s> [INST] Another question [/INST]",
476 /* .bos_token= */ "",
477 /* .eos_token= */ "</s>",
478 },
479 {
480 /* .name= */ "Mistral-Large-Instruct-2407 (mistralai 'v3' template; modified to have system prompt at start)",
481 /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS] [\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n 
{%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"[TOOL_CALLS] [\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"]|trim + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n",
482 /* .expected_output= */ "[INST] You are a helpful assistant\n\nHello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant</s>[INST] Another question[/INST]",
483 /* .expected_output_jinja= */ "[INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant</s>[INST] You are a helpful assistant\n\nAnother question[/INST]",
484 /* .bos_token= */ "",
485 /* .eos_token= */ "</s>",
486 },
487 {
488 /* .name= */ "Mistral-Nemo-Instruct-2407 (mistralai 'v3-tekken' template; modified to have system prompt at start)",
489 /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS][\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n 
{%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"[TOOL_CALLS][\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n",
490 /* .expected_output= */ "[INST]You are a helpful assistant\n\nHello[/INST]Hi there</s>[INST]Who are you[/INST] I am an assistant </s>[INST]Another question[/INST]",
491 /* .expected_output_jinja= */ "[INST]Hello[/INST]Hi there</s>[INST]Who are you[/INST] I am an assistant </s>[INST]You are a helpful assistant\n\nAnother question[/INST]",
492 /* .bos_token= */ "",
493 /* .eos_token= */ "</s>",
494 },
495 {
496 /* .name= */ "mistralai/Mistral-Large-Instruct-2411 (mistralai 'v7' template)",
497 /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}",
498 /* .expected_output= */ "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant </s>[INST] Another question[/INST]",
499 /* .expected_output_jinja= */ "",
500 /* .bos_token= */ "",
501 /* .eos_token= */ "</s>",
502 },
503 {
504 /* .name= */ "ai-sage/GigaChat-20B-A3B-instruct",
505 /* .template_str= */ "{% if messages[0]['role'] == 'system' -%}\n {%- set loop_messages = messages[1:] -%}\n {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n {%- set loop_messages = messages -%}\n {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n \n {%- if loop.index0 == 0 -%}\n {{ system_message -}}\n {%- endif -%}\n {%- if message['role'] == 'user' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if message['role'] == 'assistant' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if loop.last and add_generation_prompt -%}\n {{ 'assistant' + additional_special_tokens[0] -}}\n {%- endif -%}\n{%- endfor %}",
506 /* .expected_output= */ "<s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|> I am an assistant <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>",
507 /* .expected_output_jinja= */ "",
508 /* .bos_token= */ "",
509 /* .eos_token= */ "",
510 /* .supported_with_jinja= */ false, // Requires additional_special_tokens as extra context
511 },
512 {
513 /* .name= */ "Infinigence/Megrez-3B-Instruct",
514 /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"),
515 /* .expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
516 /* .expected_output_jinja= */ "",
517 /* .bos_token= */ "",
518 /* .eos_token= */ "",
519 },
520 {
521 /* .name= */ "phi-4",
522 /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|><|im_start|>assistant<|im_sep|>'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}",
523 /* .expected_output= */ "<|im_start|>system<|im_sep|>You are a helpful assistant<|im_end|><|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>Hi there<|im_end|><|im_start|>user<|im_sep|>Who are you<|im_end|><|im_start|>assistant<|im_sep|> I am an assistant <|im_end|><|im_start|>user<|im_sep|>Another question<|im_end|><|im_start|>assistant<|im_sep|>",
524 /* .expected_output_jinja= */ "",
525 /* .bos_token= */ "",
526 /* .eos_token= */ "",
527 },
528 {
529 /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
530 /* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n",
531 /* .expected_output= */ " Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
532 /* .expected_output_jinja= */ "<s> Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
533 /* .bos_token= */ "<s>",
534 /* .eos_token= */ "",
535 },
536 {
537 /* .name= */ "inclusionAI/Ling-lite",
538 /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% endif %}{% set role = role | upper %}{{ '<role>' + role + '</role>' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ '<role>ASSISTANT</role>' }}{% endif %}",
539 /* .expected_output= */ "<role>SYSTEM</role>You are a helpful assistant<role>HUMAN</role>Hello<role>ASSISTANT</role>Hi there<role>HUMAN</role>Who are you<role>ASSISTANT</role> I am an assistant <role>HUMAN</role>Another question<role>ASSISTANT</role>",
540 /* .expected_output_jinja= */ "",
541 /* .bos_token= */ "",
542 /* .eos_token= */ "",
543 },
544 {
545 /* .name= */ "ByteDance-Seed/Seed-OSS-36B-Instruct",
546 /* .template_str */ "{# <seed:bos> #}{%- for message in messages %}{%- if message.role in [\"user\", \"system\"] %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- elif message.role == \"assistant\" %}{{ bos_token + message.role }}{%- if message.content is defined and message.content is string and message.content|trim|length > 0 %}{{ \"\\n\" + message.content|trim + eos_token }}{%- endif %}{%- else %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{ bos_token + \"assistant\\n\" }}{%- endif %}",
547 /* .expected_output= */ "<seed:bos>system\nYou are a helpful assistant<seed:eos><seed:bos>user\nHello<seed:eos><seed:bos>assistant\nHi there<seed:eos><seed:bos>user\nWho are you<seed:eos><seed:bos>assistant\nI am an assistant<seed:eos><seed:bos>user\nAnother question<seed:eos><seed:bos>assistant\n",
548 /* .expected_output_jinja= */ "<seed:bos>system\nYou are a helpful assistant<seed:eos><seed:bos>user\nHello<seed:eos><seed:bos>assistant\nHi there<seed:eos><seed:bos>user\nWho are you<seed:eos><seed:bos>assistant\nI am an assistant<seed:eos><seed:bos>user\nAnother question<seed:eos><seed:bos>assistant\n",
549 /* .bos_token= */ "<seed:bos>",
550 /* .eos_token= */ "<seed:eos>",
551 }
552 };
553 std::vector<char> formatted_chat(1024);
554 int32_t res;
555
556 // list all supported templates
557 std::vector<const char *> supported_tmpl;
558 res = llama_chat_builtin_templates(nullptr, 0);
559 assert(res > 0);
560 supported_tmpl.resize(res);
561 res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
562 std::cout << "Built-in chat templates:\n";
563 for (auto tmpl : supported_tmpl) {
564 std::cout << " " << tmpl << "\n";
565 }
566
567 // test invalid chat template
568 res = llama_chat_apply_template("INVALID TEMPLATE", conversation.data(), conversation.size(), true, formatted_chat.data(), formatted_chat.size());
569 assert(res < 0);
570 const auto add_generation_prompt = true;
571
572 for (const auto & test_case : test_cases) {
573 std::cout << "\n\n=== " << test_case.name << " ===\n\n";
574 formatted_chat.resize(1024);
575 res = llama_chat_apply_template(
576 test_case.template_str.c_str(),
577 conversation.data(),
578 conversation.size(),
579 add_generation_prompt,
580 formatted_chat.data(),
581 formatted_chat.size()
582 );
583 formatted_chat.resize(res);
584 std::string output(formatted_chat.data(), formatted_chat.size());
585 if (output != test_case.expected_output) {
586 std::cout << "Expected:\n" << test_case.expected_output << "\n";
587 std::cout << "-------------------------\n";
588 std::cout << "Actual:\n" << output << "\n";
589 std::cout.flush();
590 assert(output == test_case.expected_output);
591 }
592 }
593
594 std::vector<common_chat_msg> messages;
595 for (const auto & msg : conversation) {
596 messages.push_back(simple_msg(msg.role, msg.content));
597 }
598 for (const auto & test_case : test_cases) {
599 if (!test_case.supported_with_jinja) {
600 continue;
601 }
602 std::cout << "\n\n=== " << test_case.name << " (jinja) ===\n\n";
603 try {
604 auto output = format_using_common(
605 test_case.template_str,
606 test_case.bos_token,
607 test_case.eos_token,
608 messages);
609 auto expected_output = normalize_newlines(test_case.expected_output_jinja.empty() ? test_case.expected_output : test_case.expected_output_jinja);
610 if (output != expected_output) {
611 std::cout << "Template:```\n" << test_case.template_str << "\n```";
612 std::cout << "-------------------------\n";
613 std::cout << "Expected:```\n" << expected_output << "\n```";
614 std::cout << "-------------------------\n";
615 std::cout << "Actual:```\n" << output << "\n```";
616 std::cout.flush();
617 assert(output == expected_output);
618 }
619 } catch (const std::exception & e) {
620 std::cerr << "ERROR: " << e.what() << "\n";
621 assert(false);
622 }
623 }
624
625 // TODO: llama_chat_format_single will be deprecated, remove these tests later
626
627 // test llama_chat_format_single for system message
628 std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n";
629 std::vector<common_chat_msg> chat2;
630 auto sys_msg = simple_msg("system", "You are a helpful assistant");
631
632 auto fmt_sys = [&](std::string tmpl_str) {
633 auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
634 auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
635 std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n";
636 std::cout << "-------------------------\n";
637 return output;
638 };
639 assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n");
640 assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n");
641 assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n");
642 assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n");
643 assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]");
644 assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n");
645 assert(fmt_sys("llama2-sys") == "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n");
646 assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates
647 assert(fmt_sys("gemma") == ""); // for gemma, system message is merged with user message
648 assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>");
649 assert(fmt_sys("gigachat") == "<s>You are a helpful assistant<|message_sep|>");
650
651
652 // test llama_chat_format_single for user message
653 std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n";
654 chat2.push_back(simple_msg("system", "You are a helpful assistant"));
655 chat2.push_back(simple_msg("user", "Hello"));
656 chat2.push_back(simple_msg("assistant", "I am assistant"));
657 auto new_msg = simple_msg("user", "How are you");
658
659 auto fmt_single = [&](const std::string & tmpl_str) {
660 auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
661 auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
662 std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n";
663 std::cout << "-------------------------\n";
664 return output;
665 };
666 assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n");
667 assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]");
668 assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]");
669 assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]");
670 assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]");
671 assert(fmt_single("llama2") == "[INST] How are you [/INST]");
672 assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates
673 assert(fmt_single("gemma") == "\n<start_of_turn>user\nHow are you<end_of_turn>\n<start_of_turn>model\n");
674 assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n");
675 // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>");
676
677 std::cout << "\nOK: All tests passed successfully.\n";
678
679 return 0;
680}