#include "llama-chat.h"

#include "llama.h"

#include <map>
#include <sstream>
#include <algorithm>
#include <cctype> // isspace, toupper

#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif

// trim whitespace from the beginning and end of a string
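// e.g. trim("  a b \n") == "a b"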
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
        start += 1;
    }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
        end -= 1;
    }
    return str.substr(start, end - start);
}

static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
    { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGLM_3         },
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGLM_4         },
    { "glmedge",           LLM_CHAT_TEMPLATE_GLMEDGE           },
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
    { "exaone4",           LLM_CHAT_TEMPLATE_EXAONE_4          },
    { "exaone-moe",        LLM_CHAT_TEMPLATE_EXAONE_MOE        },
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
    { "yandex",            LLM_CHAT_TEMPLATE_YANDEX            },
    { "bailing",           LLM_CHAT_TEMPLATE_BAILING           },
    { "bailing-think",     LLM_CHAT_TEMPLATE_BAILING_THINK     },
    { "bailing2",          LLM_CHAT_TEMPLATE_BAILING2          },
    { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
    { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
    { "dots1",             LLM_CHAT_TEMPLATE_DOTS1             },
    { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
    { "gpt-oss",           LLM_CHAT_TEMPLATE_OPENAI_MOE        },
    { "hunyuan-dense",     LLM_CHAT_TEMPLATE_HUNYUAN_DENSE     },
    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
    { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
    { "solar-open",        LLM_CHAT_TEMPLATE_SOLAR_OPEN        },
};

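// resolve a built-in template name (e.g. "chatml") to its enum value; throws std::out_of_range for unknown names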
llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}

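// detect the template from either a built-in name or the Jinja source of a model's chat template;
// returns LLM_CHAT_TEMPLATE_UNKNOWN if nothing matches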
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };
    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        if (tmpl_contains("<|tool_declare|>")) {
            return LLM_CHAT_TEMPLATE_EXAONE_MOE;
        }
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
        return LLM_CHAT_TEMPLATE_BAILING2;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    } else if (tmpl_contains("<seed:bos>")) {
        return LLM_CHAT_TEMPLATE_SEED_OSS;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
    } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
        return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}

// Simple version of "llama_apply_chat_template" that only works with strings
// This function renders the commonly used templates with hard-coded formatting rules. It is not a Jinja parser.
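// usage sketch (illustrative; llama_chat_message is the { role, content } struct from llama.h):
//
//   llama_chat_message msgs[] = {
//       { "system", "You are a helpful assistant" },
//       { "user",   "Hello" },
//   };
//   std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };
//   std::string prompt;
//   int32_t res = llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
//   // res is the length of the formatted prompt, or -1 if the template is not supported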
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggml-org/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
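        // example output for {system, user} with add_ass = true:
        //   <|im_start|>system
        //   You are a helpful assistant<|im_end|>
        //   <|im_start|>user
        //   Hello<|im_end|>
        //   <|im_start|>assistant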
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
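        // example output for {system, user} (v7; v7-tekken drops the space after each opening tag):
        //   [SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST]
        // note: no generation prompt is appended; the add_ass flag is not used by this branch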
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
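        // example outputs for a single user message "Hello":
        //   v1:        " [INST] Hello [/INST]"
        //   v3:        "[INST] Hello[/INST]"
        //   v3-tekken: "[INST]Hello[/INST]"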
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
        tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
        || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
        || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
        || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
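        // example output for {system, user} (the SYS variant):
        //   [INST] <<SYS>>
        //   You are a helpful assistant
        //   <</SYS>>
        //
        //   Hello [/INST]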
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
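        // example output for a single user message with add_ass = true:
        //   <|user|>
        //   Hello<|end|>
        //   <|assistant|>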
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // chatml-style template with <|im_sep|> between role and content
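        // example output for a single user message with add_ass = true (no newlines between messages):
        //   <|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>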
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
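        // example output for {system, user} with add_ass = true (the system message is folded into the first user turn):
        //   <start_of_turn>user
        //   You are a helpful assistant
        //
        //   Hello<end_of_turn>
        //   <start_of_turn>model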
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt += trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
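        // example output for {user, assistant} (note the "</s>" emitted right after "Assistant: ", before the reply):
        //   Human: Hello
        //
        //   Assistant: </s>Hi there</s>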
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt += message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(static_cast<unsigned char>(role[0]));
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
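        // example output for a single user message with add_ass = true (the prompt ends with a blank line after the assistant header):
        //   <|start_header_id|>user<|end_header_id|>
        //
        //   Hello<|eot_id|><|start_header_id|>assistant<|end_header_id|>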
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "tool") {
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_MOE) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "user") {
                ss << "<|user|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "assistant") {
                ss << "<|assistant|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "tool") {
                ss << "<|tool|>\n" << trim(message->content) << "<|endofturn|>\n";
            }
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
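        // note: the "Assistant:" generation prompt is appended after the final user message; the add_ass flag is not used by this branch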
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "system") {
                ss << "System: " << trim(chat[i]->content) << "\n\n";
            } else if (role == "user") {
                ss << "User: " << trim(chat[i]->content) << "\n\n";
                if (i == chat.size() - 1) {
                    ss << "Assistant:";
                }
            } else if (role == "assistant") {
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
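        // example output for {system, user} with add_ass = true (a single line, wrapped here for readability):
        //   <s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>
        //       available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>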
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }

        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }

        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
        // Yandex template ("\n\n" is defined as EOT token)

        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << " Пользователь: " << chat[i]->content << "\n\n";
            } else if (role == "assistant") {
                ss << " Ассистент: " << chat[i]->content << "\n\n";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << " Ассистент:[SEP]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
        // Bailing (Ling/Ring) template
        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), [](unsigned char c) { return static_cast<char>(toupper(c)); });
            }

            ss << "<role>" << role << "</role>" << message->content;
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";

            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
                ss << "<think>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
        // Bailing2 (Ling 2.0) template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        if (!has_system) {
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
        }

        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), [](unsigned char c) { return static_cast<char>(toupper(c)); });
            }

            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
        // Llama 4
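        // example output for a single user message with add_ass = true:
        //   <|header_start|>user<|header_end|>
        //
        //   Hello<|eot|><|header_start|>assistant<|header_end|>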
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
        }
        if (add_ass) {
            ss << "<|header_start|>assistant<|header_end|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
        // SmolVLM
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "<end_of_utterance>\n";
            } else {
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
        // dots.llm1.inst (DOTS1)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>" << message->content << "<|endofsystem|>";
            } else if (role == "user") {
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
            } else {
                ss << "<|response|>" << message->content << "<|endofresponse|>";
            }
        }
        if (add_ass) {
            ss << "<|response|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
        // tencent/Hunyuan-A13B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
            } else if (role == "assistant") {
                ss << message->content << "<|eos|>";
            } else {
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
        // OpenAI MoE (based on Harmony chat template)
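        // example output for a single user message with add_ass = true:
        //   <|start|>user<|message|>Hello<|end|><|start|>assistant
        // assistant turns in the history are closed with <|return|> instead of <|end|>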
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start|>" << role << "<|message|>" << message->content;
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
        }
        if (add_ass) {
            ss << "<|start|>assistant";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
        // tencent/Hunyuan-4B-Instruct
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (i == 0) {
                if (role == "system") {
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
                }
            }

            if (role == "assistant") {
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
            } else if (role == "user") {
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|im_system|>system<|im_middle|>";
            } else if (role == "user") {
                ss << "<|im_user|>user<|im_middle|>";
            } else if (role == "assistant") {
                ss << "<|im_assistant|>assistant<|im_middle|>";
            } else if (role == "tool") {
                ss << "<|im_system|>tool<|im_middle|>";
            }

            ss << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_assistant|>assistant<|im_middle|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
        }
        if (add_ass) {
            ss << "<seed:bos>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "user") {
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
        // [unused9]系统:xxx[unused10]
        // [unused9]用户:xxx[unused10]
        // [unused9]助手:xxx[unused10]
        // ...
        for (size_t i = 0; i < chat.size(); ++i) {
            const auto & msg = chat[i];
            const std::string & role = msg->role;
            const std::string & content = msg->content;

            if (i == 0 && role != "system") {
                ss << "[unused9]系统:[unused10]";
            }

            if (role == "system") {
                ss << "[unused9]系统:" << content << "[unused10]";
            } else if (role == "user") {
                ss << "[unused9]用户:" << content << "[unused10]";
            } else if (role == "assistant") {
                ss << "[unused9]助手:" << content << "[unused10]";
            } else if (role == "tool") {
                ss << "[unused9]工具:" << content << "[unused10]";
            } else if (role == "function") {
                ss << "[unused9]方法:" << content << "[unused10]";
            }
        }
        if (add_ass) {
            ss << "[unused9]助手:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
        }
        if (add_ass) {
            ss << "<|begin|>assistant";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}

// public interface

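// usage sketch (illustrative): call with len == 0 to query the count, then again with a buffer of that size
//
//   int32_t n = llama_chat_builtin_templates(nullptr, 0);
//   std::vector<const char *> names(n);
//   llama_chat_builtin_templates(names.data(), names.size());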
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}