#include "llama-chat.h"

#include "llama.h"

#include <map>
#include <sstream>
#include <algorithm>
#include <cctype> // isspace, toupper

#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif

// trim whitespace from the beginning and end of a string
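// e.g. trim("  a b \n") == "a b"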
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
        start += 1;
    }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
        end -= 1;
    }
    return str.substr(start, end - start);
}

static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
    { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGLM_3         },
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGLM_4         },
    { "glmedge",           LLM_CHAT_TEMPLATE_GLMEDGE           },
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
    { "exaone4",           LLM_CHAT_TEMPLATE_EXAONE_4          },
    { "exaone-moe",        LLM_CHAT_TEMPLATE_EXAONE_MOE        },
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
    { "yandex",            LLM_CHAT_TEMPLATE_YANDEX            },
    { "bailing",           LLM_CHAT_TEMPLATE_BAILING           },
    { "bailing-think",     LLM_CHAT_TEMPLATE_BAILING_THINK     },
    { "bailing2",          LLM_CHAT_TEMPLATE_BAILING2          },
    { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
    { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
    { "dots1",             LLM_CHAT_TEMPLATE_DOTS1             },
    { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
    { "gpt-oss",           LLM_CHAT_TEMPLATE_OPENAI_MOE        },
    { "hunyuan-dense",     LLM_CHAT_TEMPLATE_HUNYUAN_DENSE     },
    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
    { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
    { "solar-open",        LLM_CHAT_TEMPLATE_SOLAR_OPEN        },
};

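// resolve a built-in template name (e.g. "chatml") to its enum value; throws std::out_of_range for unknown names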
llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}

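// detect the template from either a built-in name or the Jinja source of a model's chat template;
// returns LLM_CHAT_TEMPLATE_UNKNOWN if nothing matches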
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };
    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        if (tmpl_contains("<|tool_declare|>")) {
            return LLM_CHAT_TEMPLATE_EXAONE_MOE;
        }
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
        return LLM_CHAT_TEMPLATE_BAILING2;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    } else if (tmpl_contains("<seed:bos>")) {
        return LLM_CHAT_TEMPLATE_SEED_OSS;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
    } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
        return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}

// Simple version of "llama_apply_chat_template" that only works with strings
// This function renders the commonly used templates with hard-coded formatting rules. It is not a Jinja parser.
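// usage sketch (illustrative; llama_chat_message is the { role, content } struct from llama.h):
//
//   llama_chat_message msgs[] = {
//       { "system", "You are a helpful assistant" },
//       { "user",   "Hello" },
//   };
//   std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };
//   std::string prompt;
//   int32_t res = llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
//   // res is the length of the formatted prompt, or -1 if the template is not supported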
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggml-org/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
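        // example output for {system, user} with add_ass = true:
        //   <|im_start|>system
        //   You are a helpful assistant<|im_end|>
        //   <|im_start|>user
        //   Hello<|im_end|>
        //   <|im_start|>assistant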
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
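        // example output for {system, user} (v7; v7-tekken drops the space after each opening tag):
        //   [SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST]
        // note: no generation prompt is appended; the add_ass flag is not used by this branch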
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
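        // example outputs for a single user message "Hello":
        //   v1:        " [INST] Hello [/INST]"
        //   v3:        "[INST] Hello[/INST]"
        //   v3-tekken: "[INST]Hello[/INST]"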
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
        tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
        || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
        || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
        || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
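        // example output for {system, user} (the SYS variant):
        //   [INST] <<SYS>>
        //   You are a helpful assistant
        //   <</SYS>>
        //
        //   Hello [/INST]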
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
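        // example output for a single user message with add_ass = true:
        //   <|user|>
        //   Hello<|end|>
        //   <|assistant|>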
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // chatml-style template with <|im_sep|> between role and content
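        // example output for a single user message with add_ass = true (no newlines between messages):
        //   <|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>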
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
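        // example output for {system, user} with add_ass = true (the system message is folded into the first user turn):
        //   <start_of_turn>user
        //   You are a helpful assistant
        //
        //   Hello<end_of_turn>
        //   <start_of_turn>model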
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt += trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
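        // example output for {user, assistant} (note the "</s>" emitted right after "Assistant: ", before the reply):
        //   Human: Hello
        //
        //   Assistant: </s>Hi there</s>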
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt += message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(static_cast<unsigned char>(role[0]));
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
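        // example output for a single user message with add_ass = true (the prompt ends with a blank line after the assistant header):
        //   <|start_header_id|>user<|end_header_id|>
        //
        //   Hello<|eot_id|><|start_header_id|>assistant<|end_header_id|>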
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "tool") {
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_MOE) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "user") {
                ss << "<|user|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "assistant") {
                ss << "<|assistant|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "tool") {
                ss << "<|tool|>\n" << trim(message->content) << "<|endofturn|>\n";
            }
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
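        // note: the "Assistant:" generation prompt is appended after the final user message; the add_ass flag is not used by this branch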
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "system") {
                ss << "System: " << trim(chat[i]->content) << "\n\n";
            } else if (role == "user") {
                ss << "User: " << trim(chat[i]->content) << "\n\n";
                if (i == chat.size() - 1) {
                    ss << "Assistant:";
                }
            } else if (role == "assistant") {
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
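        // example output for {system, user} with add_ass = true (a single line, wrapped here for readability):
        //   <s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>
        //       available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>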
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }

        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }

        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
        // Yandex template ("\n\n" is defined as EOT token)

        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << " Пользователь: " << chat[i]->content << "\n\n";
            } else if (role == "assistant") {
                ss << " Ассистент: " << chat[i]->content << "\n\n";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << " Ассистент:[SEP]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
        // Bailing (Ling/Ring) template
        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), [](unsigned char c) { return static_cast<char>(toupper(c)); });
            }

            ss << "<role>" << role << "</role>" << message->content;
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";

            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
                ss << "<think>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
        // Bailing2 (Ling 2.0) template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        if (!has_system) {
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
        }

        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), [](unsigned char c) { return static_cast<char>(toupper(c)); });
            }

            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
        // Llama 4
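        // example output for a single user message with add_ass = true:
        //   <|header_start|>user<|header_end|>
        //
        //   Hello<|eot|><|header_start|>assistant<|header_end|>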
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
        }
        if (add_ass) {
            ss << "<|header_start|>assistant<|header_end|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
        // SmolVLM
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "<end_of_utterance>\n";
            } else {
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
        // dots.llm1.inst (DOTS1)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>" << message->content << "<|endofsystem|>";
            } else if (role == "user") {
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
            } else {
                ss << "<|response|>" << message->content << "<|endofresponse|>";
            }
        }
        if (add_ass) {
            ss << "<|response|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
        // tencent/Hunyuan-A13B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
            } else if (role == "assistant") {
                ss << message->content << "<|eos|>";
            } else {
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
        // OpenAI MoE (based on Harmony chat template)
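        // example output for a single user message with add_ass = true:
        //   <|start|>user<|message|>Hello<|end|><|start|>assistant
        // assistant turns in the history are closed with <|return|> instead of <|end|>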
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start|>" << role << "<|message|>" << message->content;
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
        }
        if (add_ass) {
            ss << "<|start|>assistant";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
        // tencent/Hunyuan-4B-Instruct
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (i == 0) {
                if (role == "system") {
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
                }
            }

            if (role == "assistant") {
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
            } else if (role == "user") {
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|im_system|>system<|im_middle|>";
            } else if (role == "user") {
                ss << "<|im_user|>user<|im_middle|>";
            } else if (role == "assistant") {
                ss << "<|im_assistant|>assistant<|im_middle|>";
            } else if (role == "tool") {
                ss << "<|im_system|>tool<|im_middle|>";
            }

            ss << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_assistant|>assistant<|im_middle|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
        }
        if (add_ass) {
            ss << "<seed:bos>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "user") {
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
        // [unused9]系统:xxx[unused10]
        // [unused9]用户:xxx[unused10]
        // [unused9]助手:xxx[unused10]
        // ...
        for (size_t i = 0; i < chat.size(); ++i) {
            const auto & msg = chat[i];
            const std::string & role = msg->role;
            const std::string & content = msg->content;

            if (i == 0 && role != "system") {
                ss << "[unused9]系统:[unused10]";
            }

            if (role == "system") {
                ss << "[unused9]系统:" << content << "[unused10]";
            } else if (role == "user") {
                ss << "[unused9]用户:" << content << "[unused10]";
            } else if (role == "assistant") {
                ss << "[unused9]助手:" << content << "[unused10]";
            } else if (role == "tool") {
                ss << "[unused9]工具:" << content << "[unused10]";
            } else if (role == "function") {
                ss << "[unused9]方法:" << content << "[unused10]";
            }
        }
        if (add_ass) {
            ss << "[unused9]助手:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
        }
        if (add_ass) {
            ss << "<|begin|>assistant";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}

// public interface

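// usage sketch (illustrative): call with len == 0 to query the count, then again with a buffer of that size
//
//   int32_t n = llama_chat_builtin_templates(nullptr, 0);
//   std::vector<const char *> names(n);
//   llama_chat_builtin_templates(names.data(), names.size());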
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}