1#pragma once
2
3#include "ggml.h" // ggml_op
4
5#include <string>
6#include <set>
7
8//
9// gguf constants (sync with gguf.py)
10//
11
// Model architectures known to the loader.
// The enumerator order defines the stable internal values: append new
// architectures just before LLM_ARCH_UNKNOWN and keep this list in sync with
// the per-arch name table in the corresponding .cpp (and with gguf.py, per the
// header banner). LLM_ARCH_UNKNOWN stays last — it is the "not recognized"
// result of llm_arch_from_string().
enum llm_arch {
    LLM_ARCH_CLIP,
    LLM_ARCH_LLAMA,
    LLM_ARCH_LLAMA4,
    LLM_ARCH_DECI,
    LLM_ARCH_FALCON,
    LLM_ARCH_BAICHUAN,
    LLM_ARCH_GROK,
    LLM_ARCH_GPT2,
    LLM_ARCH_GPTJ,
    LLM_ARCH_GPTNEOX,
    LLM_ARCH_MPT,
    LLM_ARCH_STARCODER,
    LLM_ARCH_REFACT,
    LLM_ARCH_BERT,
    LLM_ARCH_MODERN_BERT,
    LLM_ARCH_NOMIC_BERT,
    LLM_ARCH_NOMIC_BERT_MOE,
    LLM_ARCH_NEO_BERT,
    LLM_ARCH_JINA_BERT_V2,
    LLM_ARCH_JINA_BERT_V3,
    LLM_ARCH_BLOOM,
    LLM_ARCH_STABLELM,
    LLM_ARCH_QWEN,
    LLM_ARCH_QWEN2,
    LLM_ARCH_QWEN2MOE,
    LLM_ARCH_QWEN2VL,
    LLM_ARCH_QWEN3,
    LLM_ARCH_QWEN3MOE,
    LLM_ARCH_QWEN3NEXT,
    LLM_ARCH_QWEN3VL,
    LLM_ARCH_QWEN3VLMOE,
    LLM_ARCH_QWEN35,
    LLM_ARCH_QWEN35MOE,
    LLM_ARCH_PHI2,
    LLM_ARCH_PHI3,
    LLM_ARCH_PHIMOE,
    LLM_ARCH_PLAMO,
    LLM_ARCH_PLAMO2,
    LLM_ARCH_PLAMO3,
    LLM_ARCH_CODESHELL,
    LLM_ARCH_ORION,
    LLM_ARCH_INTERNLM2,
    LLM_ARCH_MINICPM,
    LLM_ARCH_MINICPM3,
    LLM_ARCH_GEMMA,
    LLM_ARCH_GEMMA2,
    LLM_ARCH_GEMMA3,
    LLM_ARCH_GEMMA3N,
    LLM_ARCH_GEMMA_EMBEDDING,
    LLM_ARCH_STARCODER2,
    LLM_ARCH_MAMBA,
    LLM_ARCH_MAMBA2,
    LLM_ARCH_JAMBA,
    LLM_ARCH_FALCON_H1,
    LLM_ARCH_XVERSE,
    LLM_ARCH_COMMAND_R,
    LLM_ARCH_COHERE2,
    LLM_ARCH_DBRX,
    LLM_ARCH_OLMO,
    LLM_ARCH_OLMO2,
    LLM_ARCH_OLMOE,
    LLM_ARCH_OPENELM,
    LLM_ARCH_ARCTIC,
    LLM_ARCH_DEEPSEEK,
    LLM_ARCH_DEEPSEEK2,
    LLM_ARCH_CHATGLM,
    LLM_ARCH_GLM4,
    LLM_ARCH_GLM4_MOE,
    LLM_ARCH_BITNET,
    LLM_ARCH_T5,
    LLM_ARCH_T5ENCODER,
    LLM_ARCH_JAIS,
    LLM_ARCH_NEMOTRON,
    LLM_ARCH_NEMOTRON_H,
    LLM_ARCH_NEMOTRON_H_MOE,
    LLM_ARCH_EXAONE,
    LLM_ARCH_EXAONE4,
    LLM_ARCH_EXAONE_MOE,
    LLM_ARCH_RWKV6,
    LLM_ARCH_RWKV6QWEN2,
    LLM_ARCH_RWKV7,
    LLM_ARCH_ARWKV7,
    LLM_ARCH_GRANITE,
    LLM_ARCH_GRANITE_MOE,
    LLM_ARCH_GRANITE_HYBRID,
    LLM_ARCH_CHAMELEON,
    LLM_ARCH_WAVTOKENIZER_DEC,
    LLM_ARCH_PLM,
    LLM_ARCH_BAILINGMOE,
    LLM_ARCH_BAILINGMOE2,
    LLM_ARCH_DOTS1,
    LLM_ARCH_ARCEE,
    LLM_ARCH_AFMOE,
    LLM_ARCH_ERNIE4_5,
    LLM_ARCH_ERNIE4_5_MOE,
    LLM_ARCH_HUNYUAN_MOE,
    LLM_ARCH_HUNYUAN_DENSE,
    LLM_ARCH_SMOLLM3,
    LLM_ARCH_OPENAI_MOE,
    LLM_ARCH_LFM2,
    LLM_ARCH_LFM2MOE,
    LLM_ARCH_DREAM,
    LLM_ARCH_SMALLTHINKER,
    LLM_ARCH_LLADA,
    LLM_ARCH_LLADA_MOE,
    LLM_ARCH_SEED_OSS,
    LLM_ARCH_GROVEMOE,
    LLM_ARCH_APERTUS,
    LLM_ARCH_MINIMAX_M2,
    LLM_ARCH_COGVLM,
    LLM_ARCH_RND1,
    LLM_ARCH_PANGU_EMBED,
    LLM_ARCH_MISTRAL3,
    LLM_ARCH_MIMO2,
    LLM_ARCH_STEP35,
    LLM_ARCH_LLAMA_EMBED,
    LLM_ARCH_MAINCODER,
    LLM_ARCH_KIMI_LINEAR,
    LLM_ARCH_UNKNOWN, // sentinel: keep last
};
133
// gguf metadata keys understood by the loader.
// Each enumerator is mapped to its concrete key string (via LLM_KV below);
// the mapping table lives in the corresponding .cpp. Keep in sync with
// gguf.py per the header banner. The grouping below mirrors the gguf key
// namespaces (general.*, tokenizer.*, rope.*, ssm.*, ...).
enum llm_kv {
    // general.* — file-level metadata and optional default sampling params
    LLM_KV_GENERAL_TYPE,
    LLM_KV_GENERAL_ARCHITECTURE,
    LLM_KV_GENERAL_QUANTIZATION_VERSION,
    LLM_KV_GENERAL_ALIGNMENT,
    LLM_KV_GENERAL_FILE_TYPE,
    LLM_KV_GENERAL_SAMPLING_SEQUENCE,
    LLM_KV_GENERAL_SAMPLING_TOP_K,
    LLM_KV_GENERAL_SAMPLING_TOP_P,
    LLM_KV_GENERAL_SAMPLING_MIN_P,
    LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY,
    LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD,
    LLM_KV_GENERAL_SAMPLING_TEMP,
    LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N,
    LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT,
    LLM_KV_GENERAL_SAMPLING_MIROSTAT,
    LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU,
    LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA,
    LLM_KV_GENERAL_NAME,
    LLM_KV_GENERAL_AUTHOR,
    LLM_KV_GENERAL_VERSION,
    LLM_KV_GENERAL_URL,
    LLM_KV_GENERAL_DESCRIPTION,
    LLM_KV_GENERAL_LICENSE,
    LLM_KV_GENERAL_SOURCE_URL,
    LLM_KV_GENERAL_SOURCE_HF_REPO,

    // per-architecture model hyperparameters
    LLM_KV_VOCAB_SIZE,
    LLM_KV_CONTEXT_LENGTH,
    LLM_KV_EMBEDDING_LENGTH,
    LLM_KV_EMBEDDING_LENGTH_OUT,
    LLM_KV_FEATURES_LENGTH,
    LLM_KV_BLOCK_COUNT,
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
    LLM_KV_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,
    LLM_KV_SWIGLU_CLAMP_EXP,
    LLM_KV_SWIGLU_CLAMP_SHEXP,
    LLM_KV_USE_PARALLEL_RESIDUAL,
    LLM_KV_TENSOR_DATA_LAYOUT,
    LLM_KV_EXPERT_COUNT,
    LLM_KV_EXPERT_USED_COUNT,
    LLM_KV_EXPERT_SHARED_COUNT,
    LLM_KV_EXPERT_GROUP_COUNT,
    LLM_KV_EXPERT_GROUP_USED_COUNT,
    LLM_KV_EXPERT_WEIGHTS_SCALE,
    LLM_KV_EXPERT_WEIGHTS_NORM,
    LLM_KV_EXPERT_GATING_FUNC,
    LLM_KV_EXPERT_GROUP_SCALE,
    LLM_KV_EXPERTS_PER_GROUP,
    LLM_KV_MOE_EVERY_N_LAYERS,
    LLM_KV_NEXTN_PREDICT_LAYERS,
    LLM_KV_NUM_DEEPSTACK_LAYERS,
    LLM_KV_POOLING_TYPE,
    LLM_KV_LOGIT_SCALE,
    LLM_KV_DECODER_START_TOKEN_ID,
    LLM_KV_DECODER_BLOCK_COUNT,
    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
    LLM_KV_ROUTER_LOGIT_SOFTCAPPING,
    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
    LLM_KV_SWIN_NORM,
    LLM_KV_RESCALE_EVERY_N_LAYERS,
    LLM_KV_TIME_MIX_EXTRA_DIM,
    LLM_KV_TIME_DECAY_EXTRA_DIM,
    LLM_KV_RESIDUAL_SCALE,
    LLM_KV_EMBEDDING_SCALE,
    LLM_KV_TOKEN_SHIFT_COUNT,
    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
    LLM_KV_FULL_ATTENTION_INTERVAL,

    // attention.* hyperparameters
    LLM_KV_ATTENTION_HEAD_COUNT,
    LLM_KV_ATTENTION_HEAD_COUNT_KV,
    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
    LLM_KV_ATTENTION_CLAMP_KQV,
    LLM_KV_ATTENTION_KEY_LENGTH,
    LLM_KV_ATTENTION_VALUE_LENGTH,
    LLM_KV_ATTENTION_LAYERNORM_EPS,
    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
    LLM_KV_ATTENTION_GROUPNORM_EPS,
    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
    LLM_KV_ATTENTION_CAUSAL,
    LLM_KV_ATTENTION_Q_LORA_RANK,
    LLM_KV_ATTENTION_KV_LORA_RANK,
    LLM_KV_ATTENTION_DECAY_LORA_RANK,
    LLM_KV_ATTENTION_ICLR_LORA_RANK,
    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
    LLM_KV_ATTENTION_GATE_LORA_RANK,
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
    LLM_KV_ATTENTION_SLIDING_WINDOW,
    LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_OUTPUT_SCALE,
    LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
    LLM_KV_ATTENTION_TEMPERATURE_SCALE,
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

    // rope.* hyperparameters (frequency base and context scaling)
    LLM_KV_ROPE_DIMENSION_COUNT,
    LLM_KV_ROPE_DIMENSION_SECTIONS,
    LLM_KV_ROPE_FREQ_BASE,
    LLM_KV_ROPE_FREQ_BASE_SWA,
    LLM_KV_ROPE_SCALE_LINEAR,
    LLM_KV_ROPE_SCALING_TYPE,
    LLM_KV_ROPE_SCALING_FACTOR,
    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
    LLM_KV_ROPE_SCALING_FINETUNED,
    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
    LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_BETA_FAST,
    LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,

    // split.* — multi-file (sharded) gguf bookkeeping
    LLM_KV_SPLIT_NO,
    LLM_KV_SPLIT_COUNT,
    LLM_KV_SPLIT_TENSORS_COUNT,

    // ssm.* — state-space model (e.g. Mamba-style) hyperparameters
    LLM_KV_SSM_INNER_SIZE,
    LLM_KV_SSM_CONV_KERNEL,
    LLM_KV_SSM_STATE_SIZE,
    LLM_KV_SSM_TIME_STEP_RANK,
    LLM_KV_SSM_GROUP_COUNT,
    LLM_KV_SSM_DT_B_C_RMS,

    LLM_KV_KDA_HEAD_DIM,

    LLM_KV_WKV_HEAD_SIZE,

    // tokenizer.* — vocabulary, special token ids and chat template
    LLM_KV_TOKENIZER_MODEL,
    LLM_KV_TOKENIZER_PRE,
    LLM_KV_TOKENIZER_LIST,
    LLM_KV_TOKENIZER_TOKEN_TYPE,
    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
    LLM_KV_TOKENIZER_SCORES,
    LLM_KV_TOKENIZER_MERGES,
    LLM_KV_TOKENIZER_BOS_ID,
    LLM_KV_TOKENIZER_EOS_ID,
    LLM_KV_TOKENIZER_EOT_ID,
    LLM_KV_TOKENIZER_EOM_ID,
    LLM_KV_TOKENIZER_UNK_ID,
    LLM_KV_TOKENIZER_SEP_ID,
    LLM_KV_TOKENIZER_PAD_ID,
    LLM_KV_TOKENIZER_CLS_ID,
    LLM_KV_TOKENIZER_MASK_ID,
    LLM_KV_TOKENIZER_ADD_BOS,
    LLM_KV_TOKENIZER_ADD_EOS,
    LLM_KV_TOKENIZER_ADD_SEP,
    LLM_KV_TOKENIZER_ADD_PREFIX,
    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
    LLM_KV_TOKENIZER_HF_JSON,
    LLM_KV_TOKENIZER_RWKV,
    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
    LLM_KV_TOKENIZER_FIM_PRE_ID,
    LLM_KV_TOKENIZER_FIM_SUF_ID,
    LLM_KV_TOKENIZER_FIM_MID_ID,
    LLM_KV_TOKENIZER_FIM_PAD_ID,
    LLM_KV_TOKENIZER_FIM_REP_ID,
    LLM_KV_TOKENIZER_FIM_SEP_ID,

    // adapter.* — LoRA / control-vector adapter metadata
    LLM_KV_ADAPTER_TYPE,
    LLM_KV_ADAPTER_LORA_ALPHA,
    LLM_KV_ADAPTER_LORA_TASK_NAME,
    LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,
    LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS,

    // posnet.* / convnext.* (audio decoder sub-networks)
    LLM_KV_POSNET_EMBEDDING_LENGTH,
    LLM_KV_POSNET_BLOCK_COUNT,

    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
    LLM_KV_CONVNEXT_BLOCK_COUNT,

    LLM_KV_CLASSIFIER_OUTPUT_LABELS,

    LLM_KV_SHORTCONV_L_CACHE,

    // xIELU activation parameters
    LLM_KV_XIELU_ALPHA_N,
    LLM_KV_XIELU_ALPHA_P,
    LLM_KV_XIELU_BETA,
    LLM_KV_XIELU_EPS,

    // deprecated:
    LLM_KV_TOKENIZER_PREFIX_ID,
    LLM_KV_TOKENIZER_SUFFIX_ID,
    LLM_KV_TOKENIZER_MIDDLE_ID,

    // sentence-transformers dense layers in and out features
    LLM_KV_DENSE_2_FEAT_IN,
    LLM_KV_DENSE_2_FEAT_OUT,
    LLM_KV_DENSE_3_FEAT_IN,
    LLM_KV_DENSE_3_FEAT_OUT,
};
328
// Logical tensor identifiers used to map gguf tensor names to model weights.
// Each enumerator is expanded to a concrete name via LLM_TN / LLM_TN_IMPL
// below (per-architecture name tables live in the corresponding .cpp).
// Order defines the stable internal values — append, don't reorder.
enum llm_tensor {
    // embeddings and model-level output
    LLM_TENSOR_TOKEN_EMBD,
    LLM_TENSOR_TOKEN_EMBD_NORM,
    LLM_TENSOR_TOKEN_TYPES,
    LLM_TENSOR_POS_EMBD,
    LLM_TENSOR_DENSE_2_OUT,
    LLM_TENSOR_DENSE_3_OUT,
    LLM_TENSOR_OUTPUT,
    LLM_TENSOR_OUTPUT_NORM,
    LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
    LLM_TENSOR_ROPE_FREQS,
    LLM_TENSOR_ROPE_FACTORS_LONG,
    LLM_TENSOR_ROPE_FACTORS_SHORT,
    // per-block attention tensors
    LLM_TENSOR_ATTN_Q,
    LLM_TENSOR_ATTN_K,
    LLM_TENSOR_ATTN_V,
    LLM_TENSOR_ATTN_QKV,
    LLM_TENSOR_ATTN_OUT,
    LLM_TENSOR_ATTN_NORM,
    LLM_TENSOR_ATTN_NORM_2,
    LLM_TENSOR_ATTN_OUT_NORM,
    LLM_TENSOR_ATTN_POST_NORM,
    LLM_TENSOR_ATTN_ROT_EMBD,
    LLM_TENSOR_ATTN_SINKS,
    LLM_TENSOR_ATTN_GATE,
    // per-block feed-forward / mixture-of-experts tensors
    LLM_TENSOR_FFN_GATE_INP,
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
    LLM_TENSOR_FFN_NORM,
    LLM_TENSOR_FFN_POST_NORM,
    LLM_TENSOR_FFN_GATE,
    LLM_TENSOR_FFN_DOWN,
    LLM_TENSOR_FFN_UP,
    LLM_TENSOR_FFN_ACT,
    LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
    LLM_TENSOR_FFN_GATE_EXP,
    LLM_TENSOR_FFN_UP_EXP,
    LLM_TENSOR_FFN_NORM_EXPS,
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
    LLM_TENSOR_FFN_GATE_EXPS,
    LLM_TENSOR_FFN_UP_EXPS,
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
    LLM_TENSOR_FFN_DOWN_CHEXPS,
    LLM_TENSOR_FFN_GATE_CHEXPS,
    LLM_TENSOR_FFN_UP_CHEXPS,
    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
    LLM_TENSOR_POST_ATTN_NORM,
    LLM_TENSOR_POST_MLP_NORM,
    LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
    LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
    LLM_TENSOR_PER_LAYER_INP_GATE,   // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ,       // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ_NORM,  // gemma3n
    LLM_TENSOR_PER_LAYER_POST_NORM,  // gemma3n
    LLM_TENSOR_ALTUP_PROJ,           // gemma3n
    LLM_TENSOR_ALTUP_UNEMBD_PROJ,    // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_COEF,   // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_SCALE,  // gemma3n
    LLM_TENSOR_ALTUP_PREDICT_COEF,   // gemma3n
    LLM_TENSOR_ALTUP_ROUTER,         // gemma3n
    LLM_TENSOR_ALTUP_ROUTER_NORM,    // gemma3n
    LLM_TENSOR_LAUREL_L,             // gemma3n
    LLM_TENSOR_LAUREL_R,             // gemma3n
    LLM_TENSOR_LAUREL_POST_NORM,     // gemma3n
    // state-space model (SSM) tensors
    LLM_TENSOR_SSM_IN,
    LLM_TENSOR_SSM_CONV1D,
    LLM_TENSOR_SSM_X,
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_DT_NORM,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_A_NOSCAN, // qwen3next special case with MUL instead of SSM_SCAN
    LLM_TENSOR_SSM_B_NORM,
    LLM_TENSOR_SSM_C_NORM,
    LLM_TENSOR_SSM_D,
    LLM_TENSOR_SSM_NORM,
    LLM_TENSOR_SSM_OUT,
    LLM_TENSOR_SSM_BETA_ALPHA, // qwen3next
    LLM_TENSOR_SSM_ALPHA, // qwen3.5
    // Kimi Linear KDA (using SSM_ prefix for consistency)
    LLM_TENSOR_SSM_CONV1D_Q, // kimi: Q conv1d weight
    LLM_TENSOR_SSM_CONV1D_K, // kimi: K conv1d weight
    LLM_TENSOR_SSM_CONV1D_V, // kimi: V conv1d weight
    LLM_TENSOR_SSM_F_A, // kimi: forget gate projection A
    LLM_TENSOR_SSM_F_B, // kimi: forget gate projection B
    LLM_TENSOR_SSM_BETA, // kimi: beta mixing coefficient and qwen3.5
    LLM_TENSOR_SSM_G_A, // kimi: output gate projection A
    LLM_TENSOR_SSM_G_B, // kimi: output gate projection B
    // RWKV time-mix / channel-mix tensors
    LLM_TENSOR_TIME_MIX_W0,
    LLM_TENSOR_TIME_MIX_W1,
    LLM_TENSOR_TIME_MIX_W2,
    LLM_TENSOR_TIME_MIX_A0,
    LLM_TENSOR_TIME_MIX_A1,
    LLM_TENSOR_TIME_MIX_A2,
    LLM_TENSOR_TIME_MIX_V0,
    LLM_TENSOR_TIME_MIX_V1,
    LLM_TENSOR_TIME_MIX_V2,
    LLM_TENSOR_TIME_MIX_G1,
    LLM_TENSOR_TIME_MIX_G2,
    LLM_TENSOR_TIME_MIX_K_K,
    LLM_TENSOR_TIME_MIX_K_A,
    LLM_TENSOR_TIME_MIX_R_K,
    LLM_TENSOR_TIME_MIX_LERP_X,
    LLM_TENSOR_TIME_MIX_LERP_W,
    LLM_TENSOR_TIME_MIX_LERP_K,
    LLM_TENSOR_TIME_MIX_LERP_V,
    LLM_TENSOR_TIME_MIX_LERP_R,
    LLM_TENSOR_TIME_MIX_LERP_G,
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
    LLM_TENSOR_TIME_MIX_FIRST,
    LLM_TENSOR_TIME_MIX_DECAY,
    LLM_TENSOR_TIME_MIX_DECAY_W1,
    LLM_TENSOR_TIME_MIX_DECAY_W2,
    LLM_TENSOR_TIME_MIX_KEY,
    LLM_TENSOR_TIME_MIX_VALUE,
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
    LLM_TENSOR_TIME_MIX_GATE,
    LLM_TENSOR_TIME_MIX_LN,
    LLM_TENSOR_TIME_MIX_OUTPUT,
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
    LLM_TENSOR_CHANNEL_MIX_KEY,
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
    LLM_TENSOR_CHANNEL_MIX_VALUE,
    // multi-head latent attention (MLA) low-rank projections
    LLM_TENSOR_ATTN_Q_A,
    LLM_TENSOR_ATTN_Q_B,
    LLM_TENSOR_ATTN_KV_A_MQA,
    LLM_TENSOR_ATTN_KV_B,
    LLM_TENSOR_ATTN_K_B,
    LLM_TENSOR_ATTN_V_B,
    LLM_TENSOR_ATTN_Q_A_NORM,
    LLM_TENSOR_ATTN_KV_A_NORM,
    LLM_TENSOR_ATTN_SUB_NORM,
    LLM_TENSOR_FFN_SUB_NORM,
    // encoder-decoder (T5-style) tensors: DEC_* decoder side, ENC_* encoder side
    LLM_TENSOR_DEC_ATTN_NORM,
    LLM_TENSOR_DEC_ATTN_Q,
    LLM_TENSOR_DEC_ATTN_K,
    LLM_TENSOR_DEC_ATTN_V,
    LLM_TENSOR_DEC_ATTN_OUT,
    LLM_TENSOR_DEC_ATTN_REL_B,
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
    LLM_TENSOR_DEC_CROSS_ATTN_K,
    LLM_TENSOR_DEC_CROSS_ATTN_V,
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
    LLM_TENSOR_DEC_FFN_NORM,
    LLM_TENSOR_DEC_FFN_GATE,
    LLM_TENSOR_DEC_FFN_DOWN,
    LLM_TENSOR_DEC_FFN_UP,
    LLM_TENSOR_DEC_OUTPUT_NORM,
    LLM_TENSOR_ENC_ATTN_NORM,
    LLM_TENSOR_ENC_ATTN_Q,
    LLM_TENSOR_ENC_ATTN_K,
    LLM_TENSOR_ENC_ATTN_V,
    LLM_TENSOR_ENC_ATTN_OUT,
    LLM_TENSOR_ENC_ATTN_REL_B,
    LLM_TENSOR_ENC_FFN_NORM,
    LLM_TENSOR_ENC_FFN_GATE,
    LLM_TENSOR_ENC_FFN_DOWN,
    LLM_TENSOR_ENC_FFN_UP,
    LLM_TENSOR_ENC_OUTPUT_NORM,
    // classification heads
    LLM_TENSOR_CLS,
    LLM_TENSOR_CLS_OUT,
    // convolutional decoder tensors (conv1d / convnext / posnet)
    LLM_TENSOR_CONV1D,
    LLM_TENSOR_CONVNEXT_DW,
    LLM_TENSOR_CONVNEXT_NORM,
    LLM_TENSOR_CONVNEXT_PW1,
    LLM_TENSOR_CONVNEXT_PW2,
    LLM_TENSOR_CONVNEXT_GAMMA,
    LLM_TENSOR_POS_NET_CONV1,
    LLM_TENSOR_POS_NET_CONV2,
    LLM_TENSOR_POS_NET_NORM,
    LLM_TENSOR_POS_NET_NORM1,
    LLM_TENSOR_POS_NET_NORM2,
    LLM_TENSOR_POS_NET_ATTN_NORM,
    LLM_TENSOR_POS_NET_ATTN_Q,
    LLM_TENSOR_POS_NET_ATTN_K,
    LLM_TENSOR_POS_NET_ATTN_V,
    LLM_TENSOR_POS_NET_ATTN_OUT,
    // short convolution (LFM2-style) tensors
    LLM_TENSOR_SHORTCONV_CONV,
    LLM_TENSOR_SHORTCONV_INPROJ,
    LLM_TENSOR_SHORTCONV_OUTPROJ,
    // vision-expert tensors
    LLM_TENSOR_VISEXP_ATTN_QKV,
    LLM_TENSOR_VISEXP_ATTN_OUT,
    LLM_TENSOR_VISEXP_FFN_GATE,
    LLM_TENSOR_VISEXP_FFN_DOWN,
    LLM_TENSOR_VISEXP_FFN_UP,
    // next-token-prediction (NextN / MTP) head tensors
    LLM_TENSOR_NEXTN_EH_PROJ,
    LLM_TENSOR_NEXTN_EMBED_TOKENS,
    LLM_TENSOR_NEXTN_ENORM,
    LLM_TENSOR_NEXTN_HNORM,
    LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
    LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
};
527
// Where in the model a tensor lives; used (via llm_tensor_info) to classify
// tensors when assigning them to backends/layers.
enum llm_tensor_layer {
    LLM_TENSOR_LAYER_INPUT,     // input-side tensors (e.g. token embeddings)
    LLM_TENSOR_LAYER_REPEATING, // per-block tensors repeated for each layer
    LLM_TENSOR_LAYER_OUTPUT,    // output-side tensors (e.g. final norm, lm head)
};
533
// Functor that expands a llm_kv enumerator into its full gguf metadata key
// string for a given architecture; the mapping is implemented in the
// corresponding .cpp. An optional suffix can be appended to the generated key.
struct LLM_KV {
    LLM_KV(llm_arch arch, const char * suffix = nullptr);

    llm_arch     arch;   // architecture used when expanding per-arch keys
    const char * suffix; // optional key suffix, nullptr for none (not owned)

    // return the full key string for the given metadata id
    std::string operator()(llm_kv kv) const;
};
542
543// helper to handle gguf constants
544// usage:
545//
546// const auto tn = LLM_TN(LLM_ARCH_LLAMA);
547//
548// std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
549// std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
550// std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
551//
// Lazily-formatted tensor name: holds the pieces (arch, tensor id, optional
// suffix, block/expert indices) and renders the final string on demand via
// str(). Convertible to std::string and directly comparable against one.
struct LLM_TN_IMPL {
    const llm_arch           arch;   // architecture selecting the name table
    const llm_tensor       tensor;   // logical tensor id to render
    const char * const     suffix;   // optional suffix, e.g. "weight"/"bias" (nullptr for none)
    const int                 bid;   // block (layer) index, -1 if not applicable
    const int                 xid;   // extra index (e.g. expert), -1 if not applicable

    // NOTE(review): populated/used by the implementation in the .cpp —
    // presumably the set of tensors valid for this arch; not consulted here
    const std::set<llm_tensor> model_tensors;

    LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);

    // render the final tensor name (implemented in the .cpp)
    std::string str() const;

    // implicit conversion so the result can be used wherever a name string is expected
    operator std::string() const {
        return str();
    }

    // allow direct comparison of a tensor name string against this descriptor
    friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
        return str == tn.str();
    }

    friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
        return str != tn.str();
    }
};
577
// Factory for LLM_TN_IMPL tensor-name descriptors bound to one architecture;
// see the usage example above. The two call operators differ only in whether
// a suffix ("weight"/"bias"/...) is supplied.
struct LLM_TN {
    LLM_TN(llm_arch arch) : arch(arch) {}

    llm_arch arch; // architecture all generated names are resolved against

    // name with suffix, e.g. tn(LLM_TENSOR_ATTN_NORM, "weight", 3) -> "blk.3.attn_norm.weight"
    LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
        return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
    }

    // name without suffix, e.g. tn(LLM_TENSOR_OUTPUT) -> "output"
    LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
        return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
    }
};
591
592
// Static metadata about a logical tensor: which part of the model it belongs
// to and the ggml operation it is primarily used with.
struct llm_tensor_info {
    llm_tensor_layer layer; // input / repeating / output classification
    ggml_op op;             // ggml op the tensor participates in
};

// human-readable name for an architecture (the gguf "general.architecture" string)
const char * llm_arch_name(llm_arch arch);

// parse an architecture name string; implementations elsewhere return
// LLM_ARCH_UNKNOWN for unrecognized names (see sentinel comment on the enum)
llm_arch llm_arch_from_string(const std::string & name);

// look up the static llm_tensor_info entry for a logical tensor id
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);

// architecture capability queries (implemented in the .cpp)
bool llm_arch_is_recurrent(const llm_arch & arch);
bool llm_arch_is_hybrid   (const llm_arch & arch);
bool llm_arch_is_diffusion(const llm_arch & arch);