aboutsummaryrefslogtreecommitdiff
path: root/llama.cpp/src/llama-arch.cpp
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/src/llama-arch.cpp
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/src/llama-arch.cpp')
-rw-r--r--llama.cpp/src/llama-arch.cpp2757
1 files changed, 2757 insertions, 0 deletions
diff --git a/llama.cpp/src/llama-arch.cpp b/llama.cpp/src/llama-arch.cpp
new file mode 100644
index 0000000..a943d40
--- /dev/null
+++ b/llama.cpp/src/llama-arch.cpp
@@ -0,0 +1,2757 @@
1#include "llama-arch.h"
2
3#include "llama-impl.h"
4
5#include <map>
6#include <set>
7
8static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
9 { LLM_ARCH_CLIP, "clip" }, // dummy, only used by llama-quantize
10 { LLM_ARCH_LLAMA, "llama" },
11 { LLM_ARCH_LLAMA4, "llama4" },
12 { LLM_ARCH_DECI, "deci" },
13 { LLM_ARCH_FALCON, "falcon" },
14 { LLM_ARCH_GROK, "grok" },
15 { LLM_ARCH_GPT2, "gpt2" },
16 { LLM_ARCH_GPTJ, "gptj" },
17 { LLM_ARCH_GPTNEOX, "gptneox" },
18 { LLM_ARCH_MPT, "mpt" },
19 { LLM_ARCH_BAICHUAN, "baichuan" },
20 { LLM_ARCH_STARCODER, "starcoder" },
21 { LLM_ARCH_REFACT, "refact" },
22 { LLM_ARCH_BERT, "bert" },
23 { LLM_ARCH_MODERN_BERT, "modern-bert" },
24 { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
25 { LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
26 { LLM_ARCH_NEO_BERT, "neo-bert" },
27 { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
28 { LLM_ARCH_JINA_BERT_V3, "jina-bert-v3" },
29 { LLM_ARCH_BLOOM, "bloom" },
30 { LLM_ARCH_STABLELM, "stablelm" },
31 { LLM_ARCH_QWEN, "qwen" },
32 { LLM_ARCH_QWEN2, "qwen2" },
33 { LLM_ARCH_QWEN2MOE, "qwen2moe" },
34 { LLM_ARCH_QWEN2VL, "qwen2vl" },
35 { LLM_ARCH_QWEN3, "qwen3" },
36 { LLM_ARCH_QWEN3MOE, "qwen3moe" },
37 { LLM_ARCH_QWEN3NEXT, "qwen3next" },
38 { LLM_ARCH_QWEN3VL, "qwen3vl" },
39 { LLM_ARCH_QWEN3VLMOE, "qwen3vlmoe" },
40 { LLM_ARCH_QWEN35, "qwen35" },
41 { LLM_ARCH_QWEN35MOE, "qwen35moe" },
42 { LLM_ARCH_PHI2, "phi2" },
43 { LLM_ARCH_PHI3, "phi3" },
44 { LLM_ARCH_PHIMOE, "phimoe" },
45 { LLM_ARCH_PLAMO, "plamo" },
46 { LLM_ARCH_PLAMO2, "plamo2" },
47 { LLM_ARCH_PLAMO3, "plamo3" },
48 { LLM_ARCH_CODESHELL, "codeshell" },
49 { LLM_ARCH_ORION, "orion" },
50 { LLM_ARCH_INTERNLM2, "internlm2" },
51 { LLM_ARCH_MINICPM, "minicpm" },
52 { LLM_ARCH_MINICPM3, "minicpm3" },
53 { LLM_ARCH_GEMMA, "gemma" },
54 { LLM_ARCH_GEMMA2, "gemma2" },
55 { LLM_ARCH_GEMMA3, "gemma3" },
56 { LLM_ARCH_GEMMA3N, "gemma3n" },
57 { LLM_ARCH_GEMMA_EMBEDDING, "gemma-embedding" },
58 { LLM_ARCH_STARCODER2, "starcoder2" },
59 { LLM_ARCH_MAMBA, "mamba" },
60 { LLM_ARCH_MAMBA2, "mamba2" },
61 { LLM_ARCH_JAMBA, "jamba" },
62 { LLM_ARCH_FALCON_H1, "falcon-h1" },
63 { LLM_ARCH_XVERSE, "xverse" },
64 { LLM_ARCH_COMMAND_R, "command-r" },
65 { LLM_ARCH_COHERE2, "cohere2" },
66 { LLM_ARCH_DBRX, "dbrx" },
67 { LLM_ARCH_OLMO, "olmo" },
68 { LLM_ARCH_OLMO2, "olmo2" },
69 { LLM_ARCH_OLMOE, "olmoe" },
70 { LLM_ARCH_OPENELM, "openelm" },
71 { LLM_ARCH_ARCTIC, "arctic" },
72 { LLM_ARCH_DEEPSEEK, "deepseek" },
73 { LLM_ARCH_DEEPSEEK2, "deepseek2" },
74 { LLM_ARCH_CHATGLM, "chatglm" },
75 { LLM_ARCH_GLM4, "glm4" },
76 { LLM_ARCH_GLM4_MOE, "glm4moe" },
77 { LLM_ARCH_BITNET, "bitnet" },
78 { LLM_ARCH_T5, "t5" },
79 { LLM_ARCH_T5ENCODER, "t5encoder" },
80 { LLM_ARCH_JAIS, "jais" },
81 { LLM_ARCH_NEMOTRON, "nemotron" },
82 { LLM_ARCH_NEMOTRON_H, "nemotron_h" },
83 { LLM_ARCH_NEMOTRON_H_MOE, "nemotron_h_moe" },
84 { LLM_ARCH_EXAONE, "exaone" },
85 { LLM_ARCH_EXAONE4, "exaone4" },
86 { LLM_ARCH_EXAONE_MOE, "exaone-moe" },
87 { LLM_ARCH_RWKV6, "rwkv6" },
88 { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
89 { LLM_ARCH_RWKV7, "rwkv7" },
90 { LLM_ARCH_ARWKV7, "arwkv7" },
91 { LLM_ARCH_GRANITE, "granite" },
92 { LLM_ARCH_GRANITE_MOE, "granitemoe" },
93 { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
94 { LLM_ARCH_CHAMELEON, "chameleon" },
95 { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
96 { LLM_ARCH_PLM, "plm" },
97 { LLM_ARCH_BAILINGMOE, "bailingmoe" },
98 { LLM_ARCH_BAILINGMOE2, "bailingmoe2" },
99 { LLM_ARCH_DOTS1, "dots1" },
100 { LLM_ARCH_ARCEE, "arcee" },
101 { LLM_ARCH_AFMOE, "afmoe" },
102 { LLM_ARCH_ERNIE4_5, "ernie4_5" },
103 { LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
104 { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
105 { LLM_ARCH_HUNYUAN_DENSE, "hunyuan-dense" },
106 { LLM_ARCH_SMOLLM3, "smollm3" },
107 { LLM_ARCH_OPENAI_MOE, "gpt-oss" },
108 { LLM_ARCH_LFM2, "lfm2" },
109 { LLM_ARCH_LFM2MOE, "lfm2moe" },
110 { LLM_ARCH_DREAM, "dream" },
111 { LLM_ARCH_SMALLTHINKER, "smallthinker" },
112 { LLM_ARCH_LLADA, "llada" },
113 { LLM_ARCH_LLADA_MOE, "llada-moe" },
114 { LLM_ARCH_SEED_OSS, "seed_oss" },
115 { LLM_ARCH_GROVEMOE, "grovemoe" },
116 { LLM_ARCH_APERTUS, "apertus" },
117 { LLM_ARCH_MINIMAX_M2, "minimax-m2" },
118 { LLM_ARCH_COGVLM, "cogvlm" },
119 { LLM_ARCH_RND1, "rnd1" },
120 { LLM_ARCH_PANGU_EMBED, "pangu-embedded" },
121 { LLM_ARCH_MISTRAL3, "mistral3" },
122 { LLM_ARCH_MIMO2, "mimo2" },
123 { LLM_ARCH_STEP35, "step35" },
124 { LLM_ARCH_LLAMA_EMBED, "llama-embed" },
125 { LLM_ARCH_MAINCODER, "maincoder" },
126 { LLM_ARCH_KIMI_LINEAR, "kimi-linear" },
127 { LLM_ARCH_UNKNOWN, "(unknown)" },
128};
129
130static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
131 { LLM_KV_GENERAL_TYPE, "general.type" },
132 { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
133 { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
134 { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
135 { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" },
136 { LLM_KV_GENERAL_SAMPLING_SEQUENCE, "general.sampling.sequence" },
137 { LLM_KV_GENERAL_SAMPLING_TOP_K, "general.sampling.top_k" },
138 { LLM_KV_GENERAL_SAMPLING_TOP_P, "general.sampling.top_p" },
139 { LLM_KV_GENERAL_SAMPLING_MIN_P, "general.sampling.min_p" },
140 { LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY, "general.sampling.xtc_probability" },
141 { LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD, "general.sampling.xtc_threshold" },
142 { LLM_KV_GENERAL_SAMPLING_TEMP, "general.sampling.temp" },
143 { LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N, "general.sampling.penalty_last_n" },
144 { LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT, "general.sampling.penalty_repeat" },
145 { LLM_KV_GENERAL_SAMPLING_MIROSTAT, "general.sampling.mirostat" },
146 { LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU, "general.sampling.mirostat_tau" },
147 { LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA, "general.sampling.mirostat_eta" },
148 { LLM_KV_GENERAL_NAME, "general.name" },
149 { LLM_KV_GENERAL_AUTHOR, "general.author" },
150 { LLM_KV_GENERAL_VERSION, "general.version" },
151 { LLM_KV_GENERAL_URL, "general.url" },
152 { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
153 { LLM_KV_GENERAL_LICENSE, "general.license" },
154 { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
155 { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },
156
157 { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
158 { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
159 { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
160 { LLM_KV_EMBEDDING_LENGTH_OUT, "%s.embedding_length_out" },
161 { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
162 { LLM_KV_BLOCK_COUNT, "%s.block_count" },
163 { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
164 { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
165 { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
166 { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
167 { LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH, "%s.expert_chunk_feed_forward_length" },
168 { LLM_KV_SWIGLU_CLAMP_EXP, "%s.swiglu_clamp_exp" },
169 { LLM_KV_SWIGLU_CLAMP_SHEXP, "%s.swiglu_clamp_shexp" },
170 { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
171 { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
172 { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
173 { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
174 { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
175 { LLM_KV_EXPERT_GROUP_COUNT, "%s.expert_group_count" },
176 { LLM_KV_EXPERT_GROUP_USED_COUNT, "%s.expert_group_used_count" },
177 { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
178 { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
179 { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
180 { LLM_KV_EXPERT_GROUP_SCALE, "%s.expert_group_scale" },
181 { LLM_KV_EXPERTS_PER_GROUP, "%s.experts_per_group" },
182 { LLM_KV_MOE_EVERY_N_LAYERS, "%s.moe_every_n_layers" },
183 { LLM_KV_NEXTN_PREDICT_LAYERS, "%s.nextn_predict_layers" },
184 { LLM_KV_NUM_DEEPSTACK_LAYERS, "%s.n_deepstack_layers" },
185 { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
186 { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
187 { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
188 { LLM_KV_DECODER_BLOCK_COUNT, "%s.decoder_block_count" },
189 { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
190 { LLM_KV_ROUTER_LOGIT_SOFTCAPPING, "%s.router_logit_softcapping" },
191 { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
192 { LLM_KV_SWIN_NORM, "%s.swin_norm" },
193 { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
194 { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
195 { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
196 { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
197 { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
198 { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
199 { LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },
200 { LLM_KV_FULL_ATTENTION_INTERVAL, "%s.full_attention_interval" },
201
202 { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
203 { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
204 { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
205 { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
206 { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
207 { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
208 { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
209 { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
210 { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
211 { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
212 { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
213 { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
214 { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
215 { LLM_KV_ATTENTION_DECAY_LORA_RANK, "%s.attention.decay_lora_rank" },
216 { LLM_KV_ATTENTION_ICLR_LORA_RANK, "%s.attention.iclr_lora_rank" },
217 { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
218 { LLM_KV_ATTENTION_GATE_LORA_RANK, "%s.attention.gate_lora_rank" },
219 { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
220 { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
221 { LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN, "%s.attention.sliding_window_pattern" },
222 { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
223 { LLM_KV_ATTENTION_OUTPUT_SCALE, "%s.attention.output_scale" },
224 { LLM_KV_ATTENTION_TEMPERATURE_LENGTH, "%s.attention.temperature_length" },
225 { LLM_KV_ATTENTION_TEMPERATURE_SCALE, "%s.attention.temperature_scale" },
226 { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
227 { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
228
229 { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
230 { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
231 { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
232 { LLM_KV_ROPE_FREQ_BASE_SWA, "%s.rope.freq_base_swa" },
233 { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
234 { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
235 { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
236 { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
237 { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
238 { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
239 { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
240 { LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR, "%s.rope.scaling.yarn_ext_factor" },
241 { LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR, "%s.rope.scaling.yarn_attn_factor" },
242 { LLM_KV_ROPE_SCALING_YARN_BETA_FAST, "%s.rope.scaling.yarn_beta_fast" },
243 { LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, "%s.rope.scaling.yarn_beta_slow" },
244
245 { LLM_KV_SPLIT_NO, "split.no" },
246 { LLM_KV_SPLIT_COUNT, "split.count" },
247 { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
248
249 { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
250 { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
251 { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
252 { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
253 { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
254 { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
255
256 { LLM_KV_KDA_HEAD_DIM, "%s.kda.head_dim" },
257
258 { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
259
260 { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
261 { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },
262
263 { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
264 { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
265
266 { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
267
268 { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
269 // sentence-transformers dense modules feature dims
270 { LLM_KV_DENSE_2_FEAT_IN, "%s.dense_2_feat_in" },
271 { LLM_KV_DENSE_2_FEAT_OUT, "%s.dense_2_feat_out" },
272 { LLM_KV_DENSE_3_FEAT_IN, "%s.dense_3_feat_in" },
273 { LLM_KV_DENSE_3_FEAT_OUT, "%s.dense_3_feat_out" },
274
275 { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
276 { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
277 { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
278 { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
279 { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
280 { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
281 { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
282 { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
283 { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
284 { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
285 { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
286 { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
287 { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
288 { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
289 { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
290 { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
291 { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
292 { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
293 { LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
294 { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
295 { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
296 { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
297 { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
298 { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
299 { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
300 { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
301 { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
302 { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
303 { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
304 { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
305 { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },
306
307 { LLM_KV_ADAPTER_TYPE, "adapter.type" },
308 { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
309 { LLM_KV_ADAPTER_LORA_TASK_NAME, "adapter.lora.task_name" },
310 { LLM_KV_ADAPTER_LORA_PROMPT_PREFIX, "adapter.lora.prompt_prefix" },
311 { LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, "adapter.alora.invocation_tokens" },
312
313 { LLM_KV_XIELU_ALPHA_N, "xielu.alpha_n" },
314 { LLM_KV_XIELU_ALPHA_P, "xielu.alpha_p" },
315 { LLM_KV_XIELU_BETA, "xielu.beta" },
316 { LLM_KV_XIELU_EPS, "xielu.eps" },
317
318 // deprecated
319 { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
320 { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
321 { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
322};
323
324static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
325 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
326 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
327 { LLM_TENSOR_OUTPUT_NORM_LFM2, "token_embd_norm" }, // fix for wrong tensor name
328 { LLM_TENSOR_OUTPUT, "output" },
329 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
330 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
331 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
332 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
333 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
334 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
335 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
336 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
337 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
338 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
339 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
340 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
341 { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
342 { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
343 { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
344 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
345 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
346 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
347 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
348 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
349 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
350 { LLM_TENSOR_ATTN_GATE, "blk.%d.attn_gate" },
351 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
352 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
353 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
354 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
355 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
356 { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
357 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
358 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
359 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
360 { LLM_TENSOR_POS_EMBD, "position_embd" },
361 { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
362 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
363 { LLM_TENSOR_TOKEN_TYPES, "token_types" },
364 { LLM_TENSOR_CLS, "cls" },
365 { LLM_TENSOR_CLS_OUT, "cls.output" },
366 { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
367 { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
368 { LLM_TENSOR_SSM_A_NOSCAN, "blk.%d.ssm_a" },
369 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
370 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
371 { LLM_TENSOR_SSM_BETA_ALPHA, "blk.%d.ssm_ba" },
372 { LLM_TENSOR_SSM_ALPHA, "blk.%d.ssm_alpha" },
373 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
374 { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
375 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
376 { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
377 { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
378 { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
379 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
380 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
381 { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
382 { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
383 { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
384 { LLM_TENSOR_SSM_CONV1D_Q, "blk.%d.ssm_conv1d_q" },
385 { LLM_TENSOR_SSM_CONV1D_K, "blk.%d.ssm_conv1d_k" },
386 { LLM_TENSOR_SSM_CONV1D_V, "blk.%d.ssm_conv1d_v" },
387 { LLM_TENSOR_SSM_F_A, "blk.%d.ssm_f_a" },
388 { LLM_TENSOR_SSM_F_B, "blk.%d.ssm_f_b" },
389 { LLM_TENSOR_SSM_BETA, "blk.%d.ssm_beta" },
390 { LLM_TENSOR_SSM_G_A, "blk.%d.ssm_g_a" },
391 { LLM_TENSOR_SSM_G_B, "blk.%d.ssm_g_b" },
392 { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
393 { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
394 { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
395 { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
396 { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
397 { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
398 { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
399 { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
400 { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
401 { LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
402 { LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
403 { LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
404 { LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
405 { LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
406 { LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
407 { LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
408 { LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
409 { LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
410 { LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
411 { LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
412 { LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
413 { LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
414 { LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
415 { LLM_TENSOR_DENSE_2_OUT, "dense_2" },
416 { LLM_TENSOR_DENSE_3_OUT, "dense_3" },
417 { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
418 { LLM_TENSOR_ATTN_K_B, "blk.%d.attn_k_b" },
419 { LLM_TENSOR_ATTN_V_B, "blk.%d.attn_v_b" },
420 { LLM_TENSOR_NEXTN_EH_PROJ, "blk.%d.nextn.eh_proj" },
421 { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.nextn.embed_tokens" },
422 { LLM_TENSOR_NEXTN_ENORM, "blk.%d.nextn.enorm" },
423 { LLM_TENSOR_NEXTN_HNORM, "blk.%d.nextn.hnorm" },
424 { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
425 { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
426 { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
427 { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
428 { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
429 { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
430 { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
431 { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
432 { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
433 { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
434 { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
435 { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
436 { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
437 { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
438 { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
439 { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
440 { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
441 { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
442 { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
443 { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
444 { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
445 { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
446 { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
447 { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
448 { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
449 { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
450 { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
451 { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
452 { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
453 { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
454 { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
455 { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
456 { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
457 { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
458 { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
459 { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
460 { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
461 { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
462 { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
463 { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
464 { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
465 { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
466 { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
467 { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
468 { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
469 { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
470 { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
471 { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
472 { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
473 { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
474 { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
475 { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
476 { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
477 { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
478 { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
479 { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
480 { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
481 { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
482 { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
483 { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
484 { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
485 { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
486 { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
487 { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
488 { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
489 { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
490 { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
491 { LLM_TENSOR_CONV1D, "conv1d" },
492 { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
493 { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
494 { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
495 { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
496 { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
497 { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
498 { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
499 { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
500 { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
501 { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
502 { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
503 { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
504 { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
505 { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
506 { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
507 { LLM_TENSOR_ATTN_SINKS, "blk.%d.attn_sinks" },
508 { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
509 { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
510 { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
511 { LLM_TENSOR_FFN_GATE_CHEXPS, "blk.%d.ffn_gate_chexps" },
512 { LLM_TENSOR_FFN_DOWN_CHEXPS, "blk.%d.ffn_down_chexps" },
513 { LLM_TENSOR_FFN_UP_CHEXPS, "blk.%d.ffn_up_chexps" },
514 { LLM_TENSOR_VISEXP_ATTN_QKV, "blk.%d.vis_attn_qkv" },
515 { LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
516 { LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
517 { LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
518 { LLM_TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up" },
519};
520
521static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
522 switch (arch) {
523 case LLM_ARCH_CLIP:
524 return {};
525 case LLM_ARCH_LLAMA:
526 case LLM_ARCH_DECI:
527 case LLM_ARCH_MISTRAL3:
528 case LLM_ARCH_LLAMA_EMBED:
529 return {
530 LLM_TENSOR_TOKEN_EMBD,
531 LLM_TENSOR_OUTPUT_NORM,
532 LLM_TENSOR_OUTPUT,
533 LLM_TENSOR_ROPE_FREQS,
534 LLM_TENSOR_ATTN_NORM,
535 LLM_TENSOR_ATTN_Q,
536 LLM_TENSOR_ATTN_K,
537 LLM_TENSOR_ATTN_V,
538 LLM_TENSOR_ATTN_OUT,
539 LLM_TENSOR_ATTN_ROT_EMBD,
540 LLM_TENSOR_FFN_GATE_INP,
541 LLM_TENSOR_FFN_NORM,
542 LLM_TENSOR_FFN_GATE,
543 LLM_TENSOR_FFN_DOWN,
544 LLM_TENSOR_FFN_UP,
545 LLM_TENSOR_FFN_GATE_EXP,
546 LLM_TENSOR_FFN_DOWN_EXP,
547 LLM_TENSOR_FFN_UP_EXP,
548 LLM_TENSOR_FFN_GATE_EXPS,
549 LLM_TENSOR_FFN_DOWN_EXPS,
550 LLM_TENSOR_FFN_UP_EXPS,
551 };
552 case LLM_ARCH_ARCEE:
553 case LLM_ARCH_STARCODER2:
554 case LLM_ARCH_NEMOTRON:
555 return {
556 LLM_TENSOR_TOKEN_EMBD,
557 LLM_TENSOR_OUTPUT_NORM,
558 LLM_TENSOR_OUTPUT,
559 LLM_TENSOR_ROPE_FREQS,
560 LLM_TENSOR_ATTN_NORM,
561 LLM_TENSOR_ATTN_Q,
562 LLM_TENSOR_ATTN_K,
563 LLM_TENSOR_ATTN_V,
564 LLM_TENSOR_ATTN_OUT,
565 LLM_TENSOR_ATTN_ROT_EMBD,
566 LLM_TENSOR_FFN_NORM,
567 LLM_TENSOR_FFN_DOWN,
568 LLM_TENSOR_FFN_UP,
569 };
570 case LLM_ARCH_AFMOE:
571 return {
572 LLM_TENSOR_TOKEN_EMBD,
573 LLM_TENSOR_OUTPUT_NORM,
574 LLM_TENSOR_OUTPUT,
575 LLM_TENSOR_ATTN_NORM,
576 LLM_TENSOR_ATTN_POST_NORM,
577 LLM_TENSOR_ATTN_Q,
578 LLM_TENSOR_ATTN_K,
579 LLM_TENSOR_ATTN_V,
580 LLM_TENSOR_ATTN_OUT,
581 LLM_TENSOR_ATTN_Q_NORM,
582 LLM_TENSOR_ATTN_K_NORM,
583 LLM_TENSOR_ATTN_GATE,
584 LLM_TENSOR_FFN_NORM,
585 LLM_TENSOR_FFN_POST_NORM,
586 LLM_TENSOR_FFN_GATE_INP,
587 LLM_TENSOR_FFN_GATE,
588 LLM_TENSOR_FFN_DOWN,
589 LLM_TENSOR_FFN_UP,
590 LLM_TENSOR_FFN_GATE_EXPS,
591 LLM_TENSOR_FFN_DOWN_EXPS,
592 LLM_TENSOR_FFN_UP_EXPS,
593 LLM_TENSOR_FFN_GATE_SHEXP,
594 LLM_TENSOR_FFN_UP_SHEXP,
595 LLM_TENSOR_FFN_DOWN_SHEXP,
596 LLM_TENSOR_FFN_EXP_PROBS_B,
597 };
598 case LLM_ARCH_LLAMA4:
599 return {
600 LLM_TENSOR_TOKEN_EMBD,
601 LLM_TENSOR_OUTPUT_NORM,
602 LLM_TENSOR_OUTPUT,
603 LLM_TENSOR_ROPE_FREQS,
604 LLM_TENSOR_ATTN_NORM,
605 LLM_TENSOR_ATTN_Q,
606 LLM_TENSOR_ATTN_K,
607 LLM_TENSOR_ATTN_V,
608 LLM_TENSOR_ATTN_OUT,
609 LLM_TENSOR_ATTN_ROT_EMBD,
610 LLM_TENSOR_FFN_GATE_INP,
611 LLM_TENSOR_FFN_NORM,
612 LLM_TENSOR_FFN_GATE,
613 LLM_TENSOR_FFN_DOWN,
614 LLM_TENSOR_FFN_UP,
615 LLM_TENSOR_FFN_GATE_EXP,
616 LLM_TENSOR_FFN_DOWN_EXP,
617 LLM_TENSOR_FFN_UP_EXP,
618 LLM_TENSOR_FFN_GATE_EXPS,
619 LLM_TENSOR_FFN_DOWN_EXPS,
620 LLM_TENSOR_FFN_UP_EXPS,
621 LLM_TENSOR_FFN_GATE_SHEXP,
622 LLM_TENSOR_FFN_DOWN_SHEXP,
623 LLM_TENSOR_FFN_UP_SHEXP,
624 };
625 case LLM_ARCH_BAICHUAN:
626 case LLM_ARCH_ORION:
627 case LLM_ARCH_XVERSE:
628 case LLM_ARCH_EXAONE:
629 return {
630 LLM_TENSOR_TOKEN_EMBD,
631 LLM_TENSOR_OUTPUT_NORM,
632 LLM_TENSOR_OUTPUT,
633 LLM_TENSOR_ROPE_FREQS,
634 LLM_TENSOR_ATTN_NORM,
635 LLM_TENSOR_ATTN_Q,
636 LLM_TENSOR_ATTN_K,
637 LLM_TENSOR_ATTN_V,
638 LLM_TENSOR_ATTN_OUT,
639 LLM_TENSOR_ATTN_ROT_EMBD,
640 LLM_TENSOR_FFN_NORM,
641 LLM_TENSOR_FFN_GATE,
642 LLM_TENSOR_FFN_DOWN,
643 LLM_TENSOR_FFN_UP,
644 };
645 case LLM_ARCH_FALCON:
646 return {
647 LLM_TENSOR_TOKEN_EMBD,
648 LLM_TENSOR_OUTPUT_NORM,
649 LLM_TENSOR_OUTPUT,
650 LLM_TENSOR_ATTN_NORM,
651 LLM_TENSOR_ATTN_NORM_2,
652 LLM_TENSOR_ATTN_QKV,
653 LLM_TENSOR_ATTN_OUT,
654 LLM_TENSOR_FFN_DOWN,
655 LLM_TENSOR_FFN_UP,
656 };
657 case LLM_ARCH_GROK:
658 return {
659 LLM_TENSOR_TOKEN_EMBD,
660 LLM_TENSOR_OUTPUT_NORM,
661 LLM_TENSOR_OUTPUT,
662 LLM_TENSOR_ROPE_FREQS,
663 LLM_TENSOR_ATTN_NORM,
664 LLM_TENSOR_ATTN_Q,
665 LLM_TENSOR_ATTN_K,
666 LLM_TENSOR_ATTN_V,
667 LLM_TENSOR_ATTN_OUT,
668 LLM_TENSOR_ATTN_ROT_EMBD,
669 LLM_TENSOR_FFN_GATE_INP,
670 LLM_TENSOR_FFN_NORM,
671 LLM_TENSOR_FFN_GATE,
672 LLM_TENSOR_FFN_DOWN,
673 LLM_TENSOR_FFN_UP,
674 LLM_TENSOR_FFN_GATE_EXP,
675 LLM_TENSOR_FFN_DOWN_EXP,
676 LLM_TENSOR_FFN_UP_EXP,
677 LLM_TENSOR_FFN_GATE_EXPS,
678 LLM_TENSOR_FFN_DOWN_EXPS,
679 LLM_TENSOR_FFN_UP_EXPS,
680 LLM_TENSOR_FFN_POST_NORM,
681 LLM_TENSOR_LAYER_OUT_NORM,
682 LLM_TENSOR_ATTN_OUT_NORM,
683 };
684 case LLM_ARCH_GPT2:
685 case LLM_ARCH_STARCODER:
686 return {
687 LLM_TENSOR_TOKEN_EMBD,
688 LLM_TENSOR_POS_EMBD,
689 LLM_TENSOR_OUTPUT_NORM,
690 LLM_TENSOR_OUTPUT,
691 LLM_TENSOR_ATTN_NORM,
692 LLM_TENSOR_ATTN_QKV,
693 LLM_TENSOR_ATTN_OUT,
694 LLM_TENSOR_FFN_NORM,
695 LLM_TENSOR_FFN_UP,
696 LLM_TENSOR_FFN_DOWN,
697 };
698 case LLM_ARCH_GPTNEOX:
699 return {
700 LLM_TENSOR_TOKEN_EMBD,
701 LLM_TENSOR_OUTPUT_NORM,
702 LLM_TENSOR_OUTPUT,
703 LLM_TENSOR_ATTN_NORM,
704 LLM_TENSOR_ATTN_QKV,
705 LLM_TENSOR_ATTN_OUT,
706 LLM_TENSOR_FFN_NORM,
707 LLM_TENSOR_FFN_DOWN,
708 LLM_TENSOR_FFN_UP,
709 };
710 case LLM_ARCH_MPT:
711 return {
712 LLM_TENSOR_TOKEN_EMBD,
713 LLM_TENSOR_OUTPUT_NORM,
714 LLM_TENSOR_OUTPUT,
715 LLM_TENSOR_ATTN_NORM,
716 LLM_TENSOR_FFN_NORM,
717 LLM_TENSOR_ATTN_QKV,
718 LLM_TENSOR_ATTN_OUT,
719 LLM_TENSOR_FFN_DOWN,
720 LLM_TENSOR_FFN_UP,
721 LLM_TENSOR_FFN_ACT,
722 LLM_TENSOR_POS_EMBD,
723 LLM_TENSOR_ATTN_Q_NORM,
724 LLM_TENSOR_ATTN_K_NORM,
725 };
726 case LLM_ARCH_REFACT:
727 case LLM_ARCH_QWEN2:
728 case LLM_ARCH_QWEN2VL:
729 case LLM_ARCH_INTERNLM2:
730 case LLM_ARCH_GRANITE:
731 case LLM_ARCH_ERNIE4_5:
732 case LLM_ARCH_SMOLLM3:
733 case LLM_ARCH_DREAM:
734 case LLM_ARCH_LLADA:
735 case LLM_ARCH_PANGU_EMBED:
736 return {
737 LLM_TENSOR_TOKEN_EMBD,
738 LLM_TENSOR_OUTPUT_NORM,
739 LLM_TENSOR_OUTPUT,
740 LLM_TENSOR_ATTN_NORM,
741 LLM_TENSOR_ATTN_Q,
742 LLM_TENSOR_ATTN_K,
743 LLM_TENSOR_ATTN_V,
744 LLM_TENSOR_ATTN_OUT,
745 LLM_TENSOR_FFN_NORM,
746 LLM_TENSOR_FFN_GATE,
747 LLM_TENSOR_FFN_DOWN,
748 LLM_TENSOR_FFN_UP,
749 };
750 case LLM_ARCH_BERT:
751 return {
752 LLM_TENSOR_TOKEN_EMBD,
753 LLM_TENSOR_TOKEN_EMBD_NORM,
754 LLM_TENSOR_TOKEN_TYPES,
755 LLM_TENSOR_POS_EMBD,
756 LLM_TENSOR_ATTN_OUT_NORM,
757 LLM_TENSOR_ATTN_QKV,
758 LLM_TENSOR_ATTN_Q,
759 LLM_TENSOR_ATTN_K,
760 LLM_TENSOR_ATTN_V,
761 LLM_TENSOR_ATTN_OUT,
762 LLM_TENSOR_LAYER_OUT_NORM,
763 LLM_TENSOR_FFN_DOWN,
764 LLM_TENSOR_FFN_UP,
765 LLM_TENSOR_CLS,
766 LLM_TENSOR_CLS_OUT,
767 };
768 case LLM_ARCH_NOMIC_BERT:
769 return {
770 LLM_TENSOR_TOKEN_EMBD,
771 LLM_TENSOR_TOKEN_EMBD_NORM,
772 LLM_TENSOR_TOKEN_TYPES,
773 LLM_TENSOR_ATTN_OUT_NORM,
774 LLM_TENSOR_ATTN_QKV,
775 LLM_TENSOR_ATTN_OUT,
776 LLM_TENSOR_LAYER_OUT_NORM,
777 LLM_TENSOR_FFN_GATE,
778 LLM_TENSOR_FFN_DOWN,
779 LLM_TENSOR_FFN_UP,
780 };
781 case LLM_ARCH_NOMIC_BERT_MOE:
782 return {
783 LLM_TENSOR_TOKEN_EMBD,
784 LLM_TENSOR_TOKEN_EMBD_NORM,
785 LLM_TENSOR_TOKEN_TYPES,
786 LLM_TENSOR_ATTN_OUT_NORM,
787 LLM_TENSOR_ATTN_QKV,
788 LLM_TENSOR_ATTN_OUT,
789 LLM_TENSOR_LAYER_OUT_NORM,
790 LLM_TENSOR_FFN_GATE,
791 LLM_TENSOR_FFN_DOWN,
792 LLM_TENSOR_FFN_UP,
793 LLM_TENSOR_FFN_GATE_INP,
794 LLM_TENSOR_FFN_DOWN_EXPS,
795 LLM_TENSOR_FFN_UP_EXPS,
796 };
797 case LLM_ARCH_NEO_BERT:
798 return {
799 LLM_TENSOR_TOKEN_EMBD,
800 LLM_TENSOR_ATTN_NORM,
801 LLM_TENSOR_ATTN_QKV,
802 LLM_TENSOR_ATTN_OUT,
803 LLM_TENSOR_FFN_NORM,
804 LLM_TENSOR_FFN_DOWN,
805 LLM_TENSOR_FFN_UP,
806 LLM_TENSOR_ENC_OUTPUT_NORM,
807 LLM_TENSOR_CLS,
808 LLM_TENSOR_CLS_OUT,
809 };
810 case LLM_ARCH_MODERN_BERT:
811 return {
812 LLM_TENSOR_TOKEN_EMBD,
813 LLM_TENSOR_TOKEN_EMBD_NORM,
814 LLM_TENSOR_OUTPUT_NORM,
815 LLM_TENSOR_ATTN_NORM,
816 LLM_TENSOR_ATTN_OUT,
817 LLM_TENSOR_ATTN_QKV,
818 LLM_TENSOR_FFN_DOWN,
819 LLM_TENSOR_FFN_UP,
820 LLM_TENSOR_FFN_NORM,
821 LLM_TENSOR_CLS,
822 LLM_TENSOR_CLS_OUT,
823 };
824 case LLM_ARCH_JINA_BERT_V2:
825 return {
826 LLM_TENSOR_TOKEN_EMBD,
827 LLM_TENSOR_TOKEN_EMBD_NORM,
828 LLM_TENSOR_TOKEN_TYPES,
829 LLM_TENSOR_ATTN_NORM_2,
830 LLM_TENSOR_ATTN_OUT_NORM,
831 LLM_TENSOR_ATTN_Q,
832 LLM_TENSOR_ATTN_Q_NORM,
833 LLM_TENSOR_ATTN_K,
834 LLM_TENSOR_ATTN_K_NORM,
835 LLM_TENSOR_ATTN_V,
836 LLM_TENSOR_ATTN_OUT,
837 LLM_TENSOR_LAYER_OUT_NORM,
838 LLM_TENSOR_FFN_DOWN,
839 LLM_TENSOR_FFN_GATE,
840 LLM_TENSOR_FFN_UP,
841 LLM_TENSOR_CLS,
842 };
843 case LLM_ARCH_JINA_BERT_V3:
844 return {
845 LLM_TENSOR_TOKEN_EMBD,
846 LLM_TENSOR_TOKEN_EMBD_NORM,
847 LLM_TENSOR_TOKEN_TYPES,
848 LLM_TENSOR_ATTN_OUT_NORM,
849 LLM_TENSOR_ATTN_QKV,
850 LLM_TENSOR_ATTN_OUT,
851 LLM_TENSOR_FFN_DOWN,
852 LLM_TENSOR_FFN_UP,
853 LLM_TENSOR_LAYER_OUT_NORM,
854 };
855 case LLM_ARCH_BLOOM:
856 return {
857 LLM_TENSOR_TOKEN_EMBD,
858 LLM_TENSOR_TOKEN_EMBD_NORM,
859 LLM_TENSOR_OUTPUT_NORM,
860 LLM_TENSOR_OUTPUT,
861 LLM_TENSOR_ATTN_NORM,
862 LLM_TENSOR_ATTN_QKV,
863 LLM_TENSOR_ATTN_OUT,
864 LLM_TENSOR_FFN_NORM,
865 LLM_TENSOR_FFN_UP,
866 LLM_TENSOR_FFN_DOWN,
867 };
868 case LLM_ARCH_STABLELM:
869 return {
870 LLM_TENSOR_TOKEN_EMBD,
871 LLM_TENSOR_OUTPUT_NORM,
872 LLM_TENSOR_OUTPUT,
873 LLM_TENSOR_ROPE_FREQS,
874 LLM_TENSOR_ATTN_NORM,
875 LLM_TENSOR_ATTN_Q,
876 LLM_TENSOR_ATTN_K,
877 LLM_TENSOR_ATTN_V,
878 LLM_TENSOR_ATTN_OUT,
879 LLM_TENSOR_FFN_NORM,
880 LLM_TENSOR_FFN_GATE,
881 LLM_TENSOR_FFN_DOWN,
882 LLM_TENSOR_FFN_UP,
883 LLM_TENSOR_ATTN_Q_NORM,
884 LLM_TENSOR_ATTN_K_NORM,
885 };
886 case LLM_ARCH_QWEN:
887 return {
888 LLM_TENSOR_TOKEN_EMBD,
889 LLM_TENSOR_OUTPUT_NORM,
890 LLM_TENSOR_OUTPUT,
891 LLM_TENSOR_ROPE_FREQS,
892 LLM_TENSOR_ATTN_NORM,
893 LLM_TENSOR_ATTN_QKV,
894 LLM_TENSOR_ATTN_OUT,
895 LLM_TENSOR_FFN_NORM,
896 LLM_TENSOR_FFN_GATE,
897 LLM_TENSOR_FFN_DOWN,
898 LLM_TENSOR_FFN_UP,
899 };
900 case LLM_ARCH_QWEN2MOE:
901 return {
902 LLM_TENSOR_TOKEN_EMBD,
903 LLM_TENSOR_OUTPUT_NORM,
904 LLM_TENSOR_OUTPUT,
905 LLM_TENSOR_ATTN_NORM,
906 LLM_TENSOR_ATTN_Q,
907 LLM_TENSOR_ATTN_K,
908 LLM_TENSOR_ATTN_V,
909 LLM_TENSOR_ATTN_OUT,
910 LLM_TENSOR_FFN_NORM,
911 LLM_TENSOR_FFN_GATE_INP,
912 LLM_TENSOR_FFN_GATE_EXPS,
913 LLM_TENSOR_FFN_DOWN_EXPS,
914 LLM_TENSOR_FFN_UP_EXPS,
915 LLM_TENSOR_FFN_GATE_INP_SHEXP,
916 LLM_TENSOR_FFN_GATE_SHEXP,
917 LLM_TENSOR_FFN_DOWN_SHEXP,
918 LLM_TENSOR_FFN_UP_SHEXP,
919 };
920 case LLM_ARCH_QWEN3:
921 return {
922 LLM_TENSOR_TOKEN_EMBD,
923 LLM_TENSOR_OUTPUT_NORM,
924 LLM_TENSOR_OUTPUT,
925 LLM_TENSOR_CLS_OUT,
926 LLM_TENSOR_ATTN_NORM,
927 LLM_TENSOR_ATTN_Q,
928 LLM_TENSOR_ATTN_Q_NORM,
929 LLM_TENSOR_ATTN_K,
930 LLM_TENSOR_ATTN_K_NORM,
931 LLM_TENSOR_ATTN_V,
932 LLM_TENSOR_ATTN_OUT,
933 LLM_TENSOR_FFN_NORM,
934 LLM_TENSOR_FFN_GATE,
935 LLM_TENSOR_FFN_DOWN,
936 LLM_TENSOR_FFN_UP,
937 };
938 case LLM_ARCH_QWEN3MOE:
939 case LLM_ARCH_QWEN3VLMOE:
940 case LLM_ARCH_OLMOE:
941 case LLM_ARCH_LLADA_MOE:
942 case LLM_ARCH_RND1:
943 return {
944 LLM_TENSOR_TOKEN_EMBD,
945 LLM_TENSOR_OUTPUT_NORM,
946 LLM_TENSOR_OUTPUT,
947 LLM_TENSOR_ATTN_NORM,
948 LLM_TENSOR_ATTN_Q,
949 LLM_TENSOR_ATTN_Q_NORM,
950 LLM_TENSOR_ATTN_K,
951 LLM_TENSOR_ATTN_K_NORM,
952 LLM_TENSOR_ATTN_V,
953 LLM_TENSOR_ATTN_OUT,
954 LLM_TENSOR_FFN_NORM,
955 LLM_TENSOR_FFN_GATE_INP,
956 LLM_TENSOR_FFN_GATE_EXPS,
957 LLM_TENSOR_FFN_DOWN_EXPS,
958 LLM_TENSOR_FFN_UP_EXPS,
959 };
960 case LLM_ARCH_QWEN3NEXT:
961 return {
962 LLM_TENSOR_TOKEN_EMBD,
963 LLM_TENSOR_OUTPUT_NORM,
964 LLM_TENSOR_OUTPUT,
965 LLM_TENSOR_ATTN_NORM,
966 LLM_TENSOR_ATTN_POST_NORM,
967 LLM_TENSOR_ATTN_Q,
968 LLM_TENSOR_ATTN_Q_NORM,
969 LLM_TENSOR_ATTN_K,
970 LLM_TENSOR_ATTN_K_NORM,
971 LLM_TENSOR_ATTN_V,
972 LLM_TENSOR_ATTN_OUT,
973 LLM_TENSOR_ATTN_QKV,
974 LLM_TENSOR_ATTN_GATE,
975 LLM_TENSOR_FFN_GATE_INP,
976 LLM_TENSOR_FFN_GATE_EXPS,
977 LLM_TENSOR_FFN_DOWN_EXPS,
978 LLM_TENSOR_FFN_UP_EXPS,
979 LLM_TENSOR_FFN_GATE_INP_SHEXP,
980 LLM_TENSOR_FFN_GATE_SHEXP,
981 LLM_TENSOR_FFN_DOWN_SHEXP,
982 LLM_TENSOR_FFN_UP_SHEXP,
983 LLM_TENSOR_SSM_A_NOSCAN,
984 LLM_TENSOR_SSM_CONV1D,
985 LLM_TENSOR_SSM_DT,
986 LLM_TENSOR_SSM_BETA_ALPHA,
987 LLM_TENSOR_SSM_IN,
988 LLM_TENSOR_SSM_NORM,
989 LLM_TENSOR_SSM_OUT,
990 };
991 case LLM_ARCH_QWEN35:
992 return {
993 LLM_TENSOR_TOKEN_EMBD,
994 LLM_TENSOR_OUTPUT_NORM,
995 LLM_TENSOR_OUTPUT,
996 LLM_TENSOR_ATTN_NORM,
997 LLM_TENSOR_ATTN_POST_NORM,
998 LLM_TENSOR_ATTN_Q,
999 LLM_TENSOR_ATTN_Q_NORM,
1000 LLM_TENSOR_ATTN_K,
1001 LLM_TENSOR_ATTN_K_NORM,
1002 LLM_TENSOR_ATTN_V,
1003 LLM_TENSOR_ATTN_OUT,
1004 LLM_TENSOR_ATTN_QKV,
1005 LLM_TENSOR_ATTN_GATE,
1006 LLM_TENSOR_FFN_GATE,
1007 LLM_TENSOR_FFN_DOWN,
1008 LLM_TENSOR_FFN_UP,
1009 LLM_TENSOR_SSM_A_NOSCAN,
1010 LLM_TENSOR_SSM_CONV1D,
1011 LLM_TENSOR_SSM_DT,
1012 LLM_TENSOR_SSM_BETA,
1013 LLM_TENSOR_SSM_ALPHA,
1014 LLM_TENSOR_SSM_NORM,
1015 LLM_TENSOR_SSM_OUT,
1016 };
1017 case LLM_ARCH_QWEN35MOE:
1018 return {
1019 LLM_TENSOR_TOKEN_EMBD,
1020 LLM_TENSOR_OUTPUT_NORM,
1021 LLM_TENSOR_OUTPUT,
1022 LLM_TENSOR_ATTN_NORM,
1023 LLM_TENSOR_ATTN_POST_NORM,
1024 LLM_TENSOR_ATTN_Q,
1025 LLM_TENSOR_ATTN_Q_NORM,
1026 LLM_TENSOR_ATTN_K,
1027 LLM_TENSOR_ATTN_K_NORM,
1028 LLM_TENSOR_ATTN_V,
1029 LLM_TENSOR_ATTN_OUT,
1030 LLM_TENSOR_ATTN_QKV,
1031 LLM_TENSOR_ATTN_GATE,
1032 LLM_TENSOR_FFN_GATE_INP,
1033 LLM_TENSOR_FFN_GATE_EXPS,
1034 LLM_TENSOR_FFN_DOWN_EXPS,
1035 LLM_TENSOR_FFN_UP_EXPS,
1036 LLM_TENSOR_FFN_GATE_INP_SHEXP,
1037 LLM_TENSOR_FFN_GATE_SHEXP,
1038 LLM_TENSOR_FFN_DOWN_SHEXP,
1039 LLM_TENSOR_FFN_UP_SHEXP,
1040 LLM_TENSOR_SSM_A_NOSCAN,
1041 LLM_TENSOR_SSM_CONV1D,
1042 LLM_TENSOR_SSM_DT,
1043 LLM_TENSOR_SSM_BETA,
1044 LLM_TENSOR_SSM_ALPHA,
1045 LLM_TENSOR_SSM_NORM,
1046 LLM_TENSOR_SSM_OUT,
1047 };
1048 case LLM_ARCH_QWEN3VL:
1049 case LLM_ARCH_CHAMELEON:
1050 case LLM_ARCH_HUNYUAN_DENSE:
1051 return {
1052 LLM_TENSOR_TOKEN_EMBD,
1053 LLM_TENSOR_OUTPUT_NORM,
1054 LLM_TENSOR_OUTPUT,
1055 LLM_TENSOR_ATTN_NORM,
1056 LLM_TENSOR_ATTN_Q,
1057 LLM_TENSOR_ATTN_Q_NORM,
1058 LLM_TENSOR_ATTN_K,
1059 LLM_TENSOR_ATTN_K_NORM,
1060 LLM_TENSOR_ATTN_V,
1061 LLM_TENSOR_ATTN_OUT,
1062 LLM_TENSOR_FFN_NORM,
1063 LLM_TENSOR_FFN_GATE,
1064 LLM_TENSOR_FFN_DOWN,
1065 LLM_TENSOR_FFN_UP,
1066 };
1067 case LLM_ARCH_PHI2:
1068 return {
1069 LLM_TENSOR_TOKEN_EMBD,
1070 LLM_TENSOR_OUTPUT_NORM,
1071 LLM_TENSOR_OUTPUT,
1072 LLM_TENSOR_ATTN_NORM,
1073 LLM_TENSOR_ATTN_QKV,
1074 LLM_TENSOR_ATTN_Q,
1075 LLM_TENSOR_ATTN_K,
1076 LLM_TENSOR_ATTN_V,
1077 LLM_TENSOR_ATTN_OUT,
1078 LLM_TENSOR_FFN_DOWN,
1079 LLM_TENSOR_FFN_UP,
1080 };
1081 case LLM_ARCH_PHI3:
1082 return {
1083 LLM_TENSOR_TOKEN_EMBD,
1084 LLM_TENSOR_OUTPUT_NORM,
1085 LLM_TENSOR_OUTPUT,
1086 LLM_TENSOR_ROPE_FACTORS_LONG,
1087 LLM_TENSOR_ROPE_FACTORS_SHORT,
1088 LLM_TENSOR_ATTN_NORM,
1089 LLM_TENSOR_ATTN_QKV,
1090 LLM_TENSOR_ATTN_Q,
1091 LLM_TENSOR_ATTN_K,
1092 LLM_TENSOR_ATTN_V,
1093 LLM_TENSOR_ATTN_OUT,
1094 LLM_TENSOR_FFN_NORM,
1095 LLM_TENSOR_FFN_DOWN,
1096 LLM_TENSOR_FFN_UP,
1097 };
1098 case LLM_ARCH_PHIMOE:
1099 return {
1100 LLM_TENSOR_TOKEN_EMBD,
1101 LLM_TENSOR_OUTPUT_NORM,
1102 LLM_TENSOR_OUTPUT,
1103 LLM_TENSOR_ROPE_FACTORS_LONG,
1104 LLM_TENSOR_ROPE_FACTORS_SHORT,
1105 LLM_TENSOR_ATTN_NORM,
1106 LLM_TENSOR_ATTN_QKV,
1107 LLM_TENSOR_ATTN_Q,
1108 LLM_TENSOR_ATTN_K,
1109 LLM_TENSOR_ATTN_V,
1110 LLM_TENSOR_ATTN_OUT,
1111 LLM_TENSOR_FFN_NORM,
1112 LLM_TENSOR_FFN_GATE_INP,
1113 LLM_TENSOR_FFN_GATE_EXPS,
1114 LLM_TENSOR_FFN_DOWN_EXPS,
1115 LLM_TENSOR_FFN_UP_EXPS,
1116 };
1117 case LLM_ARCH_PLAMO:
1118 return {
1119 LLM_TENSOR_TOKEN_EMBD,
1120 LLM_TENSOR_OUTPUT_NORM,
1121 LLM_TENSOR_OUTPUT,
1122 LLM_TENSOR_ROPE_FREQS,
1123 LLM_TENSOR_ATTN_NORM,
1124 LLM_TENSOR_ATTN_Q,
1125 LLM_TENSOR_ATTN_K,
1126 LLM_TENSOR_ATTN_V,
1127 LLM_TENSOR_ATTN_OUT,
1128 LLM_TENSOR_ATTN_ROT_EMBD,
1129 LLM_TENSOR_FFN_GATE,
1130 LLM_TENSOR_FFN_DOWN,
1131 LLM_TENSOR_FFN_UP,
1132 };
1133 case LLM_ARCH_PLAMO2:
1134 return {
1135 LLM_TENSOR_TOKEN_EMBD,
1136 LLM_TENSOR_OUTPUT_NORM,
1137 LLM_TENSOR_OUTPUT,
1138 LLM_TENSOR_ROPE_FREQS,
1139 LLM_TENSOR_ATTN_NORM,
1140 LLM_TENSOR_ATTN_QKV,
1141 LLM_TENSOR_ATTN_Q_NORM,
1142 LLM_TENSOR_ATTN_K_NORM,
1143 LLM_TENSOR_ATTN_OUT,
1144 LLM_TENSOR_ATTN_ROT_EMBD,
1145 LLM_TENSOR_FFN_NORM,
1146 LLM_TENSOR_FFN_DOWN,
1147 LLM_TENSOR_FFN_UP,
1148 LLM_TENSOR_SSM_IN,
1149 LLM_TENSOR_SSM_CONV1D,
1150 LLM_TENSOR_SSM_X,
1151 LLM_TENSOR_SSM_DT,
1152 LLM_TENSOR_SSM_A,
1153 LLM_TENSOR_SSM_D,
1154 LLM_TENSOR_SSM_OUT,
1155 LLM_TENSOR_SSM_DT_NORM,
1156 LLM_TENSOR_SSM_B_NORM,
1157 LLM_TENSOR_SSM_C_NORM,
1158 LLM_TENSOR_ATTN_POST_NORM,
1159 LLM_TENSOR_FFN_POST_NORM,
1160 };
1161 case LLM_ARCH_PLAMO3:
1162 return {
1163 LLM_TENSOR_TOKEN_EMBD,
1164 LLM_TENSOR_OUTPUT_NORM,
1165 LLM_TENSOR_OUTPUT,
1166 LLM_TENSOR_ATTN_NORM,
1167 LLM_TENSOR_ATTN_QKV,
1168 LLM_TENSOR_ATTN_Q_NORM,
1169 LLM_TENSOR_ATTN_K_NORM,
1170 LLM_TENSOR_ATTN_OUT,
1171 LLM_TENSOR_ATTN_POST_NORM,
1172 LLM_TENSOR_FFN_NORM,
1173 LLM_TENSOR_FFN_POST_NORM,
1174 LLM_TENSOR_FFN_DOWN,
1175 LLM_TENSOR_FFN_UP,
1176 };
1177 case LLM_ARCH_CODESHELL:
1178 return {
1179 LLM_TENSOR_TOKEN_EMBD,
1180 LLM_TENSOR_OUTPUT_NORM,
1181 LLM_TENSOR_OUTPUT,
1182 LLM_TENSOR_ROPE_FREQS,
1183 LLM_TENSOR_ATTN_NORM,
1184 LLM_TENSOR_ATTN_Q,
1185 LLM_TENSOR_ATTN_K,
1186 LLM_TENSOR_ATTN_V,
1187 LLM_TENSOR_ATTN_QKV,
1188 LLM_TENSOR_ATTN_OUT,
1189 LLM_TENSOR_ATTN_ROT_EMBD,
1190 LLM_TENSOR_FFN_NORM,
1191 LLM_TENSOR_FFN_GATE,
1192 LLM_TENSOR_FFN_DOWN,
1193 LLM_TENSOR_FFN_UP,
1194 };
1195 case LLM_ARCH_MINICPM:
1196 return {
1197 LLM_TENSOR_TOKEN_EMBD,
1198 LLM_TENSOR_OUTPUT_NORM,
1199 LLM_TENSOR_OUTPUT,
1200 LLM_TENSOR_ROPE_FREQS,
1201 LLM_TENSOR_ROPE_FACTORS_LONG,
1202 LLM_TENSOR_ROPE_FACTORS_SHORT,
1203 LLM_TENSOR_ATTN_NORM,
1204 LLM_TENSOR_ATTN_Q,
1205 LLM_TENSOR_ATTN_K,
1206 LLM_TENSOR_ATTN_V,
1207 LLM_TENSOR_ATTN_OUT,
1208 LLM_TENSOR_ATTN_ROT_EMBD,
1209 LLM_TENSOR_FFN_GATE_INP,
1210 LLM_TENSOR_FFN_NORM,
1211 LLM_TENSOR_FFN_GATE,
1212 LLM_TENSOR_FFN_DOWN,
1213 LLM_TENSOR_FFN_UP,
1214 LLM_TENSOR_FFN_GATE_EXP,
1215 LLM_TENSOR_FFN_DOWN_EXP,
1216 LLM_TENSOR_FFN_UP_EXP,
1217 };
1218 case LLM_ARCH_MINICPM3:
1219 return {
1220 LLM_TENSOR_TOKEN_EMBD,
1221 LLM_TENSOR_OUTPUT_NORM,
1222 LLM_TENSOR_OUTPUT,
1223 LLM_TENSOR_ROPE_FACTORS_LONG,
1224 LLM_TENSOR_ROPE_FACTORS_SHORT,
1225 LLM_TENSOR_ATTN_NORM,
1226 LLM_TENSOR_ATTN_Q_A_NORM,
1227 LLM_TENSOR_ATTN_KV_A_NORM,
1228 LLM_TENSOR_ATTN_Q,
1229 LLM_TENSOR_ATTN_Q_A,
1230 LLM_TENSOR_ATTN_Q_B,
1231 LLM_TENSOR_ATTN_KV_A_MQA,
1232 LLM_TENSOR_ATTN_KV_B,
1233 LLM_TENSOR_ATTN_OUT,
1234 LLM_TENSOR_FFN_NORM,
1235 LLM_TENSOR_FFN_GATE,
1236 LLM_TENSOR_FFN_UP,
1237 LLM_TENSOR_FFN_DOWN,
1238 };
1239 case LLM_ARCH_GEMMA:
1240 return {
1241 LLM_TENSOR_TOKEN_EMBD,
1242 LLM_TENSOR_OUTPUT_NORM,
1243 LLM_TENSOR_ATTN_NORM,
1244 LLM_TENSOR_ATTN_Q,
1245 LLM_TENSOR_ATTN_K,
1246 LLM_TENSOR_ATTN_V,
1247 LLM_TENSOR_ATTN_OUT,
1248 LLM_TENSOR_FFN_NORM,
1249 LLM_TENSOR_FFN_GATE,
1250 LLM_TENSOR_FFN_DOWN,
1251 LLM_TENSOR_FFN_UP,
1252 };
1253 case LLM_ARCH_GEMMA2:
1254 return {
1255 LLM_TENSOR_TOKEN_EMBD,
1256 LLM_TENSOR_OUTPUT_NORM,
1257 LLM_TENSOR_ATTN_NORM,
1258 LLM_TENSOR_ATTN_Q,
1259 LLM_TENSOR_ATTN_K,
1260 LLM_TENSOR_ATTN_V,
1261 LLM_TENSOR_ATTN_OUT,
1262 LLM_TENSOR_ATTN_POST_NORM,
1263 LLM_TENSOR_FFN_NORM,
1264 LLM_TENSOR_FFN_GATE,
1265 LLM_TENSOR_FFN_DOWN,
1266 LLM_TENSOR_FFN_UP,
1267 LLM_TENSOR_FFN_POST_NORM,
1268 };
1269 case LLM_ARCH_GEMMA3:
1270 return {
1271 LLM_TENSOR_TOKEN_EMBD,
1272 LLM_TENSOR_OUTPUT_NORM,
1273 LLM_TENSOR_OUTPUT,
1274 LLM_TENSOR_ATTN_NORM,
1275 LLM_TENSOR_ATTN_Q,
1276 LLM_TENSOR_ATTN_Q_NORM,
1277 LLM_TENSOR_ATTN_K,
1278 LLM_TENSOR_ATTN_K_NORM,
1279 LLM_TENSOR_ATTN_V,
1280 LLM_TENSOR_ATTN_OUT,
1281 LLM_TENSOR_ATTN_POST_NORM,
1282 LLM_TENSOR_FFN_NORM,
1283 LLM_TENSOR_FFN_GATE,
1284 LLM_TENSOR_FFN_DOWN,
1285 LLM_TENSOR_FFN_UP,
1286 LLM_TENSOR_FFN_POST_NORM,
1287 };
1288 case LLM_ARCH_GEMMA3N:
1289 return {
1290 LLM_TENSOR_TOKEN_EMBD,
1291 LLM_TENSOR_OUTPUT_NORM,
1292 LLM_TENSOR_ATTN_NORM,
1293 LLM_TENSOR_ATTN_Q,
1294 LLM_TENSOR_ATTN_Q_NORM,
1295 LLM_TENSOR_ATTN_K,
1296 LLM_TENSOR_ATTN_K_NORM,
1297 LLM_TENSOR_ATTN_V,
1298 LLM_TENSOR_ATTN_OUT,
1299 LLM_TENSOR_ATTN_POST_NORM,
1300 LLM_TENSOR_FFN_NORM,
1301 LLM_TENSOR_FFN_GATE,
1302 LLM_TENSOR_FFN_DOWN,
1303 LLM_TENSOR_FFN_UP,
1304 LLM_TENSOR_FFN_POST_NORM,
1305 LLM_TENSOR_PER_LAYER_TOKEN_EMBD,
1306 LLM_TENSOR_PER_LAYER_MODEL_PROJ,
1307 LLM_TENSOR_PER_LAYER_PROJ_NORM,
1308 LLM_TENSOR_ALTUP_UNEMBD_PROJ,
1309 LLM_TENSOR_ALTUP_PROJ,
1310 LLM_TENSOR_PER_LAYER_INP_GATE,
1311 LLM_TENSOR_PER_LAYER_PROJ,
1312 LLM_TENSOR_PER_LAYER_POST_NORM,
1313 LLM_TENSOR_ALTUP_CORRECT_COEF,
1314 LLM_TENSOR_ALTUP_CORRECT_SCALE,
1315 LLM_TENSOR_ALTUP_PREDICT_COEF,
1316 LLM_TENSOR_ALTUP_ROUTER,
1317 LLM_TENSOR_ALTUP_ROUTER_NORM,
1318 LLM_TENSOR_LAUREL_L,
1319 LLM_TENSOR_LAUREL_R,
1320 LLM_TENSOR_LAUREL_POST_NORM,
1321 };
1322 case LLM_ARCH_GEMMA_EMBEDDING:
1323 return {
1324 LLM_TENSOR_TOKEN_EMBD,
1325 LLM_TENSOR_OUTPUT_NORM,
1326 LLM_TENSOR_OUTPUT,
1327 LLM_TENSOR_DENSE_2_OUT,
1328 LLM_TENSOR_DENSE_3_OUT,
1329 LLM_TENSOR_ATTN_NORM,
1330 LLM_TENSOR_ATTN_Q,
1331 LLM_TENSOR_ATTN_Q_NORM,
1332 LLM_TENSOR_ATTN_K,
1333 LLM_TENSOR_ATTN_K_NORM,
1334 LLM_TENSOR_ATTN_V,
1335 LLM_TENSOR_ATTN_OUT,
1336 LLM_TENSOR_ATTN_POST_NORM,
1337 LLM_TENSOR_FFN_NORM,
1338 LLM_TENSOR_FFN_GATE,
1339 LLM_TENSOR_FFN_DOWN,
1340 LLM_TENSOR_FFN_UP,
1341 LLM_TENSOR_FFN_POST_NORM,
1342 };
1343 case LLM_ARCH_MAMBA:
1344 return {
1345 LLM_TENSOR_TOKEN_EMBD,
1346 LLM_TENSOR_OUTPUT_NORM,
1347 LLM_TENSOR_OUTPUT,
1348 LLM_TENSOR_ATTN_NORM,
1349 LLM_TENSOR_SSM_IN,
1350 LLM_TENSOR_SSM_CONV1D,
1351 LLM_TENSOR_SSM_X,
1352 LLM_TENSOR_SSM_DT,
1353 LLM_TENSOR_SSM_A,
1354 LLM_TENSOR_SSM_D,
1355 LLM_TENSOR_SSM_OUT,
1356 };
1357 case LLM_ARCH_MAMBA2:
1358 return {
1359 LLM_TENSOR_TOKEN_EMBD,
1360 LLM_TENSOR_OUTPUT_NORM,
1361 LLM_TENSOR_OUTPUT,
1362 LLM_TENSOR_ATTN_NORM,
1363 LLM_TENSOR_SSM_IN,
1364 LLM_TENSOR_SSM_CONV1D,
1365 LLM_TENSOR_SSM_DT,
1366 LLM_TENSOR_SSM_A,
1367 LLM_TENSOR_SSM_D,
1368 LLM_TENSOR_SSM_NORM,
1369 LLM_TENSOR_SSM_OUT,
1370 };
1371 case LLM_ARCH_JAMBA:
1372 return {
1373 LLM_TENSOR_TOKEN_EMBD,
1374 LLM_TENSOR_OUTPUT_NORM,
1375 LLM_TENSOR_OUTPUT,
1376 LLM_TENSOR_ATTN_NORM,
1377 LLM_TENSOR_SSM_IN,
1378 LLM_TENSOR_SSM_CONV1D,
1379 LLM_TENSOR_SSM_X,
1380 LLM_TENSOR_SSM_DT,
1381 LLM_TENSOR_SSM_DT_NORM,
1382 LLM_TENSOR_SSM_A,
1383 LLM_TENSOR_SSM_B_NORM,
1384 LLM_TENSOR_SSM_C_NORM,
1385 LLM_TENSOR_SSM_D,
1386 LLM_TENSOR_SSM_OUT,
1387 LLM_TENSOR_ATTN_Q,
1388 LLM_TENSOR_ATTN_K,
1389 LLM_TENSOR_ATTN_V,
1390 LLM_TENSOR_ATTN_OUT,
1391 LLM_TENSOR_FFN_GATE_INP,
1392 LLM_TENSOR_FFN_NORM,
1393 LLM_TENSOR_FFN_GATE,
1394 LLM_TENSOR_FFN_DOWN,
1395 LLM_TENSOR_FFN_UP,
1396 LLM_TENSOR_FFN_GATE_EXPS,
1397 LLM_TENSOR_FFN_DOWN_EXPS,
1398 LLM_TENSOR_FFN_UP_EXPS,
1399 };
1400 case LLM_ARCH_FALCON_H1:
1401 return {
1402 LLM_TENSOR_TOKEN_EMBD,
1403 LLM_TENSOR_OUTPUT,
1404 LLM_TENSOR_OUTPUT_NORM,
1405 LLM_TENSOR_ATTN_NORM,
1406 LLM_TENSOR_ATTN_Q,
1407 LLM_TENSOR_ATTN_K,
1408 LLM_TENSOR_ATTN_V,
1409 LLM_TENSOR_ATTN_OUT,
1410 LLM_TENSOR_SSM_IN,
1411 LLM_TENSOR_SSM_CONV1D,
1412 LLM_TENSOR_SSM_DT,
1413 LLM_TENSOR_SSM_A,
1414 LLM_TENSOR_SSM_D,
1415 LLM_TENSOR_SSM_NORM,
1416 LLM_TENSOR_SSM_OUT,
1417 LLM_TENSOR_FFN_NORM,
1418 LLM_TENSOR_FFN_GATE,
1419 LLM_TENSOR_FFN_DOWN,
1420 LLM_TENSOR_FFN_UP,
1421 };
1422 case LLM_ARCH_COMMAND_R:
1423 return {
1424 LLM_TENSOR_TOKEN_EMBD,
1425 LLM_TENSOR_OUTPUT_NORM,
1426 LLM_TENSOR_ATTN_NORM,
1427 LLM_TENSOR_ATTN_Q,
1428 LLM_TENSOR_ATTN_K,
1429 LLM_TENSOR_ATTN_V,
1430 LLM_TENSOR_ATTN_OUT,
1431 LLM_TENSOR_FFN_GATE,
1432 LLM_TENSOR_FFN_DOWN,
1433 LLM_TENSOR_FFN_UP,
1434 LLM_TENSOR_ATTN_Q_NORM,
1435 LLM_TENSOR_ATTN_K_NORM,
1436 };
1437 case LLM_ARCH_COHERE2:
1438 return {
1439 LLM_TENSOR_TOKEN_EMBD,
1440 LLM_TENSOR_OUTPUT_NORM,
1441 LLM_TENSOR_ATTN_NORM,
1442 LLM_TENSOR_ATTN_Q,
1443 LLM_TENSOR_ATTN_K,
1444 LLM_TENSOR_ATTN_V,
1445 LLM_TENSOR_ATTN_OUT,
1446 LLM_TENSOR_FFN_GATE,
1447 LLM_TENSOR_FFN_DOWN,
1448 LLM_TENSOR_FFN_UP,
1449 };
1450 case LLM_ARCH_DBRX:
1451 return {
1452 LLM_TENSOR_TOKEN_EMBD,
1453 LLM_TENSOR_OUTPUT_NORM,
1454 LLM_TENSOR_OUTPUT,
1455 LLM_TENSOR_ATTN_QKV,
1456 LLM_TENSOR_ATTN_NORM,
1457 LLM_TENSOR_ATTN_OUT,
1458 LLM_TENSOR_ATTN_OUT_NORM,
1459 LLM_TENSOR_FFN_GATE_INP,
1460 LLM_TENSOR_FFN_GATE_EXPS,
1461 LLM_TENSOR_FFN_DOWN_EXPS,
1462 LLM_TENSOR_FFN_UP_EXPS,
1463 };
1464 case LLM_ARCH_OLMO:
1465 return {
1466 LLM_TENSOR_TOKEN_EMBD,
1467 LLM_TENSOR_OUTPUT,
1468 LLM_TENSOR_ATTN_Q,
1469 LLM_TENSOR_ATTN_K,
1470 LLM_TENSOR_ATTN_V,
1471 LLM_TENSOR_ATTN_OUT,
1472 LLM_TENSOR_FFN_GATE,
1473 LLM_TENSOR_FFN_DOWN,
1474 LLM_TENSOR_FFN_UP,
1475 };
1476 case LLM_ARCH_OLMO2:
1477 return {
1478 LLM_TENSOR_TOKEN_EMBD,
1479 LLM_TENSOR_OUTPUT_NORM,
1480 LLM_TENSOR_OUTPUT,
1481 LLM_TENSOR_ATTN_Q,
1482 LLM_TENSOR_ATTN_K,
1483 LLM_TENSOR_ATTN_V,
1484 LLM_TENSOR_ATTN_OUT,
1485 LLM_TENSOR_ATTN_POST_NORM,
1486 LLM_TENSOR_ATTN_Q_NORM,
1487 LLM_TENSOR_ATTN_K_NORM,
1488 LLM_TENSOR_FFN_POST_NORM,
1489 LLM_TENSOR_FFN_GATE,
1490 LLM_TENSOR_FFN_DOWN,
1491 LLM_TENSOR_FFN_UP,
1492 };
1493 case LLM_ARCH_OPENELM:
1494 return {
1495 LLM_TENSOR_TOKEN_EMBD,
1496 LLM_TENSOR_OUTPUT_NORM,
1497 LLM_TENSOR_ATTN_NORM,
1498 LLM_TENSOR_ATTN_QKV,
1499 LLM_TENSOR_ATTN_Q_NORM,
1500 LLM_TENSOR_ATTN_K_NORM,
1501 LLM_TENSOR_ATTN_OUT,
1502 LLM_TENSOR_FFN_NORM,
1503 LLM_TENSOR_FFN_GATE,
1504 LLM_TENSOR_FFN_DOWN,
1505 LLM_TENSOR_FFN_UP,
1506 };
1507 case LLM_ARCH_ARCTIC:
1508 return {
1509 LLM_TENSOR_TOKEN_EMBD,
1510 LLM_TENSOR_OUTPUT_NORM,
1511 LLM_TENSOR_OUTPUT,
1512 LLM_TENSOR_ATTN_NORM,
1513 LLM_TENSOR_ATTN_Q,
1514 LLM_TENSOR_ATTN_K,
1515 LLM_TENSOR_ATTN_V,
1516 LLM_TENSOR_ATTN_OUT,
1517 LLM_TENSOR_FFN_GATE_INP,
1518 LLM_TENSOR_FFN_NORM,
1519 LLM_TENSOR_FFN_GATE,
1520 LLM_TENSOR_FFN_DOWN,
1521 LLM_TENSOR_FFN_UP,
1522 LLM_TENSOR_FFN_NORM_EXPS,
1523 LLM_TENSOR_FFN_GATE_EXPS,
1524 LLM_TENSOR_FFN_DOWN_EXPS,
1525 LLM_TENSOR_FFN_UP_EXPS,
1526 };
1527 case LLM_ARCH_DEEPSEEK:
1528 return {
1529 LLM_TENSOR_TOKEN_EMBD,
1530 LLM_TENSOR_OUTPUT_NORM,
1531 LLM_TENSOR_OUTPUT,
1532 LLM_TENSOR_ROPE_FREQS,
1533 LLM_TENSOR_ATTN_NORM,
1534 LLM_TENSOR_ATTN_Q,
1535 LLM_TENSOR_ATTN_K,
1536 LLM_TENSOR_ATTN_V,
1537 LLM_TENSOR_ATTN_OUT,
1538 LLM_TENSOR_ATTN_ROT_EMBD,
1539 LLM_TENSOR_FFN_GATE_INP,
1540 LLM_TENSOR_FFN_NORM,
1541 LLM_TENSOR_FFN_GATE,
1542 LLM_TENSOR_FFN_DOWN,
1543 LLM_TENSOR_FFN_UP,
1544 LLM_TENSOR_FFN_GATE_EXPS,
1545 LLM_TENSOR_FFN_DOWN_EXPS,
1546 LLM_TENSOR_FFN_UP_EXPS,
1547 LLM_TENSOR_FFN_GATE_INP_SHEXP,
1548 LLM_TENSOR_FFN_GATE_SHEXP,
1549 LLM_TENSOR_FFN_DOWN_SHEXP,
1550 LLM_TENSOR_FFN_UP_SHEXP,
1551 };
1552 case LLM_ARCH_DEEPSEEK2:
1553 return {
1554 LLM_TENSOR_TOKEN_EMBD,
1555 LLM_TENSOR_OUTPUT_NORM,
1556 LLM_TENSOR_OUTPUT,
1557 LLM_TENSOR_ATTN_NORM,
1558 LLM_TENSOR_ATTN_Q_A_NORM,
1559 LLM_TENSOR_ATTN_KV_A_NORM,
1560 LLM_TENSOR_ATTN_Q,
1561 LLM_TENSOR_ATTN_Q_A,
1562 LLM_TENSOR_ATTN_Q_B,
1563 LLM_TENSOR_ATTN_KV_A_MQA,
1564 LLM_TENSOR_ATTN_KV_B,
1565 LLM_TENSOR_ATTN_K_B,
1566 LLM_TENSOR_ATTN_V_B,
1567 LLM_TENSOR_ATTN_OUT,
1568 LLM_TENSOR_FFN_NORM,
1569 LLM_TENSOR_FFN_GATE,
1570 LLM_TENSOR_FFN_UP,
1571 LLM_TENSOR_FFN_DOWN,
1572 LLM_TENSOR_FFN_GATE_INP,
1573 LLM_TENSOR_FFN_GATE_EXPS,
1574 LLM_TENSOR_FFN_DOWN_EXPS,
1575 LLM_TENSOR_FFN_UP_EXPS,
1576 LLM_TENSOR_FFN_GATE_INP_SHEXP,
1577 LLM_TENSOR_FFN_GATE_SHEXP,
1578 LLM_TENSOR_FFN_DOWN_SHEXP,
1579 LLM_TENSOR_FFN_UP_SHEXP,
1580 LLM_TENSOR_FFN_EXP_PROBS_B,
1581 };
1582 case LLM_ARCH_PLM:
1583 return {
1584 LLM_TENSOR_TOKEN_EMBD,
1585 LLM_TENSOR_OUTPUT_NORM,
1586 LLM_TENSOR_ATTN_NORM,
1587 LLM_TENSOR_ATTN_Q,
1588 LLM_TENSOR_ATTN_KV_A_MQA,
1589 LLM_TENSOR_ATTN_KV_A_NORM,
1590 LLM_TENSOR_ATTN_KV_B,
1591 LLM_TENSOR_ATTN_OUT,
1592 LLM_TENSOR_FFN_NORM,
1593 LLM_TENSOR_FFN_DOWN,
1594 LLM_TENSOR_FFN_UP,
1595 };
1596 case LLM_ARCH_CHATGLM:
1597 return {
1598 LLM_TENSOR_TOKEN_EMBD,
1599 LLM_TENSOR_ROPE_FREQS,
1600 LLM_TENSOR_OUTPUT_NORM,
1601 LLM_TENSOR_OUTPUT,
1602 LLM_TENSOR_ATTN_NORM,
1603 LLM_TENSOR_ATTN_QKV,
1604 LLM_TENSOR_ATTN_Q,
1605 LLM_TENSOR_ATTN_K,
1606 LLM_TENSOR_ATTN_V,
1607 LLM_TENSOR_ATTN_OUT,
1608 LLM_TENSOR_FFN_NORM,
1609 LLM_TENSOR_FFN_UP,
1610 LLM_TENSOR_FFN_DOWN,
1611 };
1612 case LLM_ARCH_GLM4:
1613 return {
1614 LLM_TENSOR_TOKEN_EMBD,
1615 LLM_TENSOR_ROPE_FREQS,
1616 LLM_TENSOR_OUTPUT_NORM,
1617 LLM_TENSOR_OUTPUT,
1618 LLM_TENSOR_ATTN_NORM,
1619 LLM_TENSOR_ATTN_Q,
1620 LLM_TENSOR_ATTN_K,
1621 LLM_TENSOR_ATTN_V,
1622 LLM_TENSOR_ATTN_OUT,
1623 LLM_TENSOR_FFN_NORM,
1624 LLM_TENSOR_FFN_UP,
1625 LLM_TENSOR_FFN_DOWN,
1626 LLM_TENSOR_ATTN_POST_NORM,
1627 LLM_TENSOR_FFN_POST_NORM,
1628 };
1629 case LLM_ARCH_GLM4_MOE:
1630 return {
1631 LLM_TENSOR_TOKEN_EMBD,
1632 LLM_TENSOR_OUTPUT_NORM,
1633 LLM_TENSOR_OUTPUT,
1634 LLM_TENSOR_ATTN_NORM,
1635 LLM_TENSOR_ATTN_POST_NORM,
1636 LLM_TENSOR_ATTN_Q,
1637 LLM_TENSOR_ATTN_K,
1638 LLM_TENSOR_ATTN_V,
1639 LLM_TENSOR_ATTN_OUT,
1640 LLM_TENSOR_ATTN_Q_NORM,
1641 LLM_TENSOR_ATTN_K_NORM,
1642 LLM_TENSOR_FFN_GATE,
1643 LLM_TENSOR_FFN_DOWN,
1644 LLM_TENSOR_FFN_UP,
1645 LLM_TENSOR_FFN_GATE_INP,
1646 LLM_TENSOR_FFN_GATE_EXPS,
1647 LLM_TENSOR_FFN_DOWN_EXPS,
1648 LLM_TENSOR_FFN_UP_EXPS,
1649 LLM_TENSOR_FFN_GATE_SHEXP,
1650 LLM_TENSOR_FFN_DOWN_SHEXP,
1651 LLM_TENSOR_FFN_UP_SHEXP,
1652 LLM_TENSOR_FFN_EXP_PROBS_B,
1653 LLM_TENSOR_NEXTN_EH_PROJ,
1654 LLM_TENSOR_NEXTN_EMBED_TOKENS,
1655 LLM_TENSOR_NEXTN_ENORM,
1656 LLM_TENSOR_NEXTN_HNORM,
1657 LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
1658 LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
1659 };
1660 case LLM_ARCH_BITNET:
1661 return {
1662 LLM_TENSOR_TOKEN_EMBD,
1663 LLM_TENSOR_OUTPUT_NORM,
1664 LLM_TENSOR_ATTN_Q,
1665 LLM_TENSOR_ATTN_K,
1666 LLM_TENSOR_ATTN_V,
1667 LLM_TENSOR_ATTN_OUT,
1668 LLM_TENSOR_ATTN_NORM,
1669 LLM_TENSOR_ATTN_SUB_NORM,
1670 LLM_TENSOR_FFN_GATE,
1671 LLM_TENSOR_FFN_DOWN,
1672 LLM_TENSOR_FFN_UP,
1673 LLM_TENSOR_FFN_NORM,
1674 LLM_TENSOR_FFN_SUB_NORM,
1675 };
1676 case LLM_ARCH_T5:
1677 return {
1678 LLM_TENSOR_TOKEN_EMBD,
1679 LLM_TENSOR_OUTPUT,
1680 LLM_TENSOR_DEC_OUTPUT_NORM,
1681 LLM_TENSOR_DEC_ATTN_NORM,
1682 LLM_TENSOR_DEC_ATTN_Q,
1683 LLM_TENSOR_DEC_ATTN_K,
1684 LLM_TENSOR_DEC_ATTN_V,
1685 LLM_TENSOR_DEC_ATTN_OUT,
1686 LLM_TENSOR_DEC_ATTN_REL_B,
1687 LLM_TENSOR_DEC_CROSS_ATTN_NORM,
1688 LLM_TENSOR_DEC_CROSS_ATTN_Q,
1689 LLM_TENSOR_DEC_CROSS_ATTN_K,
1690 LLM_TENSOR_DEC_CROSS_ATTN_V,
1691 LLM_TENSOR_DEC_CROSS_ATTN_OUT,
1692 LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
1693 LLM_TENSOR_DEC_FFN_NORM,
1694 LLM_TENSOR_DEC_FFN_GATE,
1695 LLM_TENSOR_DEC_FFN_DOWN,
1696 LLM_TENSOR_DEC_FFN_UP,
1697 LLM_TENSOR_ENC_OUTPUT_NORM,
1698 LLM_TENSOR_ENC_ATTN_NORM,
1699 LLM_TENSOR_ENC_ATTN_Q,
1700 LLM_TENSOR_ENC_ATTN_K,
1701 LLM_TENSOR_ENC_ATTN_V,
1702 LLM_TENSOR_ENC_ATTN_OUT,
1703 LLM_TENSOR_ENC_ATTN_REL_B,
1704 LLM_TENSOR_ENC_FFN_NORM,
1705 LLM_TENSOR_ENC_FFN_GATE,
1706 LLM_TENSOR_ENC_FFN_DOWN,
1707 LLM_TENSOR_ENC_FFN_UP,
1708 };
1709 case LLM_ARCH_T5ENCODER:
1710 return {
1711 LLM_TENSOR_TOKEN_EMBD,
1712 LLM_TENSOR_OUTPUT,
1713 LLM_TENSOR_ENC_OUTPUT_NORM,
1714 LLM_TENSOR_ENC_ATTN_NORM,
1715 LLM_TENSOR_ENC_ATTN_Q,
1716 LLM_TENSOR_ENC_ATTN_K,
1717 LLM_TENSOR_ENC_ATTN_V,
1718 LLM_TENSOR_ENC_ATTN_OUT,
1719 LLM_TENSOR_ENC_ATTN_REL_B,
1720 LLM_TENSOR_ENC_FFN_NORM,
1721 LLM_TENSOR_ENC_FFN_GATE,
1722 LLM_TENSOR_ENC_FFN_DOWN,
1723 LLM_TENSOR_ENC_FFN_UP,
1724 };
1725 case LLM_ARCH_JAIS:
1726 return {
1727 LLM_TENSOR_TOKEN_EMBD,
1728 LLM_TENSOR_OUTPUT_NORM,
1729 LLM_TENSOR_OUTPUT,
1730 LLM_TENSOR_ATTN_NORM,
1731 LLM_TENSOR_ATTN_QKV,
1732 LLM_TENSOR_ATTN_OUT,
1733 LLM_TENSOR_FFN_NORM,
1734 LLM_TENSOR_FFN_UP,
1735 LLM_TENSOR_FFN_GATE,
1736 LLM_TENSOR_FFN_DOWN,
1737 };
1738 case LLM_ARCH_NEMOTRON_H:
1739 return {
1740 LLM_TENSOR_TOKEN_EMBD,
1741 LLM_TENSOR_OUTPUT_NORM,
1742 LLM_TENSOR_OUTPUT,
1743 LLM_TENSOR_ATTN_NORM,
1744 LLM_TENSOR_SSM_IN,
1745 LLM_TENSOR_SSM_CONV1D,
1746 LLM_TENSOR_SSM_DT,
1747 LLM_TENSOR_SSM_A,
1748 LLM_TENSOR_SSM_D,
1749 LLM_TENSOR_SSM_NORM,
1750 LLM_TENSOR_SSM_OUT,
1751 LLM_TENSOR_ATTN_Q,
1752 LLM_TENSOR_ATTN_K,
1753 LLM_TENSOR_ATTN_V,
1754 LLM_TENSOR_ATTN_OUT,
1755 LLM_TENSOR_FFN_DOWN,
1756 LLM_TENSOR_FFN_UP,
1757 };
1758 case LLM_ARCH_NEMOTRON_H_MOE:
1759 return {
1760 LLM_TENSOR_TOKEN_EMBD,
1761 LLM_TENSOR_OUTPUT_NORM,
1762 LLM_TENSOR_OUTPUT,
1763 LLM_TENSOR_ATTN_NORM,
1764 // mamba(2) ssm layers
1765 LLM_TENSOR_SSM_IN,
1766 LLM_TENSOR_SSM_CONV1D,
1767 LLM_TENSOR_SSM_DT,
1768 LLM_TENSOR_SSM_A,
1769 LLM_TENSOR_SSM_D,
1770 LLM_TENSOR_SSM_NORM,
1771 LLM_TENSOR_SSM_OUT,
1772 // attention layers
1773 LLM_TENSOR_ATTN_Q,
1774 LLM_TENSOR_ATTN_K,
1775 LLM_TENSOR_ATTN_V,
1776 LLM_TENSOR_ATTN_OUT,
1777 // dense FFN
1778 LLM_TENSOR_FFN_DOWN,
1779 LLM_TENSOR_FFN_UP,
1780 // MoE FFN (for MoE layers)
1781 LLM_TENSOR_FFN_GATE_INP,
1782 LLM_TENSOR_FFN_UP_EXPS,
1783 LLM_TENSOR_FFN_DOWN_EXPS,
1784 LLM_TENSOR_FFN_EXP_PROBS_B,
1785 // MoE shared expert layer
1786 LLM_TENSOR_FFN_DOWN_SHEXP,
1787 LLM_TENSOR_FFN_UP_SHEXP,
1788 };
1789 case LLM_ARCH_EXAONE4:
1790 return {
1791 LLM_TENSOR_TOKEN_EMBD,
1792 LLM_TENSOR_OUTPUT_NORM,
1793 LLM_TENSOR_OUTPUT,
1794 LLM_TENSOR_ROPE_FREQS,
1795 LLM_TENSOR_ATTN_Q,
1796 LLM_TENSOR_ATTN_Q_NORM,
1797 LLM_TENSOR_ATTN_K,
1798 LLM_TENSOR_ATTN_K_NORM,
1799 LLM_TENSOR_ATTN_V,
1800 LLM_TENSOR_ATTN_OUT,
1801 LLM_TENSOR_ATTN_POST_NORM,
1802 LLM_TENSOR_FFN_GATE,
1803 LLM_TENSOR_FFN_DOWN,
1804 LLM_TENSOR_FFN_UP,
1805 LLM_TENSOR_FFN_POST_NORM,
1806 };
1807 case LLM_ARCH_EXAONE_MOE:
1808 return {
1809 LLM_TENSOR_TOKEN_EMBD,
1810 LLM_TENSOR_OUTPUT_NORM,
1811 LLM_TENSOR_OUTPUT,
1812 LLM_TENSOR_ROPE_FREQS,
1813 LLM_TENSOR_ATTN_NORM,
1814 LLM_TENSOR_ATTN_Q,
1815 LLM_TENSOR_ATTN_Q_NORM,
1816 LLM_TENSOR_ATTN_K,
1817 LLM_TENSOR_ATTN_K_NORM,
1818 LLM_TENSOR_ATTN_V,
1819 LLM_TENSOR_ATTN_OUT,
1820 LLM_TENSOR_FFN_NORM,
1821 LLM_TENSOR_FFN_GATE,
1822 LLM_TENSOR_FFN_DOWN,
1823 LLM_TENSOR_FFN_UP,
1824 LLM_TENSOR_FFN_GATE_INP,
1825 LLM_TENSOR_FFN_GATE_EXPS,
1826 LLM_TENSOR_FFN_DOWN_EXPS,
1827 LLM_TENSOR_FFN_UP_EXPS,
1828 LLM_TENSOR_FFN_GATE_SHEXP,
1829 LLM_TENSOR_FFN_UP_SHEXP,
1830 LLM_TENSOR_FFN_DOWN_SHEXP,
1831 LLM_TENSOR_FFN_EXP_PROBS_B,
1832 LLM_TENSOR_NEXTN_EH_PROJ,
1833 LLM_TENSOR_NEXTN_EMBED_TOKENS,
1834 LLM_TENSOR_NEXTN_ENORM,
1835 LLM_TENSOR_NEXTN_HNORM,
1836 LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
1837 LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
1838 };
1839 case LLM_ARCH_RWKV6:
1840 return {
1841 LLM_TENSOR_TOKEN_EMBD,
1842 LLM_TENSOR_TOKEN_EMBD_NORM,
1843 LLM_TENSOR_OUTPUT_NORM,
1844 LLM_TENSOR_OUTPUT,
1845 LLM_TENSOR_ATTN_NORM,
1846 LLM_TENSOR_ATTN_NORM_2,
1847 LLM_TENSOR_TIME_MIX_W1,
1848 LLM_TENSOR_TIME_MIX_W2,
1849 LLM_TENSOR_TIME_MIX_LERP_X,
1850 LLM_TENSOR_TIME_MIX_LERP_W,
1851 LLM_TENSOR_TIME_MIX_LERP_K,
1852 LLM_TENSOR_TIME_MIX_LERP_V,
1853 LLM_TENSOR_TIME_MIX_LERP_R,
1854 LLM_TENSOR_TIME_MIX_LERP_G,
1855 LLM_TENSOR_TIME_MIX_LERP_FUSED,
1856 LLM_TENSOR_TIME_MIX_FIRST,
1857 LLM_TENSOR_TIME_MIX_DECAY,
1858 LLM_TENSOR_TIME_MIX_DECAY_W1,
1859 LLM_TENSOR_TIME_MIX_DECAY_W2,
1860 LLM_TENSOR_TIME_MIX_KEY,
1861 LLM_TENSOR_TIME_MIX_VALUE,
1862 LLM_TENSOR_TIME_MIX_RECEPTANCE,
1863 LLM_TENSOR_TIME_MIX_GATE,
1864 LLM_TENSOR_TIME_MIX_LN,
1865 LLM_TENSOR_TIME_MIX_OUTPUT,
1866 LLM_TENSOR_CHANNEL_MIX_LERP_K,
1867 LLM_TENSOR_CHANNEL_MIX_LERP_R,
1868 LLM_TENSOR_CHANNEL_MIX_KEY,
1869 LLM_TENSOR_CHANNEL_MIX_VALUE,
1870 LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
1871 };
1872 case LLM_ARCH_RWKV6QWEN2:
1873 return {
1874 LLM_TENSOR_TOKEN_EMBD,
1875 LLM_TENSOR_OUTPUT_NORM,
1876 LLM_TENSOR_OUTPUT,
1877 LLM_TENSOR_ATTN_NORM,
1878 LLM_TENSOR_TIME_MIX_W1,
1879 LLM_TENSOR_TIME_MIX_W2,
1880 LLM_TENSOR_TIME_MIX_LERP_X,
1881 LLM_TENSOR_TIME_MIX_LERP_FUSED,
1882 LLM_TENSOR_TIME_MIX_FIRST,
1883 LLM_TENSOR_TIME_MIX_DECAY,
1884 LLM_TENSOR_TIME_MIX_DECAY_W1,
1885 LLM_TENSOR_TIME_MIX_DECAY_W2,
1886 LLM_TENSOR_TIME_MIX_KEY,
1887 LLM_TENSOR_TIME_MIX_VALUE,
1888 LLM_TENSOR_TIME_MIX_RECEPTANCE,
1889 LLM_TENSOR_TIME_MIX_GATE,
1890 LLM_TENSOR_TIME_MIX_OUTPUT,
1891 LLM_TENSOR_FFN_NORM,
1892 LLM_TENSOR_FFN_GATE,
1893 LLM_TENSOR_FFN_DOWN,
1894 LLM_TENSOR_FFN_UP,
1895 };
1896 case LLM_ARCH_RWKV7:
1897 return {
1898 LLM_TENSOR_TOKEN_EMBD,
1899 LLM_TENSOR_TOKEN_EMBD_NORM,
1900 LLM_TENSOR_OUTPUT_NORM,
1901 LLM_TENSOR_OUTPUT,
1902 LLM_TENSOR_ATTN_NORM,
1903 LLM_TENSOR_ATTN_NORM_2,
1904 LLM_TENSOR_TIME_MIX_W0,
1905 LLM_TENSOR_TIME_MIX_W1,
1906 LLM_TENSOR_TIME_MIX_W2,
1907 LLM_TENSOR_TIME_MIX_A0,
1908 LLM_TENSOR_TIME_MIX_A1,
1909 LLM_TENSOR_TIME_MIX_A2,
1910 LLM_TENSOR_TIME_MIX_V0,
1911 LLM_TENSOR_TIME_MIX_V1,
1912 LLM_TENSOR_TIME_MIX_V2,
1913 LLM_TENSOR_TIME_MIX_G1,
1914 LLM_TENSOR_TIME_MIX_G2,
1915 LLM_TENSOR_TIME_MIX_K_K,
1916 LLM_TENSOR_TIME_MIX_K_A,
1917 LLM_TENSOR_TIME_MIX_R_K,
1918 LLM_TENSOR_TIME_MIX_LERP_FUSED,
1919 LLM_TENSOR_TIME_MIX_KEY,
1920 LLM_TENSOR_TIME_MIX_VALUE,
1921 LLM_TENSOR_TIME_MIX_RECEPTANCE,
1922 LLM_TENSOR_TIME_MIX_LN,
1923 LLM_TENSOR_TIME_MIX_OUTPUT,
1924 LLM_TENSOR_CHANNEL_MIX_LERP_K,
1925 LLM_TENSOR_CHANNEL_MIX_KEY,
1926 LLM_TENSOR_CHANNEL_MIX_VALUE,
1927 };
1928 case LLM_ARCH_ARWKV7:
1929 return {
1930 LLM_TENSOR_TOKEN_EMBD,
1931 LLM_TENSOR_TOKEN_EMBD_NORM,
1932 LLM_TENSOR_OUTPUT_NORM,
1933 LLM_TENSOR_OUTPUT,
1934 LLM_TENSOR_ATTN_NORM,
1935 LLM_TENSOR_TIME_MIX_W0,
1936 LLM_TENSOR_TIME_MIX_W1,
1937 LLM_TENSOR_TIME_MIX_W2,
1938 LLM_TENSOR_TIME_MIX_A0,
1939 LLM_TENSOR_TIME_MIX_A1,
1940 LLM_TENSOR_TIME_MIX_A2,
1941 LLM_TENSOR_TIME_MIX_V0,
1942 LLM_TENSOR_TIME_MIX_V1,
1943 LLM_TENSOR_TIME_MIX_V2,
1944 LLM_TENSOR_TIME_MIX_G1,
1945 LLM_TENSOR_TIME_MIX_G2,
1946 LLM_TENSOR_TIME_MIX_K_K,
1947 LLM_TENSOR_TIME_MIX_K_A,
1948 LLM_TENSOR_TIME_MIX_R_K,
1949 LLM_TENSOR_TIME_MIX_LERP_FUSED,
1950 LLM_TENSOR_TIME_MIX_KEY,
1951 LLM_TENSOR_TIME_MIX_VALUE,
1952 LLM_TENSOR_TIME_MIX_RECEPTANCE,
1953 LLM_TENSOR_TIME_MIX_LN,
1954 LLM_TENSOR_TIME_MIX_OUTPUT,
1955 LLM_TENSOR_FFN_NORM,
1956 LLM_TENSOR_FFN_GATE,
1957 LLM_TENSOR_FFN_DOWN,
1958 LLM_TENSOR_FFN_UP,
1959 };
1960 case LLM_ARCH_GRANITE_MOE:
1961 return {
1962 LLM_TENSOR_TOKEN_EMBD,
1963 LLM_TENSOR_OUTPUT_NORM,
1964 LLM_TENSOR_OUTPUT,
1965 LLM_TENSOR_ATTN_NORM,
1966 LLM_TENSOR_ATTN_Q,
1967 LLM_TENSOR_ATTN_K,
1968 LLM_TENSOR_ATTN_V,
1969 LLM_TENSOR_ATTN_OUT,
1970 LLM_TENSOR_FFN_NORM,
1971 LLM_TENSOR_FFN_GATE_INP,
1972 LLM_TENSOR_FFN_GATE_EXPS,
1973 LLM_TENSOR_FFN_DOWN_EXPS,
1974 LLM_TENSOR_FFN_UP_EXPS,
1975 LLM_TENSOR_FFN_GATE_SHEXP,
1976 LLM_TENSOR_FFN_DOWN_SHEXP,
1977 LLM_TENSOR_FFN_UP_SHEXP,
1978 };
1979 case LLM_ARCH_GRANITE_HYBRID:
1980 return {
1981 LLM_TENSOR_TOKEN_EMBD,
1982 LLM_TENSOR_OUTPUT_NORM,
1983 LLM_TENSOR_OUTPUT,
1984 LLM_TENSOR_ATTN_NORM,
1985 LLM_TENSOR_SSM_IN,
1986 LLM_TENSOR_SSM_CONV1D,
1987 LLM_TENSOR_SSM_DT,
1988 LLM_TENSOR_SSM_A,
1989 LLM_TENSOR_SSM_D,
1990 LLM_TENSOR_SSM_NORM,
1991 LLM_TENSOR_SSM_OUT,
1992 LLM_TENSOR_ATTN_Q,
1993 LLM_TENSOR_ATTN_K,
1994 LLM_TENSOR_ATTN_V,
1995 LLM_TENSOR_ATTN_OUT,
1996 LLM_TENSOR_FFN_NORM,
1997 LLM_TENSOR_FFN_GATE,
1998 LLM_TENSOR_FFN_DOWN,
1999 LLM_TENSOR_FFN_UP,
2000 LLM_TENSOR_FFN_NORM,
2001 LLM_TENSOR_FFN_GATE_INP,
2002 LLM_TENSOR_FFN_GATE_EXPS,
2003 LLM_TENSOR_FFN_DOWN_EXPS,
2004 LLM_TENSOR_FFN_UP_EXPS,
2005 LLM_TENSOR_FFN_GATE_SHEXP,
2006 LLM_TENSOR_FFN_DOWN_SHEXP,
2007 LLM_TENSOR_FFN_UP_SHEXP,
2008 };
2009 case LLM_ARCH_WAVTOKENIZER_DEC:
2010 return {
2011 LLM_TENSOR_TOKEN_EMBD,
2012 LLM_TENSOR_TOKEN_EMBD_NORM,
2013 LLM_TENSOR_CONV1D,
2014 LLM_TENSOR_CONVNEXT_DW,
2015 LLM_TENSOR_CONVNEXT_NORM,
2016 LLM_TENSOR_CONVNEXT_PW1,
2017 LLM_TENSOR_CONVNEXT_PW2,
2018 LLM_TENSOR_CONVNEXT_GAMMA,
2019 LLM_TENSOR_OUTPUT_NORM,
2020 LLM_TENSOR_OUTPUT,
2021 LLM_TENSOR_POS_NET_CONV1,
2022 LLM_TENSOR_POS_NET_CONV2,
2023 LLM_TENSOR_POS_NET_NORM,
2024 LLM_TENSOR_POS_NET_NORM1,
2025 LLM_TENSOR_POS_NET_NORM2,
2026 LLM_TENSOR_POS_NET_ATTN_NORM,
2027 LLM_TENSOR_POS_NET_ATTN_Q,
2028 LLM_TENSOR_POS_NET_ATTN_K,
2029 LLM_TENSOR_POS_NET_ATTN_V,
2030 LLM_TENSOR_POS_NET_ATTN_OUT,
2031 };
2032 case LLM_ARCH_BAILINGMOE:
2033 return {
2034 LLM_TENSOR_TOKEN_EMBD,
2035 LLM_TENSOR_OUTPUT_NORM,
2036 LLM_TENSOR_OUTPUT,
2037 LLM_TENSOR_ROPE_FREQS,
2038 LLM_TENSOR_ATTN_NORM,
2039 LLM_TENSOR_ATTN_Q,
2040 LLM_TENSOR_ATTN_K,
2041 LLM_TENSOR_ATTN_V,
2042 LLM_TENSOR_ATTN_OUT,
2043 LLM_TENSOR_FFN_GATE_INP,
2044 LLM_TENSOR_FFN_NORM,
2045 LLM_TENSOR_FFN_GATE_EXPS,
2046 LLM_TENSOR_FFN_DOWN_EXPS,
2047 LLM_TENSOR_FFN_UP_EXPS,
2048 LLM_TENSOR_FFN_GATE_INP_SHEXP,
2049 LLM_TENSOR_FFN_GATE_SHEXP,
2050 LLM_TENSOR_FFN_DOWN_SHEXP,
2051 LLM_TENSOR_FFN_UP_SHEXP,
2052 };
2053 case LLM_ARCH_BAILINGMOE2:
2054 return {
2055 LLM_TENSOR_TOKEN_EMBD,
2056 LLM_TENSOR_OUTPUT_NORM,
2057 LLM_TENSOR_OUTPUT,
2058 LLM_TENSOR_ATTN_NORM,
2059 LLM_TENSOR_ATTN_Q_NORM,
2060 LLM_TENSOR_ATTN_K_NORM,
2061 LLM_TENSOR_ATTN_QKV,
2062 LLM_TENSOR_ATTN_OUT,
2063 LLM_TENSOR_FFN_GATE_INP,
2064 LLM_TENSOR_FFN_EXP_PROBS_B,
2065 LLM_TENSOR_FFN_NORM,
2066 LLM_TENSOR_FFN_GATE,
2067 LLM_TENSOR_FFN_DOWN,
2068 LLM_TENSOR_FFN_UP,
2069 LLM_TENSOR_FFN_GATE_EXPS,
2070 LLM_TENSOR_FFN_DOWN_EXPS,
2071 LLM_TENSOR_FFN_UP_EXPS,
2072 LLM_TENSOR_FFN_GATE_SHEXP,
2073 LLM_TENSOR_FFN_DOWN_SHEXP,
2074 LLM_TENSOR_FFN_UP_SHEXP,
2075 LLM_TENSOR_NEXTN_EH_PROJ,
2076 LLM_TENSOR_NEXTN_EMBED_TOKENS,
2077 LLM_TENSOR_NEXTN_ENORM,
2078 LLM_TENSOR_NEXTN_HNORM,
2079 LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
2080 LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
2081 LLM_TENSOR_LAYER_OUT_NORM,
2082 };
2083 case LLM_ARCH_DOTS1:
2084 return {
2085 LLM_TENSOR_TOKEN_EMBD,
2086 LLM_TENSOR_OUTPUT_NORM,
2087 LLM_TENSOR_OUTPUT,
2088 LLM_TENSOR_ATTN_NORM,
2089 LLM_TENSOR_ATTN_Q,
2090 LLM_TENSOR_ATTN_Q_NORM,
2091 LLM_TENSOR_ATTN_K,
2092 LLM_TENSOR_ATTN_K_NORM,
2093 LLM_TENSOR_ATTN_V,
2094 LLM_TENSOR_ATTN_OUT,
2095 LLM_TENSOR_FFN_NORM,
2096 LLM_TENSOR_FFN_GATE,
2097 LLM_TENSOR_FFN_UP,
2098 LLM_TENSOR_FFN_DOWN,
2099 LLM_TENSOR_FFN_GATE_INP,
2100 LLM_TENSOR_FFN_GATE_EXPS,
2101 LLM_TENSOR_FFN_DOWN_EXPS,
2102 LLM_TENSOR_FFN_UP_EXPS,
2103 LLM_TENSOR_FFN_GATE_INP_SHEXP,
2104 LLM_TENSOR_FFN_GATE_SHEXP,
2105 LLM_TENSOR_FFN_DOWN_SHEXP,
2106 LLM_TENSOR_FFN_UP_SHEXP,
2107 LLM_TENSOR_FFN_EXP_PROBS_B,
2108 };
2109 case LLM_ARCH_ERNIE4_5_MOE:
2110 return {
2111 LLM_TENSOR_TOKEN_EMBD,
2112 LLM_TENSOR_OUTPUT_NORM,
2113 LLM_TENSOR_OUTPUT,
2114 LLM_TENSOR_ATTN_NORM,
2115 LLM_TENSOR_ATTN_Q,
2116 LLM_TENSOR_ATTN_K,
2117 LLM_TENSOR_ATTN_V,
2118 LLM_TENSOR_ATTN_OUT,
2119 LLM_TENSOR_FFN_NORM,
2120 LLM_TENSOR_FFN_GATE,
2121 LLM_TENSOR_FFN_DOWN,
2122 LLM_TENSOR_FFN_UP,
2123 LLM_TENSOR_FFN_GATE_INP,
2124 LLM_TENSOR_FFN_GATE_SHEXP,
2125 LLM_TENSOR_FFN_DOWN_SHEXP,
2126 LLM_TENSOR_FFN_UP_SHEXP,
2127 LLM_TENSOR_FFN_GATE_EXPS,
2128 LLM_TENSOR_FFN_DOWN_EXPS,
2129 LLM_TENSOR_FFN_UP_EXPS,
2130 LLM_TENSOR_FFN_EXP_PROBS_B,
2131 };
2132 case LLM_ARCH_HUNYUAN_MOE:
2133 return {
2134 LLM_TENSOR_TOKEN_EMBD,
2135 LLM_TENSOR_OUTPUT_NORM,
2136 LLM_TENSOR_OUTPUT,
2137 LLM_TENSOR_ATTN_NORM,
2138 LLM_TENSOR_ATTN_Q,
2139 LLM_TENSOR_ATTN_Q_NORM,
2140 LLM_TENSOR_ATTN_K,
2141 LLM_TENSOR_ATTN_K_NORM,
2142 LLM_TENSOR_ATTN_V,
2143 LLM_TENSOR_ATTN_OUT,
2144 LLM_TENSOR_FFN_GATE_INP,
2145 LLM_TENSOR_FFN_NORM,
2146 LLM_TENSOR_FFN_GATE_SHEXP,
2147 LLM_TENSOR_FFN_DOWN_SHEXP,
2148 LLM_TENSOR_FFN_UP_SHEXP,
2149 LLM_TENSOR_FFN_GATE_EXPS,
2150 LLM_TENSOR_FFN_DOWN_EXPS,
2151 LLM_TENSOR_FFN_UP_EXPS,
2152 };
2153 case LLM_ARCH_OPENAI_MOE:
2154 return {
2155 LLM_TENSOR_TOKEN_EMBD,
2156 LLM_TENSOR_OUTPUT_NORM,
2157 LLM_TENSOR_OUTPUT,
2158 LLM_TENSOR_ATTN_NORM,
2159 LLM_TENSOR_ATTN_POST_NORM,
2160 LLM_TENSOR_ATTN_Q,
2161 LLM_TENSOR_ATTN_K,
2162 LLM_TENSOR_ATTN_V,
2163 LLM_TENSOR_ATTN_OUT,
2164 LLM_TENSOR_ATTN_SINKS,
2165 LLM_TENSOR_FFN_GATE_INP,
2166 LLM_TENSOR_FFN_GATE_EXPS,
2167 LLM_TENSOR_FFN_DOWN_EXPS,
2168 LLM_TENSOR_FFN_UP_EXPS,
2169 };
2170 case LLM_ARCH_LFM2:
2171 return {
2172 LLM_TENSOR_ATTN_NORM,
2173 LLM_TENSOR_ATTN_Q,
2174 LLM_TENSOR_ATTN_K,
2175 LLM_TENSOR_ATTN_V,
2176 LLM_TENSOR_ATTN_OUT,
2177 LLM_TENSOR_ATTN_K_NORM,
2178 LLM_TENSOR_ATTN_Q_NORM,
2179 LLM_TENSOR_FFN_DOWN,
2180 LLM_TENSOR_FFN_GATE,
2181 LLM_TENSOR_FFN_NORM,
2182 LLM_TENSOR_FFN_UP,
2183 LLM_TENSOR_SHORTCONV_CONV,
2184 LLM_TENSOR_SHORTCONV_INPROJ,
2185 LLM_TENSOR_SHORTCONV_OUTPROJ,
2186 LLM_TENSOR_TOKEN_EMBD,
2187 LLM_TENSOR_OUTPUT_NORM_LFM2,
2188 LLM_TENSOR_OUTPUT,
2189 LLM_TENSOR_DENSE_2_OUT,
2190 };
2191 case LLM_ARCH_LFM2MOE:
2192 return {
2193 LLM_TENSOR_ATTN_NORM,
2194 LLM_TENSOR_ATTN_Q,
2195 LLM_TENSOR_ATTN_K,
2196 LLM_TENSOR_ATTN_V,
2197 LLM_TENSOR_ATTN_OUT,
2198 LLM_TENSOR_ATTN_K_NORM,
2199 LLM_TENSOR_ATTN_Q_NORM,
2200 LLM_TENSOR_FFN_DOWN,
2201 LLM_TENSOR_FFN_GATE,
2202 LLM_TENSOR_FFN_NORM,
2203 LLM_TENSOR_FFN_UP,
2204 LLM_TENSOR_SHORTCONV_CONV,
2205 LLM_TENSOR_SHORTCONV_INPROJ,
2206 LLM_TENSOR_SHORTCONV_OUTPROJ,
2207 LLM_TENSOR_TOKEN_EMBD,
2208 LLM_TENSOR_OUTPUT_NORM_LFM2,
2209 LLM_TENSOR_FFN_GATE_INP,
2210 LLM_TENSOR_FFN_GATE_EXPS,
2211 LLM_TENSOR_FFN_DOWN_EXPS,
2212 LLM_TENSOR_FFN_UP_EXPS,
2213 LLM_TENSOR_FFN_EXP_PROBS_B,
2214 };
2215 case LLM_ARCH_SMALLTHINKER:
2216 return {
2217 LLM_TENSOR_TOKEN_EMBD,
2218 LLM_TENSOR_OUTPUT_NORM,
2219 LLM_TENSOR_OUTPUT,
2220 LLM_TENSOR_ATTN_NORM,
2221 LLM_TENSOR_ATTN_Q,
2222 LLM_TENSOR_ATTN_K,
2223 LLM_TENSOR_ATTN_V,
2224 LLM_TENSOR_ATTN_OUT,
2225 LLM_TENSOR_FFN_NORM,
2226 LLM_TENSOR_FFN_GATE,
2227 LLM_TENSOR_FFN_DOWN,
2228 LLM_TENSOR_FFN_UP,
2229 LLM_TENSOR_FFN_GATE_INP,
2230 LLM_TENSOR_FFN_GATE_EXPS,
2231 LLM_TENSOR_FFN_DOWN_EXPS,
2232 LLM_TENSOR_FFN_UP_EXPS,
2233 };
2234 case LLM_ARCH_APERTUS:
2235 return {
2236 LLM_TENSOR_TOKEN_EMBD,
2237 LLM_TENSOR_OUTPUT_NORM,
2238 LLM_TENSOR_OUTPUT,
2239 LLM_TENSOR_ROPE_FREQS,
2240 LLM_TENSOR_ATTN_NORM,
2241 LLM_TENSOR_ATTN_Q,
2242 LLM_TENSOR_ATTN_K,
2243 LLM_TENSOR_ATTN_V,
2244 LLM_TENSOR_ATTN_OUT,
2245 LLM_TENSOR_ATTN_Q_NORM,
2246 LLM_TENSOR_ATTN_K_NORM,
2247 LLM_TENSOR_FFN_NORM,
2248 LLM_TENSOR_FFN_DOWN,
2249 LLM_TENSOR_FFN_UP,
2250 };
2251 case LLM_ARCH_SEED_OSS:
2252 return {
2253 LLM_TENSOR_TOKEN_EMBD,
2254 LLM_TENSOR_OUTPUT_NORM,
2255 LLM_TENSOR_OUTPUT,
2256 LLM_TENSOR_ATTN_NORM,
2257 LLM_TENSOR_ATTN_Q,
2258 LLM_TENSOR_ATTN_K,
2259 LLM_TENSOR_ATTN_V,
2260 LLM_TENSOR_ATTN_OUT,
2261 LLM_TENSOR_ATTN_POST_NORM,
2262 LLM_TENSOR_FFN_GATE,
2263 LLM_TENSOR_FFN_DOWN,
2264 LLM_TENSOR_FFN_UP,
2265 };
2266 case LLM_ARCH_GROVEMOE:
2267 return {
2268 LLM_TENSOR_TOKEN_EMBD,
2269 LLM_TENSOR_OUTPUT_NORM,
2270 LLM_TENSOR_OUTPUT,
2271 LLM_TENSOR_ATTN_NORM,
2272 LLM_TENSOR_ATTN_Q,
2273 LLM_TENSOR_ATTN_Q_NORM,
2274 LLM_TENSOR_ATTN_K,
2275 LLM_TENSOR_ATTN_K_NORM,
2276 LLM_TENSOR_ATTN_V,
2277 LLM_TENSOR_ATTN_OUT,
2278 LLM_TENSOR_FFN_NORM,
2279 LLM_TENSOR_FFN_GATE_INP,
2280 LLM_TENSOR_FFN_GATE_EXPS,
2281 LLM_TENSOR_FFN_DOWN_EXPS,
2282 LLM_TENSOR_FFN_UP_EXPS,
2283 LLM_TENSOR_FFN_GATE_CHEXPS,
2284 LLM_TENSOR_FFN_DOWN_CHEXPS,
2285 LLM_TENSOR_FFN_UP_CHEXPS,
2286 };
2287 case LLM_ARCH_MINIMAX_M2:
2288 return {
2289 LLM_TENSOR_TOKEN_EMBD,
2290 LLM_TENSOR_OUTPUT_NORM,
2291 LLM_TENSOR_OUTPUT,
2292 LLM_TENSOR_ATTN_NORM,
2293 LLM_TENSOR_ATTN_Q,
2294 LLM_TENSOR_ATTN_K,
2295 LLM_TENSOR_ATTN_V,
2296 LLM_TENSOR_ATTN_OUT,
2297 LLM_TENSOR_ATTN_Q_NORM,
2298 LLM_TENSOR_ATTN_K_NORM,
2299 LLM_TENSOR_FFN_NORM,
2300 LLM_TENSOR_FFN_GATE_INP,
2301 LLM_TENSOR_FFN_GATE_EXPS,
2302 LLM_TENSOR_FFN_DOWN_EXPS,
2303 LLM_TENSOR_FFN_UP_EXPS,
2304 LLM_TENSOR_FFN_EXP_PROBS_B,
2305 };
2306 case LLM_ARCH_COGVLM:
2307 return {
2308 LLM_TENSOR_TOKEN_EMBD,
2309 LLM_TENSOR_OUTPUT_NORM,
2310 LLM_TENSOR_OUTPUT,
2311 LLM_TENSOR_ATTN_NORM,
2312 LLM_TENSOR_ATTN_QKV,
2313 LLM_TENSOR_ATTN_OUT,
2314 LLM_TENSOR_FFN_NORM,
2315 LLM_TENSOR_FFN_GATE,
2316 LLM_TENSOR_FFN_DOWN,
2317 LLM_TENSOR_FFN_UP,
2318 LLM_TENSOR_VISEXP_ATTN_QKV,
2319 LLM_TENSOR_VISEXP_ATTN_OUT,
2320 LLM_TENSOR_VISEXP_FFN_GATE,
2321 LLM_TENSOR_VISEXP_FFN_DOWN,
2322 LLM_TENSOR_VISEXP_FFN_UP,
2323 };
2324 case LLM_ARCH_MIMO2:
2325 return {
2326 LLM_TENSOR_TOKEN_EMBD,
2327 LLM_TENSOR_OUTPUT_NORM,
2328 LLM_TENSOR_OUTPUT,
2329 LLM_TENSOR_ATTN_NORM,
2330 LLM_TENSOR_ATTN_Q,
2331 LLM_TENSOR_ATTN_K,
2332 LLM_TENSOR_ATTN_V,
2333 LLM_TENSOR_ATTN_SINKS,
2334 LLM_TENSOR_ATTN_OUT,
2335 LLM_TENSOR_FFN_NORM,
2336 LLM_TENSOR_FFN_GATE,
2337 LLM_TENSOR_FFN_DOWN,
2338 LLM_TENSOR_FFN_UP,
2339 LLM_TENSOR_FFN_GATE_INP,
2340 LLM_TENSOR_FFN_GATE_EXPS,
2341 LLM_TENSOR_FFN_DOWN_EXPS,
2342 LLM_TENSOR_FFN_UP_EXPS,
2343 LLM_TENSOR_FFN_EXP_PROBS_B,
2344 };
2345 case LLM_ARCH_STEP35:
2346 return {
2347 LLM_TENSOR_TOKEN_EMBD,
2348 LLM_TENSOR_OUTPUT_NORM,
2349 LLM_TENSOR_OUTPUT,
2350 LLM_TENSOR_ROPE_FREQS,
2351 LLM_TENSOR_ROPE_FACTORS_LONG,
2352 LLM_TENSOR_ROPE_FACTORS_SHORT,
2353 LLM_TENSOR_ATTN_NORM,
2354 LLM_TENSOR_ATTN_Q,
2355 LLM_TENSOR_ATTN_Q_NORM,
2356 LLM_TENSOR_ATTN_K,
2357 LLM_TENSOR_ATTN_K_NORM,
2358 LLM_TENSOR_ATTN_V,
2359 LLM_TENSOR_ATTN_GATE,
2360 LLM_TENSOR_ATTN_OUT,
2361 LLM_TENSOR_FFN_NORM,
2362 LLM_TENSOR_FFN_GATE,
2363 LLM_TENSOR_FFN_DOWN,
2364 LLM_TENSOR_FFN_UP,
2365 LLM_TENSOR_FFN_GATE_INP,
2366 LLM_TENSOR_FFN_GATE_EXPS,
2367 LLM_TENSOR_FFN_DOWN_EXPS,
2368 LLM_TENSOR_FFN_UP_EXPS,
2369 LLM_TENSOR_FFN_GATE_SHEXP,
2370 LLM_TENSOR_FFN_UP_SHEXP,
2371 LLM_TENSOR_FFN_DOWN_SHEXP,
2372 LLM_TENSOR_FFN_EXP_PROBS_B,
2373 };
2374 case LLM_ARCH_GPTJ:
2375 case LLM_ARCH_UNKNOWN:
2376 return {
2377 LLM_TENSOR_TOKEN_EMBD,
2378 };
2379 case LLM_ARCH_MAINCODER:
2380 return {
2381 LLM_TENSOR_TOKEN_EMBD,
2382 LLM_TENSOR_OUTPUT_NORM,
2383 LLM_TENSOR_OUTPUT,
2384 LLM_TENSOR_ATTN_NORM,
2385 LLM_TENSOR_ATTN_Q,
2386 LLM_TENSOR_ATTN_Q_NORM,
2387 LLM_TENSOR_ATTN_K,
2388 LLM_TENSOR_ATTN_K_NORM,
2389 LLM_TENSOR_ATTN_V,
2390 LLM_TENSOR_ATTN_OUT,
2391 LLM_TENSOR_FFN_NORM,
2392 LLM_TENSOR_FFN_GATE,
2393 LLM_TENSOR_FFN_DOWN,
2394 LLM_TENSOR_FFN_UP,
2395 };
2396 case LLM_ARCH_KIMI_LINEAR:
2397 return {
2398 LLM_TENSOR_TOKEN_EMBD,
2399 LLM_TENSOR_OUTPUT_NORM,
2400 LLM_TENSOR_OUTPUT,
2401 LLM_TENSOR_ROPE_FREQS,
2402 LLM_TENSOR_ATTN_NORM,
2403 LLM_TENSOR_ATTN_Q,
2404 LLM_TENSOR_ATTN_K,
2405 LLM_TENSOR_ATTN_V,
2406 LLM_TENSOR_ATTN_OUT,
2407 LLM_TENSOR_FFN_NORM,
2408 // Dense FFN (layer 0 only)
2409 LLM_TENSOR_FFN_GATE,
2410 LLM_TENSOR_FFN_DOWN,
2411 LLM_TENSOR_FFN_UP,
2412 // MoE FFN (layers 1+)
2413 LLM_TENSOR_FFN_GATE_INP,
2414 LLM_TENSOR_FFN_GATE_EXPS,
2415 LLM_TENSOR_FFN_DOWN_EXPS,
2416 LLM_TENSOR_FFN_UP_EXPS,
2417 LLM_TENSOR_FFN_EXP_PROBS_B,
2418 // Shared experts
2419 LLM_TENSOR_FFN_GATE_SHEXP,
2420 LLM_TENSOR_FFN_DOWN_SHEXP,
2421 LLM_TENSOR_FFN_UP_SHEXP,
2422 // KDA (using SSM_ enum prefix, keeping GGUF names for backward compat)
2423 LLM_TENSOR_SSM_CONV1D_Q,
2424 LLM_TENSOR_SSM_CONV1D_K,
2425 LLM_TENSOR_SSM_CONV1D_V,
2426 LLM_TENSOR_SSM_F_A,
2427 LLM_TENSOR_SSM_F_B,
2428 LLM_TENSOR_SSM_BETA,
2429 LLM_TENSOR_SSM_A,
2430 LLM_TENSOR_SSM_G_A,
2431 LLM_TENSOR_SSM_G_B,
2432 LLM_TENSOR_SSM_DT,
2433 LLM_TENSOR_SSM_NORM,
2434 // MLA
2435 LLM_TENSOR_ATTN_Q_A,
2436 LLM_TENSOR_ATTN_Q_B,
2437 LLM_TENSOR_ATTN_Q_A_NORM,
2438 LLM_TENSOR_ATTN_KV_A_MQA,
2439 LLM_TENSOR_ATTN_KV_B,
2440 LLM_TENSOR_ATTN_K_B,
2441 LLM_TENSOR_ATTN_V_B,
2442 LLM_TENSOR_ATTN_KV_A_NORM,
2443 };
2444 default:
2445 GGML_ABORT("unknown architecture for tensor mapping");
2446 }
2447}
2448
2449// declare information about the model weight tensors:
2450// - the layer in which the tensor is going to be used. this is needed in order to assign the correct buffer type for the weight
2451// - the operator which is going to use the weight. this is needed to determine if the respective backend supports the operator
2452//
2453// for example, input layers are usually assigned to CPU/host buffer types
2454//
2455// a mismatch between the declared information and the actual layer/op in which the tensor is used can lead to sub-optimal
2456// assignment of the buffer types and extra overhead during computation
2457// example: https://github.com/ggml-org/llama.cpp/pull/17548
2458//
// for each tensor id: the layer class it belongs to (input / output / repeating)
// and the ggml operator that consumes the weight (see the note above on why
// both pieces of information matter for buffer-type assignment)
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
    // input-layer tensors
    {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, GGML_OP_MUL}},
    // output-layer tensors
    {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DENSE_2_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
    {LLM_TENSOR_DENSE_3_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
    {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
    {LLM_TENSOR_OUTPUT_NORM_LFM2, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
    {LLM_TENSOR_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
    // repeating (per-layer) tensors, grouped by consuming operator
    {LLM_TENSOR_ROPE_FREQS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
    {LLM_TENSOR_ROPE_FACTORS_LONG, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
    {LLM_TENSOR_ROPE_FACTORS_SHORT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
    {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_K_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_V_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_SINKS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SCALE}},
    {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE_INP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE_INP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_IN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_DT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_ALPHA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_BETA_ALPHA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_A1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_A2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_V1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_V2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_G1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_G2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CHANNEL_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CHANNEL_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
    {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
    {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
    {LLM_TENSOR_SSM_A_NOSCAN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, // a version of SSM_A used for MUL instead of SSM_SCAN
    {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    // Kimi KDA - Conv tensors are 4D [d_conv, 1, d_inner, 1], reshaped to 2D at runtime
    {LLM_TENSOR_SSM_CONV1D_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_CONV1D_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_CONV1D_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_F_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_F_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_BETA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_G_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_G_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_K_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_K_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_R_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_W0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_A0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_V0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_RWKV_WKV6}},
    // per-layer normalization weights (applied via element-wise MUL)
    {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_NORM_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_Q_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_K_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_LAYER_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_Q_A_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_KV_A_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ENC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ENC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_ENC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
    // MoE expert weights (consumed by the indexed matmul)
    {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_DOWN_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_GATE_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_UP_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    // altup / laurel (gemma 3n)
    {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
    {LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    // this tensor is loaded for T5, but never used
    {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
    // convolutional / positional-network tensors (WavTokenizer and friends)
    {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
    {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_NORM2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_CONV1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
    {LLM_TENSOR_POS_NET_CONV2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
    {LLM_TENSOR_POS_NET_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_POS_NET_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_POS_NET_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_POS_NET_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CONVNEXT_DW, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
    {LLM_TENSOR_CONVNEXT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
    {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_VISEXP_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_VISEXP_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_VISEXP_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_VISEXP_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_VISEXP_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    // NextN/MTP tensors are currently ignored (reserved for future MTP support)
    // These tensors only exist in the last layer(s) and are treated as output tensors
    {LLM_TENSOR_NEXTN_EH_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_NEXTN_EMBED_TOKENS, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_NEXTN_ENORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_NEXTN_HNORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
    {LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
};
2655
// trivial constructor: stores the architecture id and an optional key
// suffix (may be nullptr), both used by operator() when building key names
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
2657
2658std::string LLM_KV::operator()(llm_kv kv) const {
2659 std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
2660
2661 if (suffix != nullptr) {
2662 name += ".";
2663 name += suffix;
2664 }
2665
2666 return name;
2667}
2668
// captures all naming context up front; model_tensors holds the tensors
// declared for this architecture (via llm_get_tensor_names) and is consulted
// by str() to decide whether the name template gets formatted
LLM_TN_IMPL::LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid)
    : arch(arch), tensor(tensor), suffix(suffix), bid(bid), xid(xid),
      model_tensors(llm_get_tensor_names(arch)) {}
2672
2673std::string LLM_TN_IMPL::str() const {
2674 if (LLM_TENSOR_NAMES.find(tensor) == LLM_TENSOR_NAMES.end()) {
2675 GGML_ABORT("unknown tensor name for tensor id %d", static_cast<int>(tensor));
2676 }
2677
2678 if (model_tensors.find(tensor) == model_tensors.end()) {
2679 return LLM_TENSOR_NAMES.at(tensor);
2680 }
2681
2682 std::string name = ::format(LLM_TENSOR_NAMES.at(tensor), bid, xid);
2683 if (suffix != nullptr) {
2684 name += ".";
2685 name += suffix;
2686 }
2687
2688 return name;
2689}
2690
2691const char * llm_arch_name(llm_arch arch) {
2692 auto it = LLM_ARCH_NAMES.find(arch);
2693 if (it == LLM_ARCH_NAMES.end()) {
2694 return "unknown";
2695 }
2696 return it->second;
2697}
2698
2699llm_arch llm_arch_from_string(const std::string & name) {
2700 for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
2701 if (kv.second == name) {
2702 return kv.first;
2703 }
2704 }
2705
2706 return LLM_ARCH_UNKNOWN;
2707}
2708
// return the layer/op info registered for the tensor in LLM_TENSOR_INFOS;
// std::map::at throws std::out_of_range for tensors without an entry
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
    return LLM_TENSOR_INFOS.at(tensor);
}
2712
2713bool llm_arch_is_recurrent(const llm_arch & arch) {
2714 switch (arch) {
2715 case LLM_ARCH_MAMBA:
2716 case LLM_ARCH_MAMBA2:
2717 case LLM_ARCH_RWKV6:
2718 case LLM_ARCH_RWKV6QWEN2:
2719 case LLM_ARCH_RWKV7:
2720 case LLM_ARCH_ARWKV7:
2721 return true;
2722 default:
2723 return false;
2724 }
2725}
2726
2727bool llm_arch_is_hybrid(const llm_arch & arch) {
2728 switch (arch) {
2729 case LLM_ARCH_JAMBA:
2730 case LLM_ARCH_FALCON_H1:
2731 case LLM_ARCH_PLAMO2:
2732 case LLM_ARCH_GRANITE_HYBRID:
2733 case LLM_ARCH_LFM2:
2734 case LLM_ARCH_LFM2MOE:
2735 case LLM_ARCH_NEMOTRON_H:
2736 case LLM_ARCH_NEMOTRON_H_MOE:
2737 case LLM_ARCH_QWEN3NEXT:
2738 case LLM_ARCH_KIMI_LINEAR:
2739 case LLM_ARCH_QWEN35:
2740 case LLM_ARCH_QWEN35MOE:
2741 return true;
2742 default:
2743 return false;
2744 }
2745}
2746
2747bool llm_arch_is_diffusion(const llm_arch & arch) {
2748 switch (arch) {
2749 case LLM_ARCH_DREAM:
2750 case LLM_ARCH_LLADA:
2751 case LLM_ARCH_LLADA_MOE:
2752 case LLM_ARCH_RND1:
2753 return true;
2754 default:
2755 return false;
2756 }
2757}