1from __future__ import annotations
2
3from enum import Enum, IntEnum, auto
4from typing import Any
5
6#
7# constants
8#
9
GGUF_MAGIC = 0x46554747 # "GGUF" as little-endian ASCII; first 4 bytes of every GGUF file
GGUF_VERSION = 3 # current GGUF container format version written by this module
GGUF_DEFAULT_ALIGNMENT = 32 # default tensor-data alignment (bytes) when general.alignment is absent
GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h
14
15#
16# metadata keys
17#
18
19
class Keys:
    """Canonical GGUF metadata key strings, grouped by namespace.

    Some values are format-string templates: ``{arch}`` is substituted with the
    model architecture name, ``{id}`` with a numeric index, ``{name}`` with a
    chat-template name, and ``{dense}`` with a dense-layer identifier.
    These strings are part of the on-disk format — do not alter them.
    """

    class General:
        # Core identification of the file contents.
        TYPE = "general.type"
        ARCHITECTURE = "general.architecture"
        QUANTIZATION_VERSION = "general.quantization_version"
        ALIGNMENT = "general.alignment"
        FILE_TYPE = "general.file_type"

        # Recommended Sampler Parameters
        SAMPLING_SEQUENCE = "general.sampling.sequence"
        SAMPLING_TOP_K = "general.sampling.top_k"
        SAMPLING_TOP_P = "general.sampling.top_p"
        SAMPLING_MIN_P = "general.sampling.min_p"
        SAMPLING_XTC_PROBABILITY = "general.sampling.xtc_probability"
        SAMPLING_XTC_THRESHOLD = "general.sampling.xtc_threshold"
        SAMPLING_TEMP = "general.sampling.temp"
        SAMPLING_PENALTY_LAST_N = "general.sampling.penalty_last_n"
        SAMPLING_PENALTY_REPEAT = "general.sampling.penalty_repeat"
        SAMPLING_MIROSTAT = "general.sampling.mirostat"
        SAMPLING_MIROSTAT_TAU = "general.sampling.mirostat_tau"
        SAMPLING_MIROSTAT_ETA = "general.sampling.mirostat_eta"

        # Authorship Metadata
        NAME = "general.name"
        AUTHOR = "general.author"
        VERSION = "general.version"
        ORGANIZATION = "general.organization"

        FINETUNE = "general.finetune"
        BASENAME = "general.basename"

        DESCRIPTION = "general.description"
        QUANTIZED_BY = "general.quantized_by"

        SIZE_LABEL = "general.size_label"

        # Licensing details
        LICENSE = "general.license"
        LICENSE_NAME = "general.license.name"
        LICENSE_LINK = "general.license.link"

        # Typically represents the converted GGUF repo (Unless native)
        URL = "general.url" # Model Website/Paper
        DOI = "general.doi"
        UUID = "general.uuid"
        REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)

        # Model Source during conversion
        SOURCE_URL = "general.source.url" # Model Website/Paper
        SOURCE_DOI = "general.source.doi"
        SOURCE_UUID = "general.source.uuid"
        SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)

        # Base Model Source. There can be more than one source if it's a merged
        # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
        # tracing lineage of models as it is finetuned or merged over time.
        BASE_MODEL_COUNT = "general.base_model.count"
        BASE_MODEL_NAME = "general.base_model.{id}.name"
        BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
        BASE_MODEL_VERSION = "general.base_model.{id}.version"
        BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
        BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
        BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
        BASE_MODEL_DOI = "general.base_model.{id}.doi"
        BASE_MODEL_UUID = "general.base_model.{id}.uuid"
        BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)

        # Dataset Source
        DATASET_COUNT = "general.dataset.count"
        DATASET_NAME = "general.dataset.{id}.name"
        DATASET_AUTHOR = "general.dataset.{id}.author"
        DATASET_VERSION = "general.dataset.{id}.version"
        DATASET_ORGANIZATION = "general.dataset.{id}.organization"
        DATASET_DESCRIPTION = "general.dataset.{id}.description"
        DATASET_URL = "general.dataset.{id}.url" # Model Website/Paper
        DATASET_DOI = "general.dataset.{id}.doi"
        DATASET_UUID = "general.dataset.{id}.uuid"
        DATASET_REPO_URL = "general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)

        # Array based KV stores
        TAGS = "general.tags"
        LANGUAGES = "general.languages"

    class LLM:
        # Architecture-level hyperparameters; "{arch}" is replaced at write time.
        VOCAB_SIZE = "{arch}.vocab_size"
        CONTEXT_LENGTH = "{arch}.context_length"
        EMBEDDING_LENGTH = "{arch}.embedding_length"
        EMBEDDING_LENGTH_OUT = "{arch}.embedding_length_out"
        FEATURES_LENGTH = "{arch}.features_length"
        BLOCK_COUNT = "{arch}.block_count"
        LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
        FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
        EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
        EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length"
        EXPERT_CHUNK_FEED_FORWARD_LENGTH = "{arch}.expert_chunk_feed_forward_length"
        USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
        TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
        EXPERT_COUNT = "{arch}.expert_count"
        EXPERT_USED_COUNT = "{arch}.expert_used_count"
        EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
        EXPERT_GROUP_COUNT = "{arch}.expert_group_count"
        EXPERT_GROUP_USED_COUNT = "{arch}.expert_group_used_count"
        EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
        EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
        EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
        EXPERT_GROUP_SCALE = "{arch}.expert_group_scale"
        EXPERTS_PER_GROUP = "{arch}.experts_per_group"
        MOE_EVERY_N_LAYERS = "{arch}.moe_every_n_layers"
        NEXTN_PREDICT_LAYERS = "{arch}.nextn_predict_layers"
        NUM_DEEPSTACK_LAYERS = "{arch}.n_deepstack_layers"
        POOLING_TYPE = "{arch}.pooling_type"
        LOGIT_SCALE = "{arch}.logit_scale"
        DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
        DECODER_BLOCK_COUNT = "{arch}.decoder_block_count"
        ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
        ROUTER_LOGIT_SOFTCAPPING = "{arch}.router_logit_softcapping"
        FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
        SWIN_NORM = "{arch}.swin_norm"
        RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
        TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
        TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
        RESIDUAL_SCALE = "{arch}.residual_scale"
        EMBEDDING_SCALE = "{arch}.embedding_scale"
        TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
        INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step"
        FULL_ATTENTION_INTERVAL = "{arch}.full_attention_interval"
        ACTIVATION_SPARSITY_SCALE = "{arch}.activation_sparsity_scale"
        ALTUP_ACTIVE_IDX = "{arch}.altup.active_idx"
        ALTUP_NUM_INPUTS = "{arch}.altup.num_inputs"
        EMBD_LENGTH_PER_LAYER_INP = "{arch}.embedding_length_per_layer_input"
        SWIGLU_CLAMP_EXP = "{arch}.swiglu_clamp_exp"
        SWIGLU_CLAMP_SHEXP = "{arch}.swiglu_clamp_shexp"
        DENSE_FEAT_IN_SIZE = "{arch}.{dense}_feat_in"
        DENSE_FEAT_OUT_SIZE = "{arch}.{dense}_feat_out"

    class Attention:
        # Per-architecture attention hyperparameters.
        HEAD_COUNT = "{arch}.attention.head_count"
        HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
        MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
        CLAMP_KQV = "{arch}.attention.clamp_kqv"
        KEY_LENGTH = "{arch}.attention.key_length"
        VALUE_LENGTH = "{arch}.attention.value_length"
        LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
        LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
        GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
        GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
        CAUSAL = "{arch}.attention.causal"
        Q_LORA_RANK = "{arch}.attention.q_lora_rank"
        KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
        DECAY_LORA_RANK = "{arch}.attention.decay_lora_rank"
        ICLR_LORA_RANK = "{arch}.attention.iclr_lora_rank"
        VALUE_RESIDUAL_MIX_LORA_RANK = "{arch}.attention.value_residual_mix_lora_rank"
        GATE_LORA_RANK = "{arch}.attention.gate_lora_rank"
        REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
        SLIDING_WINDOW = "{arch}.attention.sliding_window"
        SCALE = "{arch}.attention.scale"
        OUTPUT_SCALE = "{arch}.attention.output_scale"
        TEMPERATURE_LENGTH = "{arch}.attention.temperature_length"
        KEY_LENGTH_MLA = "{arch}.attention.key_length_mla"
        VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"
        SHARED_KV_LAYERS = "{arch}.attention.shared_kv_layers"
        SLIDING_WINDOW_PATTERN = "{arch}.attention.sliding_window_pattern"
        TEMPERATURE_SCALE = "{arch}.attention.temperature_scale"

    class Rope:
        # Rotary position embedding configuration, including scaling/YaRN keys.
        DIMENSION_COUNT = "{arch}.rope.dimension_count"
        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
        FREQ_BASE = "{arch}.rope.freq_base"
        FREQ_BASE_SWA = "{arch}.rope.freq_base_swa"
        SCALING_TYPE = "{arch}.rope.scaling.type"
        SCALING_FACTOR = "{arch}.rope.scaling.factor"
        SCALING_ATTN_FACTOR = "{arch}.rope.scaling.attn_factor"
        SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
        SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
        SCALING_YARN_LOG_MUL = "{arch}.rope.scaling.yarn_log_multiplier"
        SCALING_YARN_EXT_FACTOR = "{arch}.rope.scaling.yarn_ext_factor"
        SCALING_YARN_ATTN_FACTOR = "{arch}.rope.scaling.yarn_attn_factor"
        SCALING_YARN_BETA_FAST = "{arch}.rope.scaling.yarn_beta_fast"
        SCALING_YARN_BETA_SLOW = "{arch}.rope.scaling.yarn_beta_slow"

    class Split:
        # Multi-file (sharded) GGUF bookkeeping.
        LLM_KV_SPLIT_NO = "split.no"
        LLM_KV_SPLIT_COUNT = "split.count"
        LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"

    class SSM:
        # State-space model (e.g. Mamba-style) hyperparameters.
        CONV_KERNEL = "{arch}.ssm.conv_kernel"
        INNER_SIZE = "{arch}.ssm.inner_size"
        STATE_SIZE = "{arch}.ssm.state_size"
        TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
        GROUP_COUNT = "{arch}.ssm.group_count"
        DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"

    class KDA:
        HEAD_DIM = "{arch}.kda.head_dim"

    class WKV:
        HEAD_SIZE = "{arch}.wkv.head_size"

    class PosNet:
        EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
        BLOCK_COUNT = "{arch}.posnet.block_count"

    class ConvNext:
        EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
        BLOCK_COUNT = "{arch}.convnext.block_count"

    class Classifier:
        OUTPUT_LABELS = "{arch}.classifier.output_labels"

    class ShortConv:
        L_CACHE = "{arch}.shortconv.l_cache"

    class Tokenizer:
        # Tokenizer model, vocabulary, and special-token ids.
        MODEL = "tokenizer.ggml.model"
        PRE = "tokenizer.ggml.pre"
        LIST = "tokenizer.ggml.tokens"
        TOKEN_TYPE = "tokenizer.ggml.token_type"
        TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
        SCORES = "tokenizer.ggml.scores"
        MERGES = "tokenizer.ggml.merges"
        BOS_ID = "tokenizer.ggml.bos_token_id"
        EOS_ID = "tokenizer.ggml.eos_token_id"
        EOT_ID = "tokenizer.ggml.eot_token_id"
        EOM_ID = "tokenizer.ggml.eom_token_id"
        UNK_ID = "tokenizer.ggml.unknown_token_id"
        # NOTE: "seperator" misspelling is part of the established on-disk key;
        # do not "correct" it or existing files stop resolving this entry.
        SEP_ID = "tokenizer.ggml.seperator_token_id"
        PAD_ID = "tokenizer.ggml.padding_token_id"
        MASK_ID = "tokenizer.ggml.mask_token_id"
        ADD_BOS = "tokenizer.ggml.add_bos_token"
        ADD_EOS = "tokenizer.ggml.add_eos_token"
        ADD_SEP = "tokenizer.ggml.add_sep_token"
        ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
        REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
        PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
        HF_JSON = "tokenizer.huggingface.json"
        RWKV = "tokenizer.rwkv.world"
        CHAT_TEMPLATE = "tokenizer.chat_template"
        CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
        CHAT_TEMPLATES = "tokenizer.chat_templates"
        # FIM/Infill special tokens constants
        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
        # deprecated:
        PREFIX_ID = "tokenizer.ggml.prefix_token_id"
        SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
        MIDDLE_ID = "tokenizer.ggml.middle_token_id"

    class Adapter:
        # LoRA / adapter files.
        TYPE = "adapter.type"
        LORA_ALPHA = "adapter.lora.alpha"
        LORA_TASK_NAME = "adapter.lora.task_name"
        LORA_PROMPT_PREFIX = "adapter.lora.prompt_prefix"
        ALORA_INVOCATION_TOKENS = "adapter.alora.invocation_tokens"

    class IMatrix:
        # Importance-matrix files.
        CHUNK_COUNT = "imatrix.chunk_count"
        CHUNK_SIZE = "imatrix.chunk_size"
        DATASETS = "imatrix.datasets"

    class Clip:
        # Top-level multimodal projector (clip) capabilities.
        PROJECTOR_TYPE = "clip.projector_type"
        HAS_VISION_ENCODER = "clip.has_vision_encoder"
        HAS_AUDIO_ENCODER = "clip.has_audio_encoder"
        HAS_LLAVA_PROJECTOR = "clip.has_llava_projector"

    class ClipVision:
        # Vision-encoder hyperparameters for multimodal projectors.
        PROJECTOR_TYPE = "clip.vision.projector_type" # for mixed modality models
        IMAGE_SIZE = "clip.vision.image_size"
        IMAGE_MIN_PIXELS = "clip.vision.image_min_pixels"
        IMAGE_MAX_PIXELS = "clip.vision.image_max_pixels"
        PREPROC_IMAGE_SIZE = "clip.vision.preproc_image_size"
        PATCH_SIZE = "clip.vision.patch_size"
        EMBEDDING_LENGTH = "clip.vision.embedding_length"
        FEED_FORWARD_LENGTH = "clip.vision.feed_forward_length"
        PROJECTION_DIM = "clip.vision.projection_dim"
        BLOCK_COUNT = "clip.vision.block_count"
        IMAGE_MEAN = "clip.vision.image_mean"
        IMAGE_STD = "clip.vision.image_std"
        SPATIAL_MERGE_SIZE = "clip.vision.spatial_merge_size"
        USE_GELU = "clip.use_gelu"
        USE_SILU = "clip.use_silu"
        N_WA_PATTERN = "clip.vision.n_wa_pattern" # used by qwen2.5vl
        WA_LAYER_INDEXES = "clip.vision.wa_layer_indexes" # used by youtuvl
        IS_DEEPSTACK_LAYERS = "clip.vision.is_deepstack_layers"
        WINDOW_SIZE = "clip.vision.window_size"

        class Attention:
            HEAD_COUNT = "clip.vision.attention.head_count"
            LAYERNORM_EPS = "clip.vision.attention.layer_norm_epsilon"

        class Projector:
            SCALE_FACTOR = "clip.vision.projector.scale_factor"

    class ClipAudio:
        # Audio-encoder hyperparameters for multimodal projectors.
        PROJECTOR_TYPE = "clip.audio.projector_type" # for mixed modality models
        NUM_MEL_BINS = "clip.audio.num_mel_bins"
        EMBEDDING_LENGTH = "clip.audio.embedding_length"
        FEED_FORWARD_LENGTH = "clip.audio.feed_forward_length"
        PROJECTION_DIM = "clip.audio.projection_dim"
        BLOCK_COUNT = "clip.audio.block_count"

        class Attention:
            HEAD_COUNT = "clip.audio.attention.head_count"
            LAYERNORM_EPS = "clip.audio.attention.layer_norm_epsilon"

        class Projector:
            STACK_FACTOR = "clip.audio.projector.stack_factor"

    class Diffusion:
        SHIFT_LOGITS = "diffusion.shift_logits"

    class xIELU:
        # xIELU activation parameters.
        ALPHA_P = "xielu.alpha_p"
        ALPHA_N = "xielu.alpha_n"
        BETA = "xielu.beta"
        EPS = "xielu.eps"
342
343#
344# recommended mapping of model tensor names for storage in gguf
345#
346
347
class GGUFType:
    """File-content kinds; presumably stored under Keys.General.TYPE — confirm against writer."""
    MODEL = "model"
    ADAPTER = "adapter"
    IMATRIX = "imatrix"
    MMPROJ = "mmproj" # dummy, unused for now
353
354
class MODEL_ARCH(IntEnum):
    """Known model architectures.

    Values come from ``auto()`` and are therefore order-dependent: only append
    new members at the end and never reorder existing ones, or the numeric ids
    silently change. Serialized arch names live in MODEL_ARCH_NAMES, not here.
    """
    MMPROJ = auto() # dummy arch for clip.cpp
    LLAMA = auto()
    LLAMA4 = auto()
    DECI = auto()
    FALCON = auto()
    FALCON_H1 = auto()
    BAICHUAN = auto()
    GROK = auto()
    GPT2 = auto()
    GPTJ = auto()
    GPTNEOX = auto()
    MPT = auto()
    STARCODER = auto()
    REFACT = auto()
    BERT = auto()
    MODERN_BERT = auto()
    NOMIC_BERT = auto()
    NOMIC_BERT_MOE = auto()
    NEO_BERT = auto()
    JINA_BERT_V2 = auto()
    JINA_BERT_V3 = auto()
    BLOOM = auto()
    STABLELM = auto()
    QWEN = auto()
    QWEN2 = auto()
    QWEN2MOE = auto()
    QWEN2VL = auto()
    QWEN3 = auto()
    QWEN3MOE = auto()
    QWEN3NEXT = auto()
    QWEN3VL = auto()
    QWEN3VLMOE = auto()
    QWEN35 = auto()
    QWEN35MOE = auto()
    PHI2 = auto()
    PHI3 = auto()
    PHIMOE = auto()
    PLAMO = auto()
    PLAMO2 = auto()
    PLAMO3 = auto()
    CODESHELL = auto()
    ORION = auto()
    INTERNLM2 = auto()
    MINICPM = auto()
    MINICPM3 = auto()
    GEMMA = auto()
    GEMMA2 = auto()
    GEMMA3 = auto()
    GEMMA3N = auto()
    GEMMA_EMBEDDING = auto()
    STARCODER2 = auto()
    RWKV6 = auto()
    RWKV6QWEN2 = auto()
    RWKV7 = auto()
    ARWKV7 = auto()
    MAMBA = auto()
    MAMBA2 = auto()
    JAMBA = auto()
    XVERSE = auto()
    COMMAND_R = auto()
    COHERE2 = auto()
    DBRX = auto()
    OLMO = auto()
    OLMO2 = auto()
    OLMOE = auto()
    OPENELM = auto()
    ARCTIC = auto()
    DEEPSEEK = auto()
    DEEPSEEK2 = auto()
    CHATGLM = auto()
    GLM4 = auto()
    GLM4_MOE = auto()
    BITNET = auto()
    T5 = auto()
    T5ENCODER = auto()
    JAIS = auto()
    NEMOTRON = auto()
    NEMOTRON_H = auto()
    NEMOTRON_H_MOE = auto()
    EXAONE = auto()
    EXAONE4 = auto()
    EXAONE_MOE = auto()
    GRANITE = auto()
    GRANITE_MOE = auto()
    GRANITE_HYBRID = auto()
    CHAMELEON = auto()
    WAVTOKENIZER_DEC = auto()
    PLM = auto()
    BAILINGMOE = auto()
    BAILINGMOE2 = auto()
    DOTS1 = auto()
    ARCEE = auto()
    AFMOE = auto()
    ERNIE4_5 = auto()
    ERNIE4_5_MOE = auto()
    HUNYUAN_MOE = auto()
    HUNYUAN_DENSE = auto()
    SMOLLM3 = auto()
    GPT_OSS = auto()
    LFM2 = auto()
    LFM2MOE = auto()
    DREAM = auto()
    SMALLTHINKER = auto()
    LLADA = auto()
    LLADA_MOE = auto()
    SEED_OSS = auto()
    GROVEMOE = auto()
    APERTUS = auto()
    COGVLM = auto()
    MINIMAXM2 = auto()
    RND1 = auto()
    PANGU_EMBED = auto()
    MISTRAL3 = auto()
    MIMO2 = auto()
    STEP35 = auto()
    LLAMA_EMBED = auto()
    MAINCODER = auto()
    KIMI_LINEAR = auto()
474
475
class VISION_PROJECTOR_TYPE(IntEnum):
    """Vision projector variants.

    ``auto()`` values are order-dependent — append only, never reorder.
    Serialized names (where defined) live in VISION_PROJECTOR_TYPE_NAMES.
    """
    MLP = auto()
    LDP = auto()
    LDPV2 = auto()
    RESAMPLER = auto()
    GLM_EDGE = auto()
    MERGER = auto()
    GEMMA3N = auto()
    GEMMA3 = auto()
    QWEN3VL = auto()
    COGVLM = auto()
487
488
class MODEL_TENSOR(IntEnum):
    """Logical tensor roles across all supported architectures.

    ``auto()`` values are order-dependent — append only, never reorder.
    The corresponding serialized tensor-name templates live in TENSOR_NAMES.
    """
    TOKEN_EMBD = auto()
    TOKEN_EMBD_NORM = auto()
    TOKEN_TYPES = auto()
    POS_EMBD = auto()
    OUTPUT = auto()
    DENSE_2_OUT = auto() # embeddinggemma 2_Dense
    DENSE_3_OUT = auto() # embeddinggemma 3_Dense
    OUTPUT_NORM = auto()
    ROPE_FREQS = auto()
    ROPE_FACTORS_LONG = auto()
    ROPE_FACTORS_SHORT = auto()
    ATTN_Q = auto()
    ATTN_K = auto()
    ATTN_V = auto()
    ATTN_QKV = auto()
    ATTN_OUT = auto()
    ATTN_NORM = auto()
    ATTN_NORM_2 = auto()
    ATTN_OUT_NORM = auto()
    ATTN_POST_NORM = auto()
    ATTN_ROT_EMBD = auto()
    ATTN_SINKS = auto()
    ATTN_GATE = auto()
    FFN_GATE_INP = auto()
    FFN_GATE_INP_SHEXP = auto()
    FFN_NORM = auto()
    FFN_PRE_NORM = auto()
    FFN_POST_NORM = auto()
    FFN_GATE = auto()
    FFN_DOWN = auto()
    FFN_UP = auto()
    FFN_ACT = auto()
    FFN_NORM_EXP = auto()
    FFN_GATE_EXP = auto()
    FFN_DOWN_EXP = auto()
    FFN_UP_EXP = auto()
    FFN_GATE_SHEXP = auto()
    FFN_DOWN_SHEXP = auto()
    FFN_UP_SHEXP = auto()
    FFN_GATE_CHEXP = auto()
    FFN_DOWN_CHEXP = auto()
    FFN_UP_CHEXP = auto()
    FFN_EXP_PROBS_B = auto()
    ATTN_Q_NORM = auto()
    ATTN_K_NORM = auto()
    LAYER_OUT_NORM = auto()
    PER_LAYER_TOKEN_EMBD = auto() # gemma3n
    PER_LAYER_MODEL_PROJ = auto() # gemma3n
    PER_LAYER_INP_GATE = auto() # gemma3n
    PER_LAYER_PROJ = auto() # gemma3n
    PER_LAYER_PROJ_NORM = auto() # gemma3n
    PER_LAYER_POST_NORM = auto() # gemma3n
    ALTUP_PROJ = auto() # gemma3n
    ALTUP_UNEMBD_PROJ = auto() # gemma3n
    ALTUP_CORRECT_COEF = auto() # gemma3n
    ALTUP_CORRECT_SCALE = auto() # gemma3n
    ALTUP_PREDICT_COEF = auto() # gemma3n
    ALTUP_ROUTER = auto() # gemma3n
    ALTUP_ROUTER_NORM = auto() # gemma3n
    LAUREL_L = auto() # gemma3n
    LAUREL_R = auto() # gemma3n
    LAUREL_POST_NORM = auto() # gemma3n
    SSM_IN = auto()
    SSM_CONV1D = auto()
    SSM_X = auto()
    SSM_DT = auto()
    SSM_DT_NORM = auto()
    SSM_A = auto()
    SSM_B_NORM = auto()
    SSM_C_NORM = auto()
    SSM_D = auto()
    SSM_NORM = auto()
    SSM_OUT = auto()
    SSM_ALPHA = auto() # qwen3.5
    SSM_BETA_ALPHA = auto() # qwen3next
    SSM_CONV1D_Q = auto() # Kimi Linear
    SSM_CONV1D_K = auto() # Kimi Linear
    SSM_CONV1D_V = auto() # Kimi Linear
    SSM_F_A = auto() # Kimi Linear
    SSM_F_B = auto() # Kimi Linear
    SSM_BETA = auto() # Kimi Linear qwen3.5
    SSM_G_A = auto() # Kimi Linear
    SSM_G_B = auto() # Kimi Linear
    TIME_MIX_W0 = auto()
    TIME_MIX_W1 = auto()
    TIME_MIX_W2 = auto()
    TIME_MIX_A0 = auto()
    TIME_MIX_A1 = auto()
    TIME_MIX_A2 = auto()
    TIME_MIX_V0 = auto()
    TIME_MIX_V1 = auto()
    TIME_MIX_V2 = auto()
    TIME_MIX_G1 = auto()
    TIME_MIX_G2 = auto()
    TIME_MIX_K_K = auto()
    TIME_MIX_K_A = auto()
    TIME_MIX_R_K = auto()
    TIME_MIX_LERP_X = auto()
    TIME_MIX_LERP_K = auto()
    TIME_MIX_LERP_V = auto()
    TIME_MIX_LERP_R = auto()
    TIME_MIX_LERP_G = auto()
    TIME_MIX_LERP_FUSED = auto()
    TIME_MIX_LERP_W = auto()
    TIME_MIX_FIRST = auto()
    TIME_MIX_DECAY = auto()
    TIME_MIX_DECAY_W1 = auto()
    TIME_MIX_DECAY_W2 = auto()
    TIME_MIX_KEY = auto()
    TIME_MIX_VALUE = auto()
    TIME_MIX_RECEPTANCE = auto()
    TIME_MIX_GATE = auto()
    TIME_MIX_LN = auto()
    TIME_MIX_OUTPUT = auto()
    CHANNEL_MIX_LERP_K = auto()
    CHANNEL_MIX_LERP_R = auto()
    CHANNEL_MIX_KEY = auto()
    CHANNEL_MIX_RECEPTANCE = auto()
    CHANNEL_MIX_VALUE = auto()
    ATTN_Q_A = auto()
    ATTN_Q_B = auto()
    ATTN_KV_A_MQA = auto()
    ATTN_KV_B = auto()
    ATTN_K_B = auto()
    ATTN_V_B = auto()
    ATTN_Q_A_NORM = auto()
    ATTN_KV_A_NORM = auto()
    FFN_SUB_NORM = auto()
    ATTN_SUB_NORM = auto()
    DEC_ATTN_NORM = auto()
    DEC_ATTN_Q = auto()
    DEC_ATTN_K = auto()
    DEC_ATTN_V = auto()
    DEC_ATTN_OUT = auto()
    DEC_ATTN_REL_B = auto()
    DEC_CROSS_ATTN_NORM = auto()
    DEC_CROSS_ATTN_Q = auto()
    DEC_CROSS_ATTN_K = auto()
    DEC_CROSS_ATTN_V = auto()
    DEC_CROSS_ATTN_OUT = auto()
    DEC_CROSS_ATTN_REL_B = auto()
    DEC_FFN_NORM = auto()
    DEC_FFN_GATE = auto()
    DEC_FFN_DOWN = auto()
    DEC_FFN_UP = auto()
    DEC_OUTPUT_NORM = auto()
    ENC_ATTN_NORM = auto()
    ENC_ATTN_Q = auto()
    ENC_ATTN_K = auto()
    ENC_ATTN_V = auto()
    ENC_ATTN_OUT = auto()
    ENC_ATTN_REL_B = auto()
    ENC_FFN_NORM = auto()
    ENC_FFN_GATE = auto()
    ENC_FFN_DOWN = auto()
    ENC_FFN_UP = auto()
    ENC_OUTPUT_NORM = auto()
    CLS = auto() # classifier
    CLS_OUT = auto() # classifier output projection
    CONV1D = auto()
    CONVNEXT_DW = auto()
    CONVNEXT_NORM = auto()
    CONVNEXT_PW1 = auto()
    CONVNEXT_PW2 = auto()
    CONVNEXT_GAMMA = auto()
    POSNET_CONV1 = auto()
    POSNET_CONV2 = auto()
    POSNET_NORM = auto()
    POSNET_NORM1 = auto()
    POSNET_NORM2 = auto()
    POSNET_ATTN_NORM = auto()
    POSNET_ATTN_Q = auto()
    POSNET_ATTN_K = auto()
    POSNET_ATTN_V = auto()
    POSNET_ATTN_OUT = auto()
    SHORTCONV_CONV = auto()
    SHORTCONV_INPROJ = auto()
    SHORTCONV_OUTPROJ = auto()
    VISEXP_ATTN_QKV = auto()
    VISEXP_ATTN_OUT = auto()
    VISEXP_GATE = auto()
    VISEXP_DOWN = auto()
    VISEXP_UP = auto()
    # vision
    V_MMPROJ = auto()
    V_MMPROJ_FC = auto()
    V_MMPROJ_MLP = auto()
    V_MMPROJ_PEG = auto()
    V_ENC_EMBD_CLS = auto()
    V_ENC_EMBD_PATCH = auto()
    V_ENC_EMBD_NORM = auto()
    V_ENC_EMBD_POS = auto()
    V_ENC_INPUT_NORM = auto()
    V_ENC_ATTN_QKV = auto()
    V_ENC_ATTN_Q = auto()
    V_ENC_ATTN_Q_NORM = auto()
    V_ENC_ATTN_K = auto()
    V_ENC_ATTN_K_NORM = auto()
    V_ENC_ATTN_V = auto()
    V_ENC_ATTN_O = auto()
    V_ENC_ATTN_O_NORM = auto()
    V_ENC_POST_ATTN_NORM = auto()
    V_ENC_FFN_UP = auto()
    V_ENC_FFN_GATE = auto()
    V_ENC_FFN_DOWN = auto()
    V_LAYER_SCALE_1 = auto()
    V_LAYER_SCALE_2 = auto()
    V_PRE_NORM = auto()
    V_POST_NORM = auto()
    V_MM_POST_NORM = auto()
    V_MM_INP_NORM = auto()
    V_MM_INP_PROJ = auto() # gemma3
    V_MM_SOFT_EMB_NORM = auto() # gemma3
    V_MM_EMBEDDING = auto() # gemma3n
    V_MM_HARD_EMB_NORM = auto() # gemma3n
    V_ENC_CONV_STEM = auto() # gemma3n
    V_ENC_CONV_STEM_NORM = auto() # gemma3n
    V_ENC_MSFA_EXP = auto() # gemma3n
    V_ENC_MSFA_EXP_NORM = auto() # gemma3n
    V_ENC_MSFA_PROJ = auto() # gemma3n
    V_ENC_MSFA_PROJ_NORM = auto() # gemma3n
    V_ENC_MSFA_NORM = auto() # gemma3n
    V_RESMPL_POS_EMBD_K = auto() # minicpmv
    V_RESMPL_ATTN_Q = auto() # minicpmv
    V_RESMPL_ATTN_K = auto() # minicpmv
    V_RESMPL_ATTN_V = auto() # minicpmv
    V_RESMPL_ATTN_OUT = auto() # minicpmv
    V_RESMPL_KV = auto() # minicpmv
    V_RESMPL_KV_NORM = auto() # minicpmv
    V_RESMPL_POST_NORM = auto() # minicpmv
    V_RESMPL_Q_NORM = auto() # minicpmv
    V_RESMPL_PROJ = auto() # minicpmv
    V_RESMPL_QUERY = auto() # minicpmv
    V_TOK_EMBD_IMG_BREAK = auto() # pixtral
    V_MM_PATCH_MERGER = auto() # mistral small 3.1
    V_DS_NORM = auto() # qwen3vl
    V_DS_FC1 = auto() # qwen3vl
    V_DS_FC2 = auto() # qwen3vl
    V_MM_POST_FC_NORM = auto() # cogvlm
    V_MM_UP = auto() # cogvlm
    V_MM_DOWN = auto() # cogvlm
    V_MM_GATE = auto() # cogvlm
    V_TOK_BOI = auto() # cogvlm
    V_TOK_EOI = auto() # cogvlm
    # audio (mtmd)
    A_ENC_EMBD_POS = auto()
    A_ENC_EMBD_NORM = auto()
    A_ENC_EMBD_TO_LOGITS = auto() # lfm2
    A_ENC_CONV1D = auto()
    A_ENC_CONV1D_NORM = auto() # gemma3n
    A_PRE_NORM = auto()
    A_POST_NORM = auto()
    A_ENC_LAYER_PRE_NORM = auto() # gemma3n
    A_ENC_ATTN_Q = auto()
    A_ENC_ATTN_K = auto()
    A_ENC_ATTN_V = auto()
    A_ENC_PER_DIM_SCALE = auto() # gemma3n
    A_ENC_INPUT_NORM = auto()
    A_ENC_OUTPUT = auto()
    A_ENC_OUTPUT_NORM = auto()
    A_ENC_FFN_UP = auto()
    A_ENC_FFN_NORM = auto()
    A_ENC_FFN_POST_NORM = auto() # gemma3n
    A_ENC_FFN_SCALE = auto() # gemma3n
    A_ENC_FFN_GATE = auto()
    A_ENC_FFN_DOWN = auto()
    A_ENC_FFN_UP_1 = auto() # lfm2, gemma3n
    A_ENC_FFN_NORM_1 = auto() # lfm2, gemma3n (pre-norm)
    A_ENC_FFN_POST_NORM_1 = auto() # gemma3n
    A_ENC_FFN_SCALE_1 = auto() # gemma3n
    A_ENC_FFN_GATE_1 = auto() # lfm2, gemma3n
    A_ENC_FFN_DOWN_1 = auto() # lfm2, gemma3n
    A_MMPROJ = auto()
    A_MMPROJ_FC = auto()
    A_MM_NORM_PRE = auto()
    A_MM_NORM_MID = auto()
    A_MM_EMBEDDING = auto() # gemma3n
    A_MM_HARD_EMB_NORM = auto() # gemma3n
    A_MM_SOFT_EMB_NORM = auto() # gemma3n
    A_MM_INP_PROJ = auto() # gemma3n
    # nextn/mtp
    NEXTN_EH_PROJ = auto()
    NEXTN_EMBED_TOKENS = auto()
    NEXTN_ENORM = auto()
    NEXTN_HNORM = auto()
    NEXTN_SHARED_HEAD_HEAD = auto()
    NEXTN_SHARED_HEAD_NORM = auto()
    # lfm2 audio
    A_ENC_NORM_CONV = auto()
    A_ENC_LINEAR_POS = auto()
    A_ENC_POS_BIAS_U = auto()
    A_ENC_POS_BIAS_V = auto()
    A_ENC_OUT = auto()
    A_ENC_CONV_DW = auto() # SSM conv
    A_ENC_CONV_NORM = auto() # SSM conv
    A_ENC_CONV_PW1 = auto()
    A_ENC_CONV_PW2 = auto()
787
788
# Serialized architecture identifier for each MODEL_ARCH member. These strings
# are written into GGUF files (general.architecture) — never rename an entry.
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.MMPROJ: "clip", # dummy arch for clip.cpp
    MODEL_ARCH.LLAMA: "llama",
    MODEL_ARCH.LLAMA4: "llama4",
    MODEL_ARCH.DECI: "deci",
    MODEL_ARCH.FALCON: "falcon",
    MODEL_ARCH.BAICHUAN: "baichuan",
    MODEL_ARCH.GROK: "grok",
    MODEL_ARCH.GPT2: "gpt2",
    MODEL_ARCH.GPTJ: "gptj",
    MODEL_ARCH.GPTNEOX: "gptneox",
    MODEL_ARCH.MPT: "mpt",
    MODEL_ARCH.STARCODER: "starcoder",
    MODEL_ARCH.REFACT: "refact",
    MODEL_ARCH.BERT: "bert",
    MODEL_ARCH.MODERN_BERT: "modern-bert",
    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
    MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe",
    MODEL_ARCH.NEO_BERT: "neo-bert",
    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
    MODEL_ARCH.JINA_BERT_V3: "jina-bert-v3",
    MODEL_ARCH.BLOOM: "bloom",
    MODEL_ARCH.STABLELM: "stablelm",
    MODEL_ARCH.QWEN: "qwen",
    MODEL_ARCH.QWEN2: "qwen2",
    MODEL_ARCH.QWEN2MOE: "qwen2moe",
    MODEL_ARCH.QWEN2VL: "qwen2vl",
    MODEL_ARCH.QWEN3: "qwen3",
    MODEL_ARCH.QWEN3MOE: "qwen3moe",
    MODEL_ARCH.QWEN3NEXT: "qwen3next",
    MODEL_ARCH.QWEN3VL: "qwen3vl",
    MODEL_ARCH.QWEN3VLMOE: "qwen3vlmoe",
    MODEL_ARCH.QWEN35: "qwen35",
    MODEL_ARCH.QWEN35MOE: "qwen35moe",
    MODEL_ARCH.PHI2: "phi2",
    MODEL_ARCH.PHI3: "phi3",
    MODEL_ARCH.PHIMOE: "phimoe",
    MODEL_ARCH.PLAMO: "plamo",
    MODEL_ARCH.PLAMO2: "plamo2",
    MODEL_ARCH.PLAMO3: "plamo3",
    MODEL_ARCH.CODESHELL: "codeshell",
    MODEL_ARCH.ORION: "orion",
    MODEL_ARCH.INTERNLM2: "internlm2",
    MODEL_ARCH.MINICPM: "minicpm",
    MODEL_ARCH.MINICPM3: "minicpm3",
    MODEL_ARCH.GEMMA: "gemma",
    MODEL_ARCH.GEMMA2: "gemma2",
    MODEL_ARCH.GEMMA3: "gemma3",
    MODEL_ARCH.GEMMA3N: "gemma3n",
    MODEL_ARCH.GEMMA_EMBEDDING: "gemma-embedding",
    MODEL_ARCH.STARCODER2: "starcoder2",
    MODEL_ARCH.RWKV6: "rwkv6",
    MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
    MODEL_ARCH.RWKV7: "rwkv7",
    MODEL_ARCH.ARWKV7: "arwkv7",
    MODEL_ARCH.MAMBA: "mamba",
    MODEL_ARCH.MAMBA2: "mamba2",
    MODEL_ARCH.JAMBA: "jamba",
    MODEL_ARCH.XVERSE: "xverse",
    MODEL_ARCH.COMMAND_R: "command-r",
    MODEL_ARCH.COHERE2: "cohere2",
    MODEL_ARCH.DBRX: "dbrx",
    MODEL_ARCH.OLMO: "olmo",
    MODEL_ARCH.OLMO2: "olmo2",
    MODEL_ARCH.OLMOE: "olmoe",
    MODEL_ARCH.OPENELM: "openelm",
    MODEL_ARCH.ARCTIC: "arctic",
    MODEL_ARCH.DEEPSEEK: "deepseek",
    MODEL_ARCH.DEEPSEEK2: "deepseek2",
    MODEL_ARCH.CHATGLM: "chatglm",
    MODEL_ARCH.GLM4: "glm4",
    MODEL_ARCH.GLM4_MOE: "glm4moe",
    MODEL_ARCH.BITNET: "bitnet",
    MODEL_ARCH.T5: "t5",
    MODEL_ARCH.T5ENCODER: "t5encoder",
    MODEL_ARCH.JAIS: "jais",
    MODEL_ARCH.NEMOTRON: "nemotron",
    MODEL_ARCH.NEMOTRON_H: "nemotron_h",
    MODEL_ARCH.NEMOTRON_H_MOE: "nemotron_h_moe",
    MODEL_ARCH.EXAONE: "exaone",
    MODEL_ARCH.EXAONE4: "exaone4",
    MODEL_ARCH.EXAONE_MOE: "exaone-moe",
    MODEL_ARCH.GRANITE: "granite",
    MODEL_ARCH.GRANITE_MOE: "granitemoe",
    MODEL_ARCH.GRANITE_HYBRID: "granitehybrid",
    MODEL_ARCH.CHAMELEON: "chameleon",
    MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
    MODEL_ARCH.PLM: "plm",
    MODEL_ARCH.BAILINGMOE: "bailingmoe",
    MODEL_ARCH.BAILINGMOE2: "bailingmoe2",
    MODEL_ARCH.DOTS1: "dots1",
    MODEL_ARCH.ARCEE: "arcee",
    MODEL_ARCH.AFMOE: "afmoe",
    MODEL_ARCH.ERNIE4_5: "ernie4_5",
    MODEL_ARCH.ERNIE4_5_MOE: "ernie4_5-moe",
    MODEL_ARCH.FALCON_H1: "falcon-h1",
    MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
    MODEL_ARCH.HUNYUAN_DENSE: "hunyuan-dense",
    MODEL_ARCH.SMOLLM3: "smollm3",
    MODEL_ARCH.GPT_OSS: "gpt-oss",
    MODEL_ARCH.LFM2: "lfm2",
    MODEL_ARCH.LFM2MOE: "lfm2moe",
    MODEL_ARCH.DREAM: "dream",
    MODEL_ARCH.SMALLTHINKER: "smallthinker",
    MODEL_ARCH.LLADA: "llada",
    MODEL_ARCH.LLADA_MOE: "llada-moe",
    MODEL_ARCH.SEED_OSS: "seed_oss",
    MODEL_ARCH.GROVEMOE: "grovemoe",
    MODEL_ARCH.APERTUS: "apertus",
    MODEL_ARCH.MINIMAXM2: "minimax-m2",
    MODEL_ARCH.COGVLM: "cogvlm",
    MODEL_ARCH.RND1: "rnd1",
    MODEL_ARCH.PANGU_EMBED: "pangu-embedded",
    MODEL_ARCH.MISTRAL3: "mistral3",
    MODEL_ARCH.MIMO2: "mimo2",
    MODEL_ARCH.STEP35: "step35",
    MODEL_ARCH.LLAMA_EMBED: "llama-embed",
    MODEL_ARCH.MAINCODER: "maincoder",
    MODEL_ARCH.KIMI_LINEAR: "kimi-linear",
}
909
# Serialized names for vision projector types.
# NOTE(review): GEMMA3N, QWEN3VL and COGVLM enum members have no entry here —
# confirm whether that is intentional (their serialized names may be handled
# elsewhere) before relying on this mapping being total.
VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
    VISION_PROJECTOR_TYPE.MLP: "mlp",
    VISION_PROJECTOR_TYPE.LDP: "ldp",
    VISION_PROJECTOR_TYPE.LDPV2: "ldpv2",
    VISION_PROJECTOR_TYPE.RESAMPLER: "resampler",
    VISION_PROJECTOR_TYPE.GLM_EDGE: "adapter",
    VISION_PROJECTOR_TYPE.MERGER: "qwen2vl_merger",
    VISION_PROJECTOR_TYPE.GEMMA3: "gemma3",
}
919
# Serialized-name template for every tensor kind. Entries containing "{bid}"
# are per-block (per-layer) tensors and are formatted with the block index
# before use; the remaining entries name model-wide tensors. Prefix
# conventions visible below: "blk." LLM layers, "dec."/"enc." T5-style
# decoder/encoder, "v." vision encoder, "a." audio encoder, "mm." multimodal
# projector.
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.TOKEN_EMBD: "token_embd",
    MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
    MODEL_TENSOR.TOKEN_TYPES: "token_types",
    MODEL_TENSOR.POS_EMBD: "position_embd",
    MODEL_TENSOR.OUTPUT_NORM: "output_norm",
    MODEL_TENSOR.OUTPUT: "output",
    MODEL_TENSOR.DENSE_2_OUT: "dense_2", # embeddinggemma 2_Dense
    MODEL_TENSOR.DENSE_3_OUT: "dense_3", # embeddinggemma 3_Dense
    MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
    MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
    MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
    MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
    MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
    MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
    MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
    MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
    MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
    MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
    MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
    MODEL_TENSOR.ATTN_SINKS: "blk.{bid}.attn_sinks",
    MODEL_TENSOR.ATTN_GATE: "blk.{bid}.attn_gate",
    MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
    MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
    MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
    MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
    MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
    MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
    MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
    MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm", # NOTE(review): same serialized name as FFN_NORM above — appears deliberate (the two enum values alias one on-disk tensor); confirm before changing
    MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
    MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
    MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
    MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
    MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
    MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
    MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
    MODEL_TENSOR.FFN_GATE_CHEXP: "blk.{bid}.ffn_gate_chexps",
    MODEL_TENSOR.FFN_DOWN_CHEXP: "blk.{bid}.ffn_down_chexps",
    MODEL_TENSOR.FFN_UP_CHEXP: "blk.{bid}.ffn_up_chexps",
    MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
    MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
    MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
    MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
    MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
    MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
    MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
    MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: "per_layer_token_embd", # gemma3n
    MODEL_TENSOR.PER_LAYER_MODEL_PROJ: "per_layer_model_proj", # gemma3n
    MODEL_TENSOR.PER_LAYER_PROJ_NORM: "per_layer_proj_norm", # gemma3n
    MODEL_TENSOR.ALTUP_UNEMBD_PROJ: "altup_unembd_proj", # gemma3n
    MODEL_TENSOR.ALTUP_PROJ: "altup_proj", # gemma3n
    MODEL_TENSOR.PER_LAYER_INP_GATE: "blk.{bid}.inp_gate", # gemma3n
    MODEL_TENSOR.PER_LAYER_PROJ: "blk.{bid}.proj", # gemma3n
    MODEL_TENSOR.PER_LAYER_POST_NORM: "blk.{bid}.post_norm", # gemma3n
    MODEL_TENSOR.ALTUP_CORRECT_COEF: "blk.{bid}.altup_correct_coef", # gemma3n
    MODEL_TENSOR.ALTUP_CORRECT_SCALE: "blk.{bid}.altup_correct_scale", # gemma3n
    MODEL_TENSOR.ALTUP_PREDICT_COEF: "blk.{bid}.altup_predict_coef", # gemma3n
    MODEL_TENSOR.ALTUP_ROUTER: "blk.{bid}.altup_router", # gemma3n
    MODEL_TENSOR.ALTUP_ROUTER_NORM: "blk.{bid}.altup_router_norm", # gemma3n
    MODEL_TENSOR.LAUREL_L: "blk.{bid}.laurel_l", # gemma3n
    MODEL_TENSOR.LAUREL_R: "blk.{bid}.laurel_r", # gemma3n
    MODEL_TENSOR.LAUREL_POST_NORM: "blk.{bid}.laurel_post_norm", # gemma3n
    MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
    MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
    MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
    MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
    MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm",
    MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
    MODEL_TENSOR.SSM_B_NORM: "blk.{bid}.ssm_b_norm",
    MODEL_TENSOR.SSM_C_NORM: "blk.{bid}.ssm_c_norm",
    MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
    MODEL_TENSOR.SSM_NORM: "blk.{bid}.ssm_norm",
    MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
    MODEL_TENSOR.SSM_ALPHA: "blk.{bid}.ssm_alpha", # qwen3.5
    MODEL_TENSOR.SSM_BETA_ALPHA: "blk.{bid}.ssm_ba",
    MODEL_TENSOR.SSM_CONV1D_Q: "blk.{bid}.ssm_conv1d_q", # Kimi Linear
    MODEL_TENSOR.SSM_CONV1D_K: "blk.{bid}.ssm_conv1d_k", # Kimi Linear
    MODEL_TENSOR.SSM_CONV1D_V: "blk.{bid}.ssm_conv1d_v", # Kimi Linear
    MODEL_TENSOR.SSM_F_A: "blk.{bid}.ssm_f_a", # Kimi Linear
    MODEL_TENSOR.SSM_F_B: "blk.{bid}.ssm_f_b", # Kimi Linear
    MODEL_TENSOR.SSM_BETA: "blk.{bid}.ssm_beta", # Kimi Linear qwen3.5
    MODEL_TENSOR.SSM_G_A: "blk.{bid}.ssm_g_a", # Kimi Linear
    MODEL_TENSOR.SSM_G_B: "blk.{bid}.ssm_g_b", # Kimi Linear
    MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
    MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
    MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
    MODEL_TENSOR.TIME_MIX_A0: "blk.{bid}.time_mix_a0",
    MODEL_TENSOR.TIME_MIX_A1: "blk.{bid}.time_mix_a1",
    MODEL_TENSOR.TIME_MIX_A2: "blk.{bid}.time_mix_a2",
    MODEL_TENSOR.TIME_MIX_V0: "blk.{bid}.time_mix_v0",
    MODEL_TENSOR.TIME_MIX_V1: "blk.{bid}.time_mix_v1",
    MODEL_TENSOR.TIME_MIX_V2: "blk.{bid}.time_mix_v2",
    MODEL_TENSOR.TIME_MIX_G1: "blk.{bid}.time_mix_g1",
    MODEL_TENSOR.TIME_MIX_G2: "blk.{bid}.time_mix_g2",
    MODEL_TENSOR.TIME_MIX_K_K: "blk.{bid}.time_mix_k_k",
    MODEL_TENSOR.TIME_MIX_K_A: "blk.{bid}.time_mix_k_a",
    MODEL_TENSOR.TIME_MIX_R_K: "blk.{bid}.time_mix_r_k",
    MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
    MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
    MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
    MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
    MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
    MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
    MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
    MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
    MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
    MODEL_TENSOR.TIME_MIX_DECAY_W1: "blk.{bid}.time_mix_decay_w1",
    MODEL_TENSOR.TIME_MIX_DECAY_W2: "blk.{bid}.time_mix_decay_w2",
    MODEL_TENSOR.TIME_MIX_KEY: "blk.{bid}.time_mix_key",
    MODEL_TENSOR.TIME_MIX_VALUE: "blk.{bid}.time_mix_value",
    MODEL_TENSOR.TIME_MIX_RECEPTANCE: "blk.{bid}.time_mix_receptance",
    MODEL_TENSOR.TIME_MIX_GATE: "blk.{bid}.time_mix_gate",
    MODEL_TENSOR.TIME_MIX_LN: "blk.{bid}.time_mix_ln",
    MODEL_TENSOR.TIME_MIX_OUTPUT: "blk.{bid}.time_mix_output",
    MODEL_TENSOR.CHANNEL_MIX_LERP_K: "blk.{bid}.channel_mix_lerp_k",
    MODEL_TENSOR.CHANNEL_MIX_LERP_R: "blk.{bid}.channel_mix_lerp_r",
    MODEL_TENSOR.CHANNEL_MIX_KEY: "blk.{bid}.channel_mix_key",
    MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: "blk.{bid}.channel_mix_receptance",
    MODEL_TENSOR.CHANNEL_MIX_VALUE: "blk.{bid}.channel_mix_value",
    MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
    MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
    MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
    MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
    MODEL_TENSOR.ATTN_K_B: "blk.{bid}.attn_k_b",
    MODEL_TENSOR.ATTN_V_B: "blk.{bid}.attn_v_b",
    MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
    MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
    MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
    MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
    MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
    MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
    MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
    MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
    MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
    MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
    MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
    MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
    MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
    MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
    MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
    MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
    MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
    MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
    MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
    MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
    MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
    MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
    MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
    MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
    MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
    MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
    MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
    MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
    MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
    MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
    MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
    MODEL_TENSOR.CLS: "cls",
    MODEL_TENSOR.CLS_OUT: "cls.output",
    MODEL_TENSOR.CONV1D: "conv1d",
    MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
    MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
    MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
    MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
    MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
    MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
    MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
    MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
    MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
    MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
    MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
    MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
    MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
    MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
    MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
    MODEL_TENSOR.SHORTCONV_CONV: "blk.{bid}.shortconv.conv",
    MODEL_TENSOR.SHORTCONV_INPROJ: "blk.{bid}.shortconv.in_proj",
    MODEL_TENSOR.SHORTCONV_OUTPROJ: "blk.{bid}.shortconv.out_proj",
    MODEL_TENSOR.VISEXP_ATTN_QKV: "blk.{bid}.vis_attn_qkv",
    MODEL_TENSOR.VISEXP_ATTN_OUT: "blk.{bid}.vis_attn_output",
    MODEL_TENSOR.VISEXP_GATE: "blk.{bid}.vis_gate",
    MODEL_TENSOR.VISEXP_DOWN: "blk.{bid}.vis_down",
    MODEL_TENSOR.VISEXP_UP: "blk.{bid}.vis_up",
    # vision
    MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
    MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
    MODEL_TENSOR.V_MMPROJ_MLP: "mm.model.mlp.{bid}",
    MODEL_TENSOR.V_MMPROJ_PEG: "mm.model.peg.{bid}",
    MODEL_TENSOR.V_ENC_EMBD_CLS: "v.class_embd",
    MODEL_TENSOR.V_ENC_EMBD_PATCH: "v.patch_embd",
    MODEL_TENSOR.V_ENC_EMBD_NORM: "v.norm_embd",
    MODEL_TENSOR.V_ENC_EMBD_POS: "v.position_embd",
    MODEL_TENSOR.V_ENC_ATTN_QKV: "v.blk.{bid}.attn_qkv",
    MODEL_TENSOR.V_ENC_ATTN_Q: "v.blk.{bid}.attn_q",
    MODEL_TENSOR.V_ENC_ATTN_Q_NORM: "v.blk.{bid}.attn_q_norm",
    MODEL_TENSOR.V_ENC_ATTN_K: "v.blk.{bid}.attn_k",
    MODEL_TENSOR.V_ENC_ATTN_K_NORM: "v.blk.{bid}.attn_k_norm",
    MODEL_TENSOR.V_ENC_ATTN_V: "v.blk.{bid}.attn_v",
    MODEL_TENSOR.V_ENC_INPUT_NORM: "v.blk.{bid}.ln1",
    MODEL_TENSOR.V_ENC_ATTN_O: "v.blk.{bid}.attn_out",
    MODEL_TENSOR.V_ENC_ATTN_O_NORM: "v.blk.{bid}.attn_out_norm",
    MODEL_TENSOR.V_ENC_POST_ATTN_NORM: "v.blk.{bid}.ln2",
    MODEL_TENSOR.V_ENC_FFN_UP: "v.blk.{bid}.ffn_up",
    MODEL_TENSOR.V_ENC_FFN_GATE: "v.blk.{bid}.ffn_gate",
    MODEL_TENSOR.V_ENC_FFN_DOWN: "v.blk.{bid}.ffn_down",
    MODEL_TENSOR.V_LAYER_SCALE_1: "v.blk.{bid}.ls1",
    MODEL_TENSOR.V_LAYER_SCALE_2: "v.blk.{bid}.ls2",
    MODEL_TENSOR.V_PRE_NORM: "v.pre_ln",
    MODEL_TENSOR.V_POST_NORM: "v.post_ln",
    MODEL_TENSOR.V_MM_POST_NORM: "mm.post_norm",
    MODEL_TENSOR.V_MM_INP_PROJ: "mm.input_projection",
    MODEL_TENSOR.V_MM_INP_NORM: "mm.input_norm",
    MODEL_TENSOR.V_MM_SOFT_EMB_NORM: "mm.soft_emb_norm", # gemma3n
    MODEL_TENSOR.V_MM_EMBEDDING: "mm.embedding", # gemma3n
    MODEL_TENSOR.V_MM_HARD_EMB_NORM: "mm.hard_emb_norm", # gemma3n
    MODEL_TENSOR.V_ENC_CONV_STEM: "v.conv_stem.conv", # gemma3n
    MODEL_TENSOR.V_ENC_CONV_STEM_NORM: "v.conv_stem.bn", # gemma3n
    MODEL_TENSOR.V_ENC_MSFA_EXP: "v.msfa.ffn.pw_exp.conv", # gemma3n
    MODEL_TENSOR.V_ENC_MSFA_EXP_NORM: "v.msfa.ffn.pw_exp.bn", # gemma3n
    MODEL_TENSOR.V_ENC_MSFA_PROJ: "v.msfa.ffn.pw_proj.conv", # gemma3n
    MODEL_TENSOR.V_ENC_MSFA_PROJ_NORM: "v.msfa.ffn.pw_proj.bn", # gemma3n
    MODEL_TENSOR.V_ENC_MSFA_NORM: "v.msfa.norm", # gemma3n
    MODEL_TENSOR.V_RESMPL_POS_EMBD_K: "resampler.pos_embd_k",
    MODEL_TENSOR.V_RESMPL_ATTN_Q: "resampler.attn.q",
    MODEL_TENSOR.V_RESMPL_ATTN_K: "resampler.attn.k",
    MODEL_TENSOR.V_RESMPL_ATTN_V: "resampler.attn.v",
    MODEL_TENSOR.V_RESMPL_ATTN_OUT: "resampler.attn.out",
    MODEL_TENSOR.V_RESMPL_KV: "resampler.kv",
    MODEL_TENSOR.V_RESMPL_KV_NORM: "resampler.ln_kv",
    MODEL_TENSOR.V_RESMPL_POST_NORM: "resampler.ln_post",
    MODEL_TENSOR.V_RESMPL_Q_NORM: "resampler.ln_q",
    MODEL_TENSOR.V_RESMPL_PROJ: "resampler.proj",
    MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
    MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break", # pixtral
    MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger", # mistral small 3.1
    MODEL_TENSOR.V_DS_NORM: "v.deepstack.{bid}.norm",
    MODEL_TENSOR.V_DS_FC1: "v.deepstack.{bid}.fc1",
    MODEL_TENSOR.V_DS_FC2: "v.deepstack.{bid}.fc2",
    MODEL_TENSOR.V_MM_POST_FC_NORM: "mm.post_fc_norm", # cogvlm
    MODEL_TENSOR.V_MM_UP: "mm.up",
    MODEL_TENSOR.V_MM_DOWN: "mm.down",
    MODEL_TENSOR.V_MM_GATE: "mm.gate",
    MODEL_TENSOR.V_TOK_BOI: "v.boi",
    MODEL_TENSOR.V_TOK_EOI: "v.eoi",
    # audio (mtmd)
    # note: all audio tensor names must use prefix "a." or "mm.a."
    MODEL_TENSOR.A_ENC_EMBD_POS: "a.position_embd",
    MODEL_TENSOR.A_ENC_EMBD_NORM: "a.position_embd_norm",
    MODEL_TENSOR.A_ENC_EMBD_TO_LOGITS: "a.embd_to_logits",
    MODEL_TENSOR.A_ENC_CONV1D: "a.conv1d.{bid}",
    MODEL_TENSOR.A_ENC_CONV1D_NORM: "a.conv1d.{bid}.norm",
    MODEL_TENSOR.A_PRE_NORM: "a.pre_ln",
    MODEL_TENSOR.A_POST_NORM: "a.post_ln",
    MODEL_TENSOR.A_ENC_LAYER_PRE_NORM: "a.blk.{bid}.layer_pre_norm",
    MODEL_TENSOR.A_ENC_ATTN_Q: "a.blk.{bid}.attn_q",
    MODEL_TENSOR.A_ENC_ATTN_K: "a.blk.{bid}.attn_k",
    MODEL_TENSOR.A_ENC_ATTN_V: "a.blk.{bid}.attn_v",
    MODEL_TENSOR.A_ENC_PER_DIM_SCALE: "a.blk.{bid}.per_dim_scale",
    MODEL_TENSOR.A_ENC_INPUT_NORM: "a.blk.{bid}.ln1",
    MODEL_TENSOR.A_ENC_OUTPUT: "a.blk.{bid}.attn_out",
    MODEL_TENSOR.A_ENC_OUTPUT_NORM: "a.blk.{bid}.ln2",
    MODEL_TENSOR.A_ENC_FFN_NORM: "a.blk.{bid}.ffn_norm",
    MODEL_TENSOR.A_ENC_FFN_POST_NORM: "a.blk.{bid}.ffn_post_norm",
    MODEL_TENSOR.A_ENC_FFN_SCALE: "a.blk.{bid}.ffn_scale",
    MODEL_TENSOR.A_ENC_FFN_UP: "a.blk.{bid}.ffn_up",
    MODEL_TENSOR.A_ENC_FFN_GATE: "a.blk.{bid}.ffn_gate",
    MODEL_TENSOR.A_ENC_FFN_DOWN: "a.blk.{bid}.ffn_down",
    MODEL_TENSOR.A_ENC_FFN_NORM_1: "a.blk.{bid}.ffn_norm_1",
    MODEL_TENSOR.A_ENC_FFN_POST_NORM_1: "a.blk.{bid}.ffn_post_norm_1",
    MODEL_TENSOR.A_ENC_FFN_SCALE_1: "a.blk.{bid}.ffn_scale_1",
    MODEL_TENSOR.A_ENC_FFN_UP_1: "a.blk.{bid}.ffn_up_1",
    MODEL_TENSOR.A_ENC_FFN_GATE_1: "a.blk.{bid}.ffn_gate_1",
    MODEL_TENSOR.A_ENC_FFN_DOWN_1: "a.blk.{bid}.ffn_down_1",
    MODEL_TENSOR.A_MMPROJ: "mm.a.mlp.{bid}",
    MODEL_TENSOR.A_MMPROJ_FC: "mm.a.fc",
    MODEL_TENSOR.A_MM_NORM_PRE: "mm.a.norm_pre",
    MODEL_TENSOR.A_MM_NORM_MID: "mm.a.norm_mid",
    MODEL_TENSOR.A_MM_INP_PROJ: "mm.a.input_projection", # gemma3n
    MODEL_TENSOR.A_MM_SOFT_EMB_NORM: "mm.a.soft_emb_norm", # gemma3n
    MODEL_TENSOR.A_MM_EMBEDDING: "mm.a.embedding", # gemma3n
    MODEL_TENSOR.A_MM_HARD_EMB_NORM: "mm.a.hard_emb_norm", # gemma3n
    # lfm2 audio
    MODEL_TENSOR.A_ENC_NORM_CONV: "a.blk.{bid}.norm_conv",
    MODEL_TENSOR.A_ENC_LINEAR_POS: "a.blk.{bid}.linear_pos",
    MODEL_TENSOR.A_ENC_POS_BIAS_U: "a.blk.{bid}.pos_bias_u",
    MODEL_TENSOR.A_ENC_POS_BIAS_V: "a.blk.{bid}.pos_bias_v",
    MODEL_TENSOR.A_ENC_OUT: "a.pre_encode.out",
    MODEL_TENSOR.A_ENC_CONV_DW: "a.blk.{bid}.conv_dw",
    MODEL_TENSOR.A_ENC_CONV_NORM: "a.blk.{bid}.conv_norm",
    MODEL_TENSOR.A_ENC_CONV_PW1: "a.blk.{bid}.conv_pw1",
    MODEL_TENSOR.A_ENC_CONV_PW2: "a.blk.{bid}.conv_pw2",
    # NextN/MTP
    MODEL_TENSOR.NEXTN_EH_PROJ: "blk.{bid}.nextn.eh_proj",
    MODEL_TENSOR.NEXTN_EMBED_TOKENS: "blk.{bid}.nextn.embed_tokens",
    MODEL_TENSOR.NEXTN_ENORM: "blk.{bid}.nextn.enorm",
    MODEL_TENSOR.NEXTN_HNORM: "blk.{bid}.nextn.hnorm",
    MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: "blk.{bid}.nextn.shared_head_head",
    MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: "blk.{bid}.nextn.shared_head_norm",
}
1220
1221MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
1222 MODEL_ARCH.MMPROJ: [
1223 MODEL_TENSOR.V_MMPROJ,
1224 MODEL_TENSOR.V_MMPROJ_FC,
1225 MODEL_TENSOR.V_MMPROJ_MLP,
1226 MODEL_TENSOR.V_MMPROJ_PEG,
1227 MODEL_TENSOR.V_ENC_EMBD_CLS,
1228 MODEL_TENSOR.V_ENC_EMBD_PATCH,
1229 MODEL_TENSOR.V_ENC_EMBD_NORM,
1230 MODEL_TENSOR.V_ENC_EMBD_POS,
1231 MODEL_TENSOR.V_ENC_INPUT_NORM,
1232 MODEL_TENSOR.V_ENC_ATTN_QKV,
1233 MODEL_TENSOR.V_ENC_ATTN_Q,
1234 MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
1235 MODEL_TENSOR.V_ENC_ATTN_K,
1236 MODEL_TENSOR.V_ENC_ATTN_K_NORM,
1237 MODEL_TENSOR.V_ENC_ATTN_V,
1238 MODEL_TENSOR.V_ENC_ATTN_O,
1239 MODEL_TENSOR.V_ENC_ATTN_O_NORM,
1240 MODEL_TENSOR.V_ENC_POST_ATTN_NORM,
1241 MODEL_TENSOR.V_ENC_FFN_UP,
1242 MODEL_TENSOR.V_ENC_FFN_GATE,
1243 MODEL_TENSOR.V_ENC_FFN_DOWN,
1244 MODEL_TENSOR.V_LAYER_SCALE_1,
1245 MODEL_TENSOR.V_LAYER_SCALE_2,
1246 MODEL_TENSOR.V_PRE_NORM,
1247 MODEL_TENSOR.V_POST_NORM,
1248 MODEL_TENSOR.V_MM_POST_NORM,
1249 MODEL_TENSOR.V_MM_INP_PROJ,
1250 MODEL_TENSOR.V_MM_INP_NORM,
1251 MODEL_TENSOR.V_MM_SOFT_EMB_NORM,
1252 MODEL_TENSOR.V_MM_EMBEDDING,
1253 MODEL_TENSOR.V_MM_HARD_EMB_NORM,
1254 MODEL_TENSOR.V_ENC_CONV_STEM,
1255 MODEL_TENSOR.V_ENC_CONV_STEM_NORM,
1256 MODEL_TENSOR.V_ENC_MSFA_EXP,
1257 MODEL_TENSOR.V_ENC_MSFA_EXP_NORM,
1258 MODEL_TENSOR.V_ENC_MSFA_PROJ,
1259 MODEL_TENSOR.V_ENC_MSFA_PROJ_NORM,
1260 MODEL_TENSOR.V_ENC_MSFA_NORM,
1261 MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
1262 MODEL_TENSOR.V_RESMPL_ATTN_Q,
1263 MODEL_TENSOR.V_RESMPL_ATTN_K,
1264 MODEL_TENSOR.V_RESMPL_ATTN_V,
1265 MODEL_TENSOR.V_RESMPL_ATTN_OUT,
1266 MODEL_TENSOR.V_RESMPL_KV,
1267 MODEL_TENSOR.V_RESMPL_KV_NORM,
1268 MODEL_TENSOR.V_RESMPL_POST_NORM,
1269 MODEL_TENSOR.V_RESMPL_Q_NORM,
1270 MODEL_TENSOR.V_RESMPL_PROJ,
1271 MODEL_TENSOR.V_RESMPL_QUERY,
1272 MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
1273 MODEL_TENSOR.V_MM_PATCH_MERGER,
1274 MODEL_TENSOR.V_DS_NORM,
1275 MODEL_TENSOR.V_DS_FC1,
1276 MODEL_TENSOR.V_DS_FC2,
1277 MODEL_TENSOR.V_MM_POST_FC_NORM,
1278 MODEL_TENSOR.V_MM_UP,
1279 MODEL_TENSOR.V_MM_DOWN,
1280 MODEL_TENSOR.V_MM_GATE,
1281 MODEL_TENSOR.V_TOK_BOI,
1282 MODEL_TENSOR.V_TOK_EOI,
1283 # audio
1284 MODEL_TENSOR.A_ENC_EMBD_POS,
1285 MODEL_TENSOR.A_ENC_EMBD_NORM,
1286 MODEL_TENSOR.A_ENC_EMBD_TO_LOGITS,
1287 MODEL_TENSOR.A_ENC_CONV1D,
1288 MODEL_TENSOR.A_ENC_CONV1D_NORM,
1289 MODEL_TENSOR.A_PRE_NORM,
1290 MODEL_TENSOR.A_POST_NORM,
1291 MODEL_TENSOR.A_ENC_LAYER_PRE_NORM,
1292 MODEL_TENSOR.A_ENC_ATTN_Q,
1293 MODEL_TENSOR.A_ENC_ATTN_K,
1294 MODEL_TENSOR.A_ENC_ATTN_V,
1295 MODEL_TENSOR.A_ENC_PER_DIM_SCALE,
1296 MODEL_TENSOR.A_ENC_INPUT_NORM,
1297 MODEL_TENSOR.A_ENC_OUTPUT,
1298 MODEL_TENSOR.A_ENC_OUTPUT_NORM,
1299 MODEL_TENSOR.A_ENC_FFN_NORM,
1300 MODEL_TENSOR.A_ENC_FFN_POST_NORM,
1301 MODEL_TENSOR.A_ENC_FFN_SCALE,
1302 MODEL_TENSOR.A_ENC_FFN_UP,
1303 MODEL_TENSOR.A_ENC_FFN_GATE,
1304 MODEL_TENSOR.A_ENC_FFN_DOWN,
1305 MODEL_TENSOR.A_ENC_FFN_NORM_1,
1306 MODEL_TENSOR.A_ENC_FFN_POST_NORM_1,
1307 MODEL_TENSOR.A_ENC_FFN_SCALE_1,
1308 MODEL_TENSOR.A_ENC_FFN_UP_1,
1309 MODEL_TENSOR.A_ENC_FFN_GATE_1,
1310 MODEL_TENSOR.A_ENC_FFN_DOWN_1,
1311 MODEL_TENSOR.A_MMPROJ,
1312 MODEL_TENSOR.A_MMPROJ_FC,
1313 MODEL_TENSOR.A_MM_NORM_PRE,
1314 MODEL_TENSOR.A_MM_NORM_MID,
1315 MODEL_TENSOR.A_ENC_NORM_CONV,
1316 MODEL_TENSOR.A_ENC_LINEAR_POS,
1317 MODEL_TENSOR.A_ENC_POS_BIAS_U,
1318 MODEL_TENSOR.A_ENC_POS_BIAS_V,
1319 MODEL_TENSOR.A_ENC_OUT,
1320 MODEL_TENSOR.A_ENC_CONV_DW,
1321 MODEL_TENSOR.A_ENC_CONV_NORM,
1322 MODEL_TENSOR.A_ENC_CONV_PW1,
1323 MODEL_TENSOR.A_ENC_CONV_PW2,
1324 MODEL_TENSOR.A_MM_INP_PROJ,
1325 MODEL_TENSOR.A_MM_SOFT_EMB_NORM,
1326 MODEL_TENSOR.A_MM_EMBEDDING,
1327 MODEL_TENSOR.A_MM_HARD_EMB_NORM,
1328 ],
1329 MODEL_ARCH.LLAMA: [
1330 MODEL_TENSOR.TOKEN_EMBD,
1331 MODEL_TENSOR.OUTPUT_NORM,
1332 MODEL_TENSOR.OUTPUT,
1333 MODEL_TENSOR.ROPE_FREQS,
1334 MODEL_TENSOR.ATTN_NORM,
1335 MODEL_TENSOR.ATTN_Q,
1336 MODEL_TENSOR.ATTN_K,
1337 MODEL_TENSOR.ATTN_V,
1338 MODEL_TENSOR.ATTN_OUT,
1339 MODEL_TENSOR.ATTN_ROT_EMBD,
1340 MODEL_TENSOR.FFN_GATE_INP,
1341 MODEL_TENSOR.FFN_NORM,
1342 MODEL_TENSOR.FFN_GATE,
1343 MODEL_TENSOR.FFN_DOWN,
1344 MODEL_TENSOR.FFN_UP,
1345 MODEL_TENSOR.FFN_GATE_EXP,
1346 MODEL_TENSOR.FFN_DOWN_EXP,
1347 MODEL_TENSOR.FFN_UP_EXP,
1348 ],
1349 MODEL_ARCH.LLAMA4: [
1350 MODEL_TENSOR.TOKEN_EMBD,
1351 MODEL_TENSOR.OUTPUT_NORM,
1352 MODEL_TENSOR.OUTPUT,
1353 MODEL_TENSOR.ROPE_FREQS,
1354 MODEL_TENSOR.ATTN_NORM,
1355 MODEL_TENSOR.ATTN_Q,
1356 MODEL_TENSOR.ATTN_K,
1357 MODEL_TENSOR.ATTN_V,
1358 MODEL_TENSOR.ATTN_OUT,
1359 MODEL_TENSOR.ATTN_ROT_EMBD,
1360 MODEL_TENSOR.FFN_GATE_INP,
1361 MODEL_TENSOR.FFN_NORM,
1362 MODEL_TENSOR.FFN_GATE,
1363 MODEL_TENSOR.FFN_DOWN,
1364 MODEL_TENSOR.FFN_UP,
1365 MODEL_TENSOR.FFN_GATE_EXP,
1366 MODEL_TENSOR.FFN_DOWN_EXP,
1367 MODEL_TENSOR.FFN_UP_EXP,
1368 MODEL_TENSOR.FFN_GATE_SHEXP,
1369 MODEL_TENSOR.FFN_DOWN_SHEXP,
1370 MODEL_TENSOR.FFN_UP_SHEXP,
1371 ],
1372 MODEL_ARCH.DECI: [
1373 MODEL_TENSOR.TOKEN_EMBD,
1374 MODEL_TENSOR.OUTPUT_NORM,
1375 MODEL_TENSOR.OUTPUT,
1376 MODEL_TENSOR.ROPE_FREQS,
1377 MODEL_TENSOR.ATTN_NORM,
1378 MODEL_TENSOR.ATTN_Q,
1379 MODEL_TENSOR.ATTN_K,
1380 MODEL_TENSOR.ATTN_V,
1381 MODEL_TENSOR.ATTN_OUT,
1382 MODEL_TENSOR.ATTN_ROT_EMBD,
1383 MODEL_TENSOR.FFN_GATE_INP,
1384 MODEL_TENSOR.FFN_NORM,
1385 MODEL_TENSOR.FFN_GATE,
1386 MODEL_TENSOR.FFN_DOWN,
1387 MODEL_TENSOR.FFN_UP,
1388 MODEL_TENSOR.FFN_GATE_EXP,
1389 MODEL_TENSOR.FFN_DOWN_EXP,
1390 MODEL_TENSOR.FFN_UP_EXP,
1391 ],
1392 MODEL_ARCH.GROK: [
1393 MODEL_TENSOR.TOKEN_EMBD,
1394 MODEL_TENSOR.OUTPUT_NORM,
1395 MODEL_TENSOR.OUTPUT,
1396 MODEL_TENSOR.ROPE_FREQS,
1397 MODEL_TENSOR.ATTN_NORM,
1398 MODEL_TENSOR.ATTN_Q,
1399 MODEL_TENSOR.ATTN_K,
1400 MODEL_TENSOR.ATTN_V,
1401 MODEL_TENSOR.ATTN_OUT,
1402 MODEL_TENSOR.ATTN_ROT_EMBD,
1403 MODEL_TENSOR.ATTN_OUT_NORM,
1404 MODEL_TENSOR.FFN_GATE_INP,
1405 MODEL_TENSOR.FFN_NORM,
1406 MODEL_TENSOR.FFN_GATE,
1407 MODEL_TENSOR.FFN_DOWN,
1408 MODEL_TENSOR.FFN_UP,
1409 MODEL_TENSOR.FFN_GATE_EXP,
1410 MODEL_TENSOR.FFN_DOWN_EXP,
1411 MODEL_TENSOR.FFN_UP_EXP,
1412 MODEL_TENSOR.FFN_POST_NORM,
1413 MODEL_TENSOR.LAYER_OUT_NORM,
1414 ],
1415 MODEL_ARCH.GPTNEOX: [
1416 MODEL_TENSOR.TOKEN_EMBD,
1417 MODEL_TENSOR.OUTPUT_NORM,
1418 MODEL_TENSOR.OUTPUT,
1419 MODEL_TENSOR.ATTN_NORM,
1420 MODEL_TENSOR.ATTN_QKV,
1421 MODEL_TENSOR.ATTN_OUT,
1422 MODEL_TENSOR.FFN_NORM,
1423 MODEL_TENSOR.FFN_DOWN,
1424 MODEL_TENSOR.FFN_UP,
1425 ],
1426 MODEL_ARCH.FALCON: [
1427 MODEL_TENSOR.TOKEN_EMBD,
1428 MODEL_TENSOR.OUTPUT_NORM,
1429 MODEL_TENSOR.OUTPUT,
1430 MODEL_TENSOR.ATTN_NORM,
1431 MODEL_TENSOR.ATTN_NORM_2,
1432 MODEL_TENSOR.ATTN_QKV,
1433 MODEL_TENSOR.ATTN_OUT,
1434 MODEL_TENSOR.FFN_DOWN,
1435 MODEL_TENSOR.FFN_UP,
1436 ],
1437 MODEL_ARCH.BAICHUAN: [
1438 MODEL_TENSOR.TOKEN_EMBD,
1439 MODEL_TENSOR.OUTPUT_NORM,
1440 MODEL_TENSOR.OUTPUT,
1441 MODEL_TENSOR.ROPE_FREQS,
1442 MODEL_TENSOR.ATTN_NORM,
1443 MODEL_TENSOR.ATTN_Q,
1444 MODEL_TENSOR.ATTN_K,
1445 MODEL_TENSOR.ATTN_V,
1446 MODEL_TENSOR.ATTN_OUT,
1447 MODEL_TENSOR.ATTN_ROT_EMBD,
1448 MODEL_TENSOR.FFN_NORM,
1449 MODEL_TENSOR.FFN_GATE,
1450 MODEL_TENSOR.FFN_DOWN,
1451 MODEL_TENSOR.FFN_UP,
1452 ],
1453 MODEL_ARCH.STARCODER: [
1454 MODEL_TENSOR.TOKEN_EMBD,
1455 MODEL_TENSOR.POS_EMBD,
1456 MODEL_TENSOR.OUTPUT_NORM,
1457 MODEL_TENSOR.OUTPUT,
1458 MODEL_TENSOR.ATTN_NORM,
1459 MODEL_TENSOR.ATTN_QKV,
1460 MODEL_TENSOR.ATTN_OUT,
1461 MODEL_TENSOR.FFN_NORM,
1462 MODEL_TENSOR.FFN_DOWN,
1463 MODEL_TENSOR.FFN_UP,
1464 ],
1465 MODEL_ARCH.BERT: [
1466 MODEL_TENSOR.TOKEN_EMBD,
1467 MODEL_TENSOR.TOKEN_EMBD_NORM,
1468 MODEL_TENSOR.TOKEN_TYPES,
1469 MODEL_TENSOR.POS_EMBD,
1470 MODEL_TENSOR.OUTPUT_NORM,
1471 MODEL_TENSOR.ATTN_OUT_NORM,
1472 MODEL_TENSOR.ATTN_QKV,
1473 MODEL_TENSOR.ATTN_Q,
1474 MODEL_TENSOR.ATTN_K,
1475 MODEL_TENSOR.ATTN_V,
1476 MODEL_TENSOR.ATTN_OUT,
1477 MODEL_TENSOR.FFN_DOWN,
1478 MODEL_TENSOR.FFN_UP,
1479 MODEL_TENSOR.LAYER_OUT_NORM,
1480 MODEL_TENSOR.CLS,
1481 MODEL_TENSOR.CLS_OUT,
1482 ],
1483 MODEL_ARCH.MODERN_BERT: [
1484 MODEL_TENSOR.TOKEN_EMBD,
1485 MODEL_TENSOR.TOKEN_EMBD_NORM,
1486 MODEL_TENSOR.OUTPUT_NORM,
1487 MODEL_TENSOR.ATTN_NORM,
1488 MODEL_TENSOR.ATTN_OUT,
1489 MODEL_TENSOR.ATTN_QKV,
1490 MODEL_TENSOR.FFN_UP,
1491 MODEL_TENSOR.FFN_DOWN,
1492 MODEL_TENSOR.FFN_NORM,
1493 MODEL_TENSOR.CLS,
1494 MODEL_TENSOR.CLS_OUT,
1495 ],
1496 MODEL_ARCH.NOMIC_BERT: [
1497 MODEL_TENSOR.TOKEN_EMBD,
1498 MODEL_TENSOR.TOKEN_EMBD_NORM,
1499 MODEL_TENSOR.TOKEN_TYPES,
1500 MODEL_TENSOR.POS_EMBD,
1501 MODEL_TENSOR.OUTPUT_NORM,
1502 MODEL_TENSOR.ATTN_OUT_NORM,
1503 MODEL_TENSOR.ATTN_QKV,
1504 MODEL_TENSOR.ATTN_OUT,
1505 MODEL_TENSOR.FFN_GATE,
1506 MODEL_TENSOR.FFN_DOWN,
1507 MODEL_TENSOR.FFN_UP,
1508 MODEL_TENSOR.LAYER_OUT_NORM,
1509 ],
1510 MODEL_ARCH.NOMIC_BERT_MOE: [
1511 MODEL_TENSOR.TOKEN_EMBD,
1512 MODEL_TENSOR.TOKEN_EMBD_NORM,
1513 MODEL_TENSOR.TOKEN_TYPES,
1514 MODEL_TENSOR.POS_EMBD,
1515 MODEL_TENSOR.OUTPUT_NORM,
1516 MODEL_TENSOR.ATTN_OUT_NORM,
1517 MODEL_TENSOR.ATTN_QKV,
1518 MODEL_TENSOR.ATTN_OUT,
1519 MODEL_TENSOR.FFN_DOWN,
1520 MODEL_TENSOR.FFN_UP,
1521 MODEL_TENSOR.FFN_GATE_INP,
1522 MODEL_TENSOR.FFN_DOWN_EXP,
1523 MODEL_TENSOR.FFN_UP_EXP,
1524 MODEL_TENSOR.LAYER_OUT_NORM,
1525 ],
1526 MODEL_ARCH.NEO_BERT: [
1527 MODEL_TENSOR.TOKEN_EMBD,
1528 MODEL_TENSOR.ATTN_NORM,
1529 MODEL_TENSOR.ATTN_QKV,
1530 MODEL_TENSOR.ATTN_OUT,
1531 MODEL_TENSOR.FFN_NORM,
1532 MODEL_TENSOR.FFN_DOWN,
1533 MODEL_TENSOR.FFN_UP,
1534 MODEL_TENSOR.ENC_OUTPUT_NORM,
1535 MODEL_TENSOR.CLS,
1536 MODEL_TENSOR.CLS_OUT,
1537 ],
1538 MODEL_ARCH.JINA_BERT_V2: [
1539 MODEL_TENSOR.TOKEN_EMBD,
1540 MODEL_TENSOR.TOKEN_EMBD_NORM,
1541 MODEL_TENSOR.TOKEN_TYPES,
1542 MODEL_TENSOR.ATTN_NORM_2,
1543 MODEL_TENSOR.ATTN_OUT_NORM,
1544 MODEL_TENSOR.ATTN_Q,
1545 MODEL_TENSOR.ATTN_Q_NORM,
1546 MODEL_TENSOR.ATTN_K,
1547 MODEL_TENSOR.ATTN_K_NORM,
1548 MODEL_TENSOR.ATTN_V,
1549 MODEL_TENSOR.ATTN_OUT,
1550 MODEL_TENSOR.FFN_UP,
1551 MODEL_TENSOR.FFN_GATE,
1552 MODEL_TENSOR.FFN_DOWN,
1553 MODEL_TENSOR.LAYER_OUT_NORM,
1554 MODEL_TENSOR.CLS,
1555 ],
1556 MODEL_ARCH.JINA_BERT_V3: [
1557 MODEL_TENSOR.TOKEN_EMBD,
1558 MODEL_TENSOR.TOKEN_EMBD_NORM,
1559 MODEL_TENSOR.TOKEN_TYPES,
1560 MODEL_TENSOR.OUTPUT_NORM,
1561 MODEL_TENSOR.ATTN_OUT_NORM,
1562 MODEL_TENSOR.ATTN_QKV,
1563 MODEL_TENSOR.ATTN_OUT,
1564 MODEL_TENSOR.FFN_DOWN,
1565 MODEL_TENSOR.FFN_UP,
1566 MODEL_TENSOR.LAYER_OUT_NORM,
1567 ],
1568 MODEL_ARCH.MPT: [
1569 MODEL_TENSOR.TOKEN_EMBD,
1570 MODEL_TENSOR.OUTPUT_NORM,
1571 MODEL_TENSOR.OUTPUT,
1572 MODEL_TENSOR.ATTN_NORM,
1573 MODEL_TENSOR.ATTN_QKV,
1574 MODEL_TENSOR.ATTN_OUT,
1575 MODEL_TENSOR.FFN_NORM,
1576 MODEL_TENSOR.FFN_DOWN,
1577 MODEL_TENSOR.FFN_UP,
1578 MODEL_TENSOR.FFN_ACT,
1579 MODEL_TENSOR.ATTN_Q_NORM,
1580 MODEL_TENSOR.ATTN_K_NORM,
1581 MODEL_TENSOR.POS_EMBD,
1582 ],
1583 MODEL_ARCH.GPTJ: [
1584 MODEL_TENSOR.TOKEN_EMBD,
1585 MODEL_TENSOR.OUTPUT_NORM,
1586 MODEL_TENSOR.OUTPUT,
1587 MODEL_TENSOR.ATTN_NORM,
1588 MODEL_TENSOR.ATTN_Q,
1589 MODEL_TENSOR.ATTN_K,
1590 MODEL_TENSOR.ATTN_V,
1591 MODEL_TENSOR.ATTN_OUT,
1592 MODEL_TENSOR.FFN_DOWN,
1593 MODEL_TENSOR.FFN_UP,
1594 ],
1595 MODEL_ARCH.REFACT: [
1596 MODEL_TENSOR.TOKEN_EMBD,
1597 MODEL_TENSOR.OUTPUT_NORM,
1598 MODEL_TENSOR.OUTPUT,
1599 MODEL_TENSOR.ATTN_NORM,
1600 MODEL_TENSOR.ATTN_Q,
1601 MODEL_TENSOR.ATTN_K,
1602 MODEL_TENSOR.ATTN_V,
1603 MODEL_TENSOR.ATTN_OUT,
1604 MODEL_TENSOR.FFN_NORM,
1605 MODEL_TENSOR.FFN_GATE,
1606 MODEL_TENSOR.FFN_DOWN,
1607 MODEL_TENSOR.FFN_UP,
1608 ],
1609 MODEL_ARCH.BLOOM: [
1610 MODEL_TENSOR.TOKEN_EMBD,
1611 MODEL_TENSOR.TOKEN_EMBD_NORM,
1612 MODEL_TENSOR.OUTPUT_NORM,
1613 MODEL_TENSOR.OUTPUT,
1614 MODEL_TENSOR.ATTN_NORM,
1615 MODEL_TENSOR.ATTN_QKV,
1616 MODEL_TENSOR.ATTN_OUT,
1617 MODEL_TENSOR.FFN_NORM,
1618 MODEL_TENSOR.FFN_DOWN,
1619 MODEL_TENSOR.FFN_UP,
1620 ],
1621 MODEL_ARCH.STABLELM: [
1622 MODEL_TENSOR.TOKEN_EMBD,
1623 MODEL_TENSOR.OUTPUT_NORM,
1624 MODEL_TENSOR.OUTPUT,
1625 MODEL_TENSOR.ROPE_FREQS,
1626 MODEL_TENSOR.ATTN_NORM,
1627 MODEL_TENSOR.ATTN_Q,
1628 MODEL_TENSOR.ATTN_K,
1629 MODEL_TENSOR.ATTN_V,
1630 MODEL_TENSOR.ATTN_OUT,
1631 MODEL_TENSOR.FFN_NORM,
1632 MODEL_TENSOR.FFN_GATE,
1633 MODEL_TENSOR.FFN_DOWN,
1634 MODEL_TENSOR.FFN_UP,
1635 MODEL_TENSOR.ATTN_Q_NORM,
1636 MODEL_TENSOR.ATTN_K_NORM,
1637 ],
1638 MODEL_ARCH.QWEN: [
1639 MODEL_TENSOR.TOKEN_EMBD,
1640 MODEL_TENSOR.OUTPUT_NORM,
1641 MODEL_TENSOR.OUTPUT,
1642 MODEL_TENSOR.ROPE_FREQS,
1643 MODEL_TENSOR.ATTN_NORM,
1644 MODEL_TENSOR.ATTN_QKV,
1645 MODEL_TENSOR.ATTN_OUT,
1646 MODEL_TENSOR.ATTN_ROT_EMBD,
1647 MODEL_TENSOR.FFN_NORM,
1648 MODEL_TENSOR.FFN_GATE,
1649 MODEL_TENSOR.FFN_DOWN,
1650 MODEL_TENSOR.FFN_UP,
1651 ],
1652 MODEL_ARCH.QWEN2: [
1653 MODEL_TENSOR.TOKEN_EMBD,
1654 MODEL_TENSOR.OUTPUT_NORM,
1655 MODEL_TENSOR.OUTPUT,
1656 MODEL_TENSOR.ROPE_FREQS,
1657 MODEL_TENSOR.ATTN_NORM,
1658 MODEL_TENSOR.ATTN_Q,
1659 MODEL_TENSOR.ATTN_K,
1660 MODEL_TENSOR.ATTN_V,
1661 MODEL_TENSOR.ATTN_OUT,
1662 MODEL_TENSOR.FFN_NORM,
1663 MODEL_TENSOR.FFN_GATE,
1664 MODEL_TENSOR.FFN_DOWN,
1665 MODEL_TENSOR.FFN_UP,
1666 ],
1667 MODEL_ARCH.DREAM: [
1668 MODEL_TENSOR.TOKEN_EMBD,
1669 MODEL_TENSOR.OUTPUT_NORM,
1670 MODEL_TENSOR.OUTPUT,
1671 MODEL_TENSOR.ROPE_FREQS,
1672 MODEL_TENSOR.ATTN_NORM,
1673 MODEL_TENSOR.ATTN_Q,
1674 MODEL_TENSOR.ATTN_K,
1675 MODEL_TENSOR.ATTN_V,
1676 MODEL_TENSOR.ATTN_OUT,
1677 MODEL_TENSOR.FFN_NORM,
1678 MODEL_TENSOR.FFN_GATE,
1679 MODEL_TENSOR.FFN_DOWN,
1680 MODEL_TENSOR.FFN_UP,
1681 ],
1682 MODEL_ARCH.LLADA: [
1683 MODEL_TENSOR.TOKEN_EMBD,
1684 MODEL_TENSOR.OUTPUT_NORM,
1685 MODEL_TENSOR.OUTPUT,
1686 MODEL_TENSOR.ROPE_FREQS,
1687 MODEL_TENSOR.ATTN_NORM,
1688 MODEL_TENSOR.ATTN_Q,
1689 MODEL_TENSOR.ATTN_K,
1690 MODEL_TENSOR.ATTN_V,
1691 MODEL_TENSOR.ATTN_OUT,
1692 MODEL_TENSOR.FFN_NORM,
1693 MODEL_TENSOR.FFN_GATE,
1694 MODEL_TENSOR.FFN_DOWN,
1695 MODEL_TENSOR.FFN_UP,
1696 ],
1697 MODEL_ARCH.QWEN2VL: [
1698 MODEL_TENSOR.TOKEN_EMBD,
1699 MODEL_TENSOR.OUTPUT_NORM,
1700 MODEL_TENSOR.OUTPUT,
1701 MODEL_TENSOR.ATTN_NORM,
1702 MODEL_TENSOR.ATTN_Q,
1703 MODEL_TENSOR.ATTN_K,
1704 MODEL_TENSOR.ATTN_V,
1705 MODEL_TENSOR.ATTN_OUT,
1706 MODEL_TENSOR.FFN_NORM,
1707 MODEL_TENSOR.FFN_GATE,
1708 MODEL_TENSOR.FFN_DOWN,
1709 MODEL_TENSOR.FFN_UP,
1710 ],
1711 MODEL_ARCH.QWEN2MOE: [
1712 MODEL_TENSOR.TOKEN_EMBD,
1713 MODEL_TENSOR.OUTPUT_NORM,
1714 MODEL_TENSOR.OUTPUT,
1715 MODEL_TENSOR.ATTN_NORM,
1716 MODEL_TENSOR.ATTN_Q,
1717 MODEL_TENSOR.ATTN_K,
1718 MODEL_TENSOR.ATTN_V,
1719 MODEL_TENSOR.ATTN_OUT,
1720 MODEL_TENSOR.FFN_NORM,
1721 MODEL_TENSOR.FFN_GATE_INP,
1722 MODEL_TENSOR.FFN_GATE_EXP,
1723 MODEL_TENSOR.FFN_DOWN_EXP,
1724 MODEL_TENSOR.FFN_UP_EXP,
1725 MODEL_TENSOR.FFN_GATE_INP_SHEXP,
1726 MODEL_TENSOR.FFN_GATE_SHEXP,
1727 MODEL_TENSOR.FFN_DOWN_SHEXP,
1728 MODEL_TENSOR.FFN_UP_SHEXP,
1729 ],
1730 MODEL_ARCH.QWEN3: [
1731 MODEL_TENSOR.TOKEN_EMBD,
1732 MODEL_TENSOR.OUTPUT_NORM,
1733 MODEL_TENSOR.OUTPUT,
1734 MODEL_TENSOR.ROPE_FREQS,
1735 MODEL_TENSOR.ATTN_NORM,
1736 MODEL_TENSOR.ATTN_Q,
1737 MODEL_TENSOR.ATTN_Q_NORM,
1738 MODEL_TENSOR.ATTN_K,
1739 MODEL_TENSOR.ATTN_K_NORM,
1740 MODEL_TENSOR.ATTN_V,
1741 MODEL_TENSOR.ATTN_OUT,
1742 MODEL_TENSOR.FFN_NORM,
1743 MODEL_TENSOR.FFN_GATE,
1744 MODEL_TENSOR.FFN_DOWN,
1745 MODEL_TENSOR.FFN_UP,
1746 ],
1747 MODEL_ARCH.QWEN3MOE: [
1748 MODEL_TENSOR.TOKEN_EMBD,
1749 MODEL_TENSOR.OUTPUT_NORM,
1750 MODEL_TENSOR.OUTPUT,
1751 MODEL_TENSOR.ATTN_NORM,
1752 MODEL_TENSOR.ATTN_Q,
1753 MODEL_TENSOR.ATTN_Q_NORM,
1754 MODEL_TENSOR.ATTN_K,
1755 MODEL_TENSOR.ATTN_K_NORM,
1756 MODEL_TENSOR.ATTN_V,
1757 MODEL_TENSOR.ATTN_OUT,
1758 MODEL_TENSOR.FFN_NORM,
1759 MODEL_TENSOR.FFN_GATE_INP,
1760 MODEL_TENSOR.FFN_GATE_EXP,
1761 MODEL_TENSOR.FFN_DOWN_EXP,
1762 MODEL_TENSOR.FFN_UP_EXP,
1763 ],
1764 MODEL_ARCH.QWEN3NEXT: [
1765 MODEL_TENSOR.TOKEN_EMBD,
1766 MODEL_TENSOR.OUTPUT_NORM,
1767 MODEL_TENSOR.OUTPUT,
1768 MODEL_TENSOR.ATTN_NORM,
1769 MODEL_TENSOR.ATTN_Q,
1770 MODEL_TENSOR.ATTN_Q_NORM,
1771 MODEL_TENSOR.ATTN_K,
1772 MODEL_TENSOR.ATTN_K_NORM,
1773 MODEL_TENSOR.ATTN_V,
1774 MODEL_TENSOR.ATTN_OUT,
1775 MODEL_TENSOR.ATTN_POST_NORM,
1776 MODEL_TENSOR.ATTN_GATE,
1777 MODEL_TENSOR.ATTN_QKV,
1778 MODEL_TENSOR.FFN_GATE_INP,
1779 MODEL_TENSOR.FFN_GATE_INP_SHEXP,
1780 MODEL_TENSOR.FFN_UP_SHEXP,
1781 MODEL_TENSOR.FFN_DOWN_SHEXP,
1782 MODEL_TENSOR.FFN_GATE_SHEXP,
1783 MODEL_TENSOR.FFN_DOWN_EXP,
1784 MODEL_TENSOR.FFN_UP_EXP,
1785 MODEL_TENSOR.FFN_GATE_EXP,
1786 MODEL_TENSOR.SSM_A,
1787 MODEL_TENSOR.SSM_CONV1D,
1788 MODEL_TENSOR.SSM_DT,
1789 MODEL_TENSOR.SSM_NORM,
1790 MODEL_TENSOR.SSM_IN,
1791 MODEL_TENSOR.SSM_BETA_ALPHA,
1792 MODEL_TENSOR.SSM_OUT
1793 ],
1794 MODEL_ARCH.QWEN3VL: [
1795 MODEL_TENSOR.TOKEN_EMBD,
1796 MODEL_TENSOR.OUTPUT_NORM,
1797 MODEL_TENSOR.OUTPUT,
1798 MODEL_TENSOR.ROPE_FREQS,
1799 MODEL_TENSOR.ATTN_NORM,
1800 MODEL_TENSOR.ATTN_Q,
1801 MODEL_TENSOR.ATTN_Q_NORM,
1802 MODEL_TENSOR.ATTN_K,
1803 MODEL_TENSOR.ATTN_K_NORM,
1804 MODEL_TENSOR.ATTN_V,
1805 MODEL_TENSOR.ATTN_OUT,
1806 MODEL_TENSOR.FFN_NORM,
1807 MODEL_TENSOR.FFN_GATE,
1808 MODEL_TENSOR.FFN_DOWN,
1809 MODEL_TENSOR.FFN_UP,
1810 ],
1811 MODEL_ARCH.QWEN3VLMOE: [
1812 MODEL_TENSOR.TOKEN_EMBD,
1813 MODEL_TENSOR.OUTPUT_NORM,
1814 MODEL_TENSOR.OUTPUT,
1815 MODEL_TENSOR.ATTN_NORM,
1816 MODEL_TENSOR.ATTN_Q,
1817 MODEL_TENSOR.ATTN_Q_NORM,
1818 MODEL_TENSOR.ATTN_K,
1819 MODEL_TENSOR.ATTN_K_NORM,
1820 MODEL_TENSOR.ATTN_V,
1821 MODEL_TENSOR.ATTN_OUT,
1822 MODEL_TENSOR.FFN_NORM,
1823 MODEL_TENSOR.FFN_GATE_INP,
1824 MODEL_TENSOR.FFN_GATE_EXP,
1825 MODEL_TENSOR.FFN_DOWN_EXP,
1826 MODEL_TENSOR.FFN_UP_EXP,
1827 ],
1828 MODEL_ARCH.QWEN35: [
1829 MODEL_TENSOR.TOKEN_EMBD,
1830 MODEL_TENSOR.OUTPUT_NORM,
1831 MODEL_TENSOR.OUTPUT,
1832 MODEL_TENSOR.ATTN_NORM,
1833 MODEL_TENSOR.ATTN_Q,
1834 MODEL_TENSOR.ATTN_Q_NORM,
1835 MODEL_TENSOR.ATTN_K,
1836 MODEL_TENSOR.ATTN_K_NORM,
1837 MODEL_TENSOR.ATTN_V,
1838 MODEL_TENSOR.ATTN_OUT,
1839 MODEL_TENSOR.ATTN_POST_NORM,
1840 MODEL_TENSOR.ATTN_GATE,
1841 MODEL_TENSOR.ATTN_QKV,
1842 MODEL_TENSOR.FFN_GATE,
1843 MODEL_TENSOR.FFN_DOWN,
1844 MODEL_TENSOR.FFN_UP,
1845 MODEL_TENSOR.SSM_A,
1846 MODEL_TENSOR.SSM_CONV1D,
1847 MODEL_TENSOR.SSM_DT,
1848 MODEL_TENSOR.SSM_NORM,
1849 MODEL_TENSOR.SSM_BETA,
1850 MODEL_TENSOR.SSM_ALPHA,
1851 MODEL_TENSOR.SSM_OUT
1852 ],
1853 MODEL_ARCH.QWEN35MOE: [
1854 MODEL_TENSOR.TOKEN_EMBD,
1855 MODEL_TENSOR.OUTPUT_NORM,
1856 MODEL_TENSOR.OUTPUT,
1857 MODEL_TENSOR.ATTN_NORM,
1858 MODEL_TENSOR.ATTN_Q,
1859 MODEL_TENSOR.ATTN_Q_NORM,
1860 MODEL_TENSOR.ATTN_K,
1861 MODEL_TENSOR.ATTN_K_NORM,
1862 MODEL_TENSOR.ATTN_V,
1863 MODEL_TENSOR.ATTN_OUT,
1864 MODEL_TENSOR.ATTN_POST_NORM,
1865 MODEL_TENSOR.ATTN_GATE,
1866 MODEL_TENSOR.ATTN_QKV,
1867 MODEL_TENSOR.FFN_GATE_INP,
1868 MODEL_TENSOR.FFN_GATE_INP_SHEXP,
1869 MODEL_TENSOR.FFN_UP_SHEXP,
1870 MODEL_TENSOR.FFN_DOWN_SHEXP,
1871 MODEL_TENSOR.FFN_GATE_SHEXP,
1872 MODEL_TENSOR.FFN_DOWN_EXP,
1873 MODEL_TENSOR.FFN_UP_EXP,
1874 MODEL_TENSOR.FFN_GATE_EXP,
1875 MODEL_TENSOR.SSM_A,
1876 MODEL_TENSOR.SSM_CONV1D,
1877 MODEL_TENSOR.SSM_DT,
1878 MODEL_TENSOR.SSM_NORM,
1879 MODEL_TENSOR.SSM_BETA,
1880 MODEL_TENSOR.SSM_ALPHA,
1881 MODEL_TENSOR.SSM_OUT
1882 ],
1883 MODEL_ARCH.PLAMO: [
1884 MODEL_TENSOR.TOKEN_EMBD,
1885 MODEL_TENSOR.OUTPUT_NORM,
1886 MODEL_TENSOR.OUTPUT,
1887 MODEL_TENSOR.ROPE_FREQS,
1888 MODEL_TENSOR.ATTN_NORM,
1889 MODEL_TENSOR.ATTN_Q,
1890 MODEL_TENSOR.ATTN_K,
1891 MODEL_TENSOR.ATTN_V,
1892 MODEL_TENSOR.ATTN_OUT,
1893 MODEL_TENSOR.ATTN_ROT_EMBD,
1894 MODEL_TENSOR.FFN_GATE,
1895 MODEL_TENSOR.FFN_DOWN,
1896 MODEL_TENSOR.FFN_UP,
1897 ],
1898 MODEL_ARCH.PLAMO2: [
1899 MODEL_TENSOR.TOKEN_EMBD,
1900 MODEL_TENSOR.OUTPUT_NORM,
1901 MODEL_TENSOR.OUTPUT,
1902 MODEL_TENSOR.ROPE_FREQS,
1903 MODEL_TENSOR.ATTN_NORM,
1904 MODEL_TENSOR.ATTN_QKV,
1905 MODEL_TENSOR.ATTN_Q,
1906 MODEL_TENSOR.ATTN_K,
1907 MODEL_TENSOR.ATTN_OUT,
1908 MODEL_TENSOR.ATTN_ROT_EMBD,
1909 MODEL_TENSOR.ATTN_Q_NORM,
1910 MODEL_TENSOR.ATTN_K_NORM,
1911 MODEL_TENSOR.ATTN_POST_NORM,
1912 MODEL_TENSOR.FFN_NORM,
1913 MODEL_TENSOR.FFN_GATE,
1914 MODEL_TENSOR.FFN_DOWN,
1915 MODEL_TENSOR.FFN_UP,
1916 MODEL_TENSOR.FFN_POST_NORM,
1917 MODEL_TENSOR.SSM_IN,
1918 MODEL_TENSOR.SSM_CONV1D,
1919 MODEL_TENSOR.SSM_X,
1920 MODEL_TENSOR.SSM_DT,
1921 MODEL_TENSOR.SSM_A,
1922 MODEL_TENSOR.SSM_D,
1923 MODEL_TENSOR.SSM_OUT,
1924 MODEL_TENSOR.SSM_DT_NORM,
1925 MODEL_TENSOR.SSM_B_NORM,
1926 MODEL_TENSOR.SSM_C_NORM,
1927 ],
1928 MODEL_ARCH.PLAMO3: [
1929 MODEL_TENSOR.TOKEN_EMBD,
1930 MODEL_TENSOR.OUTPUT_NORM,
1931 MODEL_TENSOR.OUTPUT,
1932 MODEL_TENSOR.ATTN_NORM,
1933 MODEL_TENSOR.ATTN_QKV,
1934 MODEL_TENSOR.ATTN_Q_NORM,
1935 MODEL_TENSOR.ATTN_K_NORM,
1936 MODEL_TENSOR.ATTN_OUT,
1937 MODEL_TENSOR.ATTN_POST_NORM,
1938 MODEL_TENSOR.FFN_NORM,
1939 MODEL_TENSOR.FFN_DOWN,
1940 MODEL_TENSOR.FFN_UP,
1941 MODEL_TENSOR.FFN_POST_NORM,
1942 ],
1943 MODEL_ARCH.GPT2: [
1944 MODEL_TENSOR.TOKEN_EMBD,
1945 MODEL_TENSOR.POS_EMBD,
1946 MODEL_TENSOR.OUTPUT_NORM,
1947 MODEL_TENSOR.OUTPUT,
1948 MODEL_TENSOR.ATTN_NORM,
1949 MODEL_TENSOR.ATTN_QKV,
1950 MODEL_TENSOR.ATTN_OUT,
1951 MODEL_TENSOR.FFN_NORM,
1952 MODEL_TENSOR.FFN_DOWN,
1953 MODEL_TENSOR.FFN_UP,
1954 ],
1955 MODEL_ARCH.PHI2: [
1956 MODEL_TENSOR.TOKEN_EMBD,
1957 MODEL_TENSOR.OUTPUT_NORM,
1958 MODEL_TENSOR.OUTPUT,
1959 MODEL_TENSOR.ATTN_NORM,
1960 MODEL_TENSOR.ATTN_QKV,
1961 MODEL_TENSOR.ATTN_Q,
1962 MODEL_TENSOR.ATTN_K,
1963 MODEL_TENSOR.ATTN_V,
1964 MODEL_TENSOR.ATTN_OUT,
1965 MODEL_TENSOR.FFN_NORM,
1966 MODEL_TENSOR.FFN_DOWN,
1967 MODEL_TENSOR.FFN_UP,
1968 ],
1969 MODEL_ARCH.PHI3: [
1970 MODEL_TENSOR.TOKEN_EMBD,
1971 MODEL_TENSOR.OUTPUT_NORM,
1972 MODEL_TENSOR.OUTPUT,
1973 MODEL_TENSOR.ROPE_FACTORS_LONG,
1974 MODEL_TENSOR.ROPE_FACTORS_SHORT,
1975 MODEL_TENSOR.ATTN_NORM,
1976 MODEL_TENSOR.ATTN_QKV,
1977 MODEL_TENSOR.ATTN_Q,
1978 MODEL_TENSOR.ATTN_K,
1979 MODEL_TENSOR.ATTN_V,
1980 MODEL_TENSOR.ATTN_OUT,
1981 MODEL_TENSOR.FFN_NORM,
1982 MODEL_TENSOR.FFN_DOWN,
1983 MODEL_TENSOR.FFN_UP,
1984 ],
1985 MODEL_ARCH.PHIMOE: [
1986 MODEL_TENSOR.TOKEN_EMBD,
1987 MODEL_TENSOR.OUTPUT_NORM,
1988 MODEL_TENSOR.OUTPUT,
1989 MODEL_TENSOR.ROPE_FACTORS_LONG,
1990 MODEL_TENSOR.ROPE_FACTORS_SHORT,
1991 MODEL_TENSOR.ATTN_NORM,
1992 MODEL_TENSOR.ATTN_QKV,
1993 MODEL_TENSOR.ATTN_Q,
1994 MODEL_TENSOR.ATTN_K,
1995 MODEL_TENSOR.ATTN_V,
1996 MODEL_TENSOR.ATTN_OUT,
1997 MODEL_TENSOR.FFN_NORM,
1998 MODEL_TENSOR.FFN_GATE_INP,
1999 MODEL_TENSOR.FFN_GATE_EXP,
2000 MODEL_TENSOR.FFN_DOWN_EXP,
2001 MODEL_TENSOR.FFN_UP_EXP,
2002 ],
2003 MODEL_ARCH.CODESHELL: [
2004 MODEL_TENSOR.TOKEN_EMBD,
2005 MODEL_TENSOR.POS_EMBD,
2006 MODEL_TENSOR.OUTPUT_NORM,
2007 MODEL_TENSOR.OUTPUT,
2008 MODEL_TENSOR.ATTN_NORM,
2009 MODEL_TENSOR.ATTN_QKV,
2010 MODEL_TENSOR.ATTN_OUT,
2011 MODEL_TENSOR.ATTN_ROT_EMBD,
2012 MODEL_TENSOR.FFN_NORM,
2013 MODEL_TENSOR.FFN_DOWN,
2014 MODEL_TENSOR.FFN_UP,
2015 ],
2016 MODEL_ARCH.ORION: [
2017 MODEL_TENSOR.TOKEN_EMBD,
2018 MODEL_TENSOR.OUTPUT_NORM,
2019 MODEL_TENSOR.OUTPUT,
2020 MODEL_TENSOR.ROPE_FREQS,
2021 MODEL_TENSOR.ATTN_NORM,
2022 MODEL_TENSOR.ATTN_Q,
2023 MODEL_TENSOR.ATTN_K,
2024 MODEL_TENSOR.ATTN_V,
2025 MODEL_TENSOR.ATTN_OUT,
2026 MODEL_TENSOR.ATTN_ROT_EMBD,
2027 MODEL_TENSOR.FFN_NORM,
2028 MODEL_TENSOR.FFN_GATE,
2029 MODEL_TENSOR.FFN_DOWN,
2030 MODEL_TENSOR.FFN_UP,
2031 ],
2032 MODEL_ARCH.INTERNLM2: [
2033 MODEL_TENSOR.TOKEN_EMBD,
2034 MODEL_TENSOR.OUTPUT_NORM,
2035 MODEL_TENSOR.OUTPUT,
2036 MODEL_TENSOR.ATTN_NORM,
2037 MODEL_TENSOR.ATTN_Q,
2038 MODEL_TENSOR.ATTN_K,
2039 MODEL_TENSOR.ATTN_V,
2040 MODEL_TENSOR.ATTN_OUT,
2041 MODEL_TENSOR.ATTN_ROT_EMBD,
2042 MODEL_TENSOR.FFN_NORM,
2043 MODEL_TENSOR.FFN_GATE,
2044 MODEL_TENSOR.FFN_DOWN,
2045 MODEL_TENSOR.FFN_UP,
2046 ],
2047 MODEL_ARCH.MINICPM: [
2048 MODEL_TENSOR.TOKEN_EMBD,
2049 MODEL_TENSOR.OUTPUT,
2050 MODEL_TENSOR.OUTPUT_NORM,
2051 MODEL_TENSOR.ROPE_FREQS,
2052 MODEL_TENSOR.ROPE_FACTORS_LONG,
2053 MODEL_TENSOR.ROPE_FACTORS_SHORT,
2054 MODEL_TENSOR.ATTN_NORM,
2055 MODEL_TENSOR.ATTN_Q,
2056 MODEL_TENSOR.ATTN_K,
2057 MODEL_TENSOR.ATTN_V,
2058 MODEL_TENSOR.ATTN_OUT,
2059 MODEL_TENSOR.ATTN_ROT_EMBD,
2060 MODEL_TENSOR.FFN_GATE_INP,
2061 MODEL_TENSOR.FFN_NORM,
2062 MODEL_TENSOR.FFN_GATE,
2063 MODEL_TENSOR.FFN_DOWN,
2064 MODEL_TENSOR.FFN_UP,
2065 MODEL_TENSOR.FFN_GATE_EXP,
2066 MODEL_TENSOR.FFN_DOWN_EXP,
2067 MODEL_TENSOR.FFN_UP_EXP,
2068 ],
2069 MODEL_ARCH.MINICPM3: [
2070 MODEL_TENSOR.TOKEN_EMBD,
2071 MODEL_TENSOR.OUTPUT_NORM,
2072 MODEL_TENSOR.OUTPUT,
2073 MODEL_TENSOR.ROPE_FACTORS_LONG,
2074 MODEL_TENSOR.ROPE_FACTORS_SHORT,
2075 MODEL_TENSOR.ATTN_NORM,
2076 MODEL_TENSOR.ATTN_Q_A,
2077 MODEL_TENSOR.ATTN_Q_B,
2078 MODEL_TENSOR.ATTN_KV_A_MQA,
2079 MODEL_TENSOR.ATTN_KV_B,
2080 MODEL_TENSOR.ATTN_Q_A_NORM,
2081 MODEL_TENSOR.ATTN_KV_A_NORM,
2082 MODEL_TENSOR.ATTN_OUT,
2083 MODEL_TENSOR.FFN_NORM,
2084 MODEL_TENSOR.FFN_GATE,
2085 MODEL_TENSOR.FFN_DOWN,
2086 MODEL_TENSOR.FFN_UP,
2087 ],
2088 MODEL_ARCH.GEMMA: [
2089 MODEL_TENSOR.TOKEN_EMBD,
2090 MODEL_TENSOR.OUTPUT_NORM,
2091 MODEL_TENSOR.ATTN_NORM,
2092 MODEL_TENSOR.ATTN_Q,
2093 MODEL_TENSOR.ATTN_K,
2094 MODEL_TENSOR.ATTN_V,
2095 MODEL_TENSOR.ATTN_OUT,
2096 MODEL_TENSOR.FFN_GATE,
2097 MODEL_TENSOR.FFN_DOWN,
2098 MODEL_TENSOR.FFN_UP,
2099 MODEL_TENSOR.FFN_NORM,
2100 ],
2101 MODEL_ARCH.GEMMA2: [
2102 MODEL_TENSOR.TOKEN_EMBD,
2103 MODEL_TENSOR.OUTPUT_NORM,
2104 MODEL_TENSOR.ATTN_Q,
2105 MODEL_TENSOR.ATTN_K,
2106 MODEL_TENSOR.ATTN_V,
2107 MODEL_TENSOR.ATTN_OUT,
2108 MODEL_TENSOR.FFN_GATE,
2109 MODEL_TENSOR.FFN_DOWN,
2110 MODEL_TENSOR.FFN_UP,
2111 MODEL_TENSOR.ATTN_NORM,
2112 MODEL_TENSOR.ATTN_POST_NORM,
2113 MODEL_TENSOR.FFN_PRE_NORM,
2114 MODEL_TENSOR.FFN_POST_NORM,
2115 ],
2116 MODEL_ARCH.GEMMA3: [
2117 MODEL_TENSOR.TOKEN_EMBD,
2118 MODEL_TENSOR.OUTPUT,
2119 MODEL_TENSOR.OUTPUT_NORM,
2120 MODEL_TENSOR.ATTN_Q,
2121 MODEL_TENSOR.ATTN_Q_NORM,
2122 MODEL_TENSOR.ATTN_K,
2123 MODEL_TENSOR.ATTN_K_NORM,
2124 MODEL_TENSOR.ATTN_V,
2125 MODEL_TENSOR.ATTN_OUT,
2126 MODEL_TENSOR.FFN_GATE,
2127 MODEL_TENSOR.FFN_DOWN,
2128 MODEL_TENSOR.FFN_UP,
2129 MODEL_TENSOR.ATTN_NORM,
2130 MODEL_TENSOR.ATTN_POST_NORM,
2131 MODEL_TENSOR.FFN_PRE_NORM,
2132 MODEL_TENSOR.FFN_POST_NORM,
2133 ],
2134 MODEL_ARCH.GEMMA3N: [
2135 MODEL_TENSOR.TOKEN_EMBD,
2136 MODEL_TENSOR.OUTPUT,
2137 MODEL_TENSOR.OUTPUT_NORM,
2138 MODEL_TENSOR.ATTN_Q,
2139 MODEL_TENSOR.ATTN_Q_NORM,
2140 MODEL_TENSOR.ATTN_K,
2141 MODEL_TENSOR.ATTN_K_NORM,
2142 MODEL_TENSOR.ATTN_V,
2143 MODEL_TENSOR.ATTN_OUT,
2144 MODEL_TENSOR.FFN_GATE,
2145 MODEL_TENSOR.FFN_DOWN,
2146 MODEL_TENSOR.FFN_UP,
2147 MODEL_TENSOR.ATTN_NORM,
2148 MODEL_TENSOR.ATTN_POST_NORM,
2149 MODEL_TENSOR.FFN_PRE_NORM,
2150 MODEL_TENSOR.FFN_POST_NORM,
2151 # altup / laurel
2152 MODEL_TENSOR.PER_LAYER_TOKEN_EMBD,
2153 MODEL_TENSOR.PER_LAYER_MODEL_PROJ,
2154 MODEL_TENSOR.PER_LAYER_INP_GATE,
2155 MODEL_TENSOR.PER_LAYER_PROJ,
2156 MODEL_TENSOR.PER_LAYER_PROJ_NORM,
2157 MODEL_TENSOR.PER_LAYER_POST_NORM,
2158 MODEL_TENSOR.ALTUP_PROJ,
2159 MODEL_TENSOR.ALTUP_UNEMBD_PROJ,
2160 MODEL_TENSOR.ALTUP_CORRECT_COEF,
2161 MODEL_TENSOR.ALTUP_CORRECT_SCALE,
2162 MODEL_TENSOR.ALTUP_PREDICT_COEF,
2163 MODEL_TENSOR.ALTUP_ROUTER,
2164 MODEL_TENSOR.ALTUP_ROUTER_NORM,
2165 MODEL_TENSOR.LAUREL_L,
2166 MODEL_TENSOR.LAUREL_R,
2167 MODEL_TENSOR.LAUREL_POST_NORM,
2168 ],
2169 MODEL_ARCH.GEMMA_EMBEDDING: [
2170 MODEL_TENSOR.TOKEN_EMBD,
2171 MODEL_TENSOR.OUTPUT,
2172 MODEL_TENSOR.DENSE_2_OUT,
2173 MODEL_TENSOR.DENSE_3_OUT,
2174 MODEL_TENSOR.OUTPUT_NORM,
2175 MODEL_TENSOR.ATTN_Q,
2176 MODEL_TENSOR.ATTN_Q_NORM,
2177 MODEL_TENSOR.ATTN_K,
2178 MODEL_TENSOR.ATTN_K_NORM,
2179 MODEL_TENSOR.ATTN_V,
2180 MODEL_TENSOR.ATTN_OUT,
2181 MODEL_TENSOR.FFN_GATE,
2182 MODEL_TENSOR.FFN_DOWN,
2183 MODEL_TENSOR.FFN_UP,
2184 MODEL_TENSOR.ATTN_NORM,
2185 MODEL_TENSOR.ATTN_POST_NORM,
2186 MODEL_TENSOR.FFN_PRE_NORM,
2187 MODEL_TENSOR.FFN_POST_NORM,
2188 ],
2189 MODEL_ARCH.STARCODER2: [
2190 MODEL_TENSOR.TOKEN_EMBD,
2191 MODEL_TENSOR.OUTPUT_NORM,
2192 MODEL_TENSOR.OUTPUT,
2193 MODEL_TENSOR.ROPE_FREQS,
2194 MODEL_TENSOR.ATTN_NORM,
2195 MODEL_TENSOR.ATTN_Q,
2196 MODEL_TENSOR.ATTN_K,
2197 MODEL_TENSOR.ATTN_V,
2198 MODEL_TENSOR.ATTN_OUT,
2199 MODEL_TENSOR.ATTN_ROT_EMBD,
2200 MODEL_TENSOR.FFN_NORM,
2201 MODEL_TENSOR.FFN_DOWN,
2202 MODEL_TENSOR.FFN_UP,
2203 ],
2204 MODEL_ARCH.RWKV6: [
2205 MODEL_TENSOR.TOKEN_EMBD,
2206 MODEL_TENSOR.TOKEN_EMBD_NORM,
2207 MODEL_TENSOR.OUTPUT_NORM,
2208 MODEL_TENSOR.OUTPUT,
2209 MODEL_TENSOR.ATTN_NORM,
2210 MODEL_TENSOR.ATTN_NORM_2,
2211 MODEL_TENSOR.TIME_MIX_W1,
2212 MODEL_TENSOR.TIME_MIX_W2,
2213 MODEL_TENSOR.TIME_MIX_LERP_X,
2214 MODEL_TENSOR.TIME_MIX_LERP_K,
2215 MODEL_TENSOR.TIME_MIX_LERP_V,
2216 MODEL_TENSOR.TIME_MIX_LERP_R,
2217 MODEL_TENSOR.TIME_MIX_LERP_G,
2218 MODEL_TENSOR.TIME_MIX_LERP_W,
2219 MODEL_TENSOR.TIME_MIX_LERP_FUSED,
2220 MODEL_TENSOR.TIME_MIX_FIRST,
2221 MODEL_TENSOR.TIME_MIX_DECAY,
2222 MODEL_TENSOR.TIME_MIX_DECAY_W1,
2223 MODEL_TENSOR.TIME_MIX_DECAY_W2,
2224 MODEL_TENSOR.TIME_MIX_KEY,
2225 MODEL_TENSOR.TIME_MIX_VALUE,
2226 MODEL_TENSOR.TIME_MIX_RECEPTANCE,
2227 MODEL_TENSOR.TIME_MIX_GATE,
2228 MODEL_TENSOR.TIME_MIX_LN,
2229 MODEL_TENSOR.TIME_MIX_OUTPUT,
2230 MODEL_TENSOR.CHANNEL_MIX_LERP_K,
2231 MODEL_TENSOR.CHANNEL_MIX_LERP_R,
2232 MODEL_TENSOR.CHANNEL_MIX_KEY,
2233 MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
2234 MODEL_TENSOR.CHANNEL_MIX_VALUE,
2235 ],
2236 MODEL_ARCH.RWKV6QWEN2: [
2237 MODEL_TENSOR.TOKEN_EMBD,
2238 MODEL_TENSOR.OUTPUT_NORM,
2239 MODEL_TENSOR.OUTPUT,
2240 MODEL_TENSOR.ATTN_NORM,
2241 MODEL_TENSOR.TIME_MIX_W1,
2242 MODEL_TENSOR.TIME_MIX_W2,
2243 MODEL_TENSOR.TIME_MIX_LERP_X,
2244 MODEL_TENSOR.TIME_MIX_LERP_K,
2245 MODEL_TENSOR.TIME_MIX_LERP_V,
2246 MODEL_TENSOR.TIME_MIX_LERP_R,
2247 MODEL_TENSOR.TIME_MIX_LERP_G,
2248 MODEL_TENSOR.TIME_MIX_LERP_W,
2249 MODEL_TENSOR.TIME_MIX_LERP_FUSED,
2250 MODEL_TENSOR.TIME_MIX_FIRST,
2251 MODEL_TENSOR.TIME_MIX_DECAY,
2252 MODEL_TENSOR.TIME_MIX_DECAY_W1,
2253 MODEL_TENSOR.TIME_MIX_DECAY_W2,
2254 MODEL_TENSOR.TIME_MIX_KEY,
2255 MODEL_TENSOR.TIME_MIX_VALUE,
2256 MODEL_TENSOR.TIME_MIX_RECEPTANCE,
2257 MODEL_TENSOR.TIME_MIX_GATE,
2258 MODEL_TENSOR.TIME_MIX_LN,
2259 MODEL_TENSOR.TIME_MIX_OUTPUT,
2260 MODEL_TENSOR.FFN_NORM,
2261 MODEL_TENSOR.FFN_GATE,
2262 MODEL_TENSOR.FFN_DOWN,
2263 MODEL_TENSOR.FFN_UP,
2264 ],
2265 MODEL_ARCH.RWKV7: [
2266 MODEL_TENSOR.TOKEN_EMBD,
2267 MODEL_TENSOR.TOKEN_EMBD_NORM,
2268 MODEL_TENSOR.OUTPUT_NORM,
2269 MODEL_TENSOR.OUTPUT,
2270 MODEL_TENSOR.ATTN_NORM,
2271 MODEL_TENSOR.ATTN_NORM_2,
2272 MODEL_TENSOR.TIME_MIX_LERP_FUSED,
2273 MODEL_TENSOR.TIME_MIX_W0,
2274 MODEL_TENSOR.TIME_MIX_W1,
2275 MODEL_TENSOR.TIME_MIX_W2,
2276 MODEL_TENSOR.TIME_MIX_A0,
2277 MODEL_TENSOR.TIME_MIX_A1,
2278 MODEL_TENSOR.TIME_MIX_A2,
2279 MODEL_TENSOR.TIME_MIX_V0,
2280 MODEL_TENSOR.TIME_MIX_V1,
2281 MODEL_TENSOR.TIME_MIX_V2,
2282 MODEL_TENSOR.TIME_MIX_G1,
2283 MODEL_TENSOR.TIME_MIX_G2,
2284 MODEL_TENSOR.TIME_MIX_K_K,
2285 MODEL_TENSOR.TIME_MIX_K_A,
2286 MODEL_TENSOR.TIME_MIX_R_K,
2287 MODEL_TENSOR.TIME_MIX_KEY,
2288 MODEL_TENSOR.TIME_MIX_VALUE,
2289 MODEL_TENSOR.TIME_MIX_RECEPTANCE,
2290 MODEL_TENSOR.TIME_MIX_LN,
2291 MODEL_TENSOR.TIME_MIX_OUTPUT,
2292 MODEL_TENSOR.CHANNEL_MIX_LERP_K,
2293 MODEL_TENSOR.CHANNEL_MIX_KEY,
2294 MODEL_TENSOR.CHANNEL_MIX_VALUE,
2295 ],
2296 MODEL_ARCH.ARWKV7: [
2297 MODEL_TENSOR.TOKEN_EMBD,
2298 MODEL_TENSOR.TOKEN_EMBD_NORM,
2299 MODEL_TENSOR.OUTPUT_NORM,
2300 MODEL_TENSOR.OUTPUT,
2301 MODEL_TENSOR.ATTN_NORM,
2302 MODEL_TENSOR.TIME_MIX_LERP_FUSED,
2303 MODEL_TENSOR.TIME_MIX_W0,
2304 MODEL_TENSOR.TIME_MIX_W1,
2305 MODEL_TENSOR.TIME_MIX_W2,
2306 MODEL_TENSOR.TIME_MIX_A0,
2307 MODEL_TENSOR.TIME_MIX_A1,
2308 MODEL_TENSOR.TIME_MIX_A2,
2309 MODEL_TENSOR.TIME_MIX_V0,
2310 MODEL_TENSOR.TIME_MIX_V1,
2311 MODEL_TENSOR.TIME_MIX_V2,
2312 MODEL_TENSOR.TIME_MIX_G1,
2313 MODEL_TENSOR.TIME_MIX_G2,
2314 MODEL_TENSOR.TIME_MIX_K_K,
2315 MODEL_TENSOR.TIME_MIX_K_A,
2316 MODEL_TENSOR.TIME_MIX_R_K,
2317 MODEL_TENSOR.TIME_MIX_KEY,
2318 MODEL_TENSOR.TIME_MIX_VALUE,
2319 MODEL_TENSOR.TIME_MIX_RECEPTANCE,
2320 MODEL_TENSOR.TIME_MIX_LN,
2321 MODEL_TENSOR.TIME_MIX_OUTPUT,
2322 MODEL_TENSOR.FFN_NORM,
2323 MODEL_TENSOR.FFN_GATE,
2324 MODEL_TENSOR.FFN_DOWN,
2325 MODEL_TENSOR.FFN_UP,
2326 ],
2327 MODEL_ARCH.MAMBA: [
2328 MODEL_TENSOR.TOKEN_EMBD,
2329 MODEL_TENSOR.OUTPUT_NORM,
2330 MODEL_TENSOR.OUTPUT,
2331 MODEL_TENSOR.ATTN_NORM,
2332 MODEL_TENSOR.SSM_IN,
2333 MODEL_TENSOR.SSM_CONV1D,
2334 MODEL_TENSOR.SSM_X,
2335 MODEL_TENSOR.SSM_DT,
2336 MODEL_TENSOR.SSM_A,
2337 MODEL_TENSOR.SSM_D,
2338 MODEL_TENSOR.SSM_OUT,
2339 ],
2340 MODEL_ARCH.MAMBA2: [
2341 MODEL_TENSOR.TOKEN_EMBD,
2342 MODEL_TENSOR.OUTPUT_NORM,
2343 MODEL_TENSOR.OUTPUT,
2344 MODEL_TENSOR.ATTN_NORM,
2345 MODEL_TENSOR.SSM_IN,
2346 MODEL_TENSOR.SSM_CONV1D,
2347 MODEL_TENSOR.SSM_DT,
2348 MODEL_TENSOR.SSM_A,
2349 MODEL_TENSOR.SSM_D,
2350 MODEL_TENSOR.SSM_NORM,
2351 MODEL_TENSOR.SSM_OUT,
2352 ],
2353 MODEL_ARCH.JAMBA: [
2354 MODEL_TENSOR.TOKEN_EMBD,
2355 MODEL_TENSOR.OUTPUT_NORM,
2356 MODEL_TENSOR.OUTPUT,
2357 MODEL_TENSOR.ATTN_NORM,
2358 MODEL_TENSOR.ATTN_Q,
2359 MODEL_TENSOR.ATTN_K,
2360 MODEL_TENSOR.ATTN_V,
2361 MODEL_TENSOR.ATTN_OUT,
2362 MODEL_TENSOR.SSM_IN,
2363 MODEL_TENSOR.SSM_CONV1D,
2364 MODEL_TENSOR.SSM_X,
2365 MODEL_TENSOR.SSM_DT,
2366 MODEL_TENSOR.SSM_DT_NORM,
2367 MODEL_TENSOR.SSM_A,
2368 MODEL_TENSOR.SSM_B_NORM,
2369 MODEL_TENSOR.SSM_C_NORM,
2370 MODEL_TENSOR.SSM_D,
2371 MODEL_TENSOR.SSM_OUT,
2372 MODEL_TENSOR.FFN_GATE_INP,
2373 MODEL_TENSOR.FFN_NORM,
2374 MODEL_TENSOR.FFN_GATE,
2375 MODEL_TENSOR.FFN_DOWN,
2376 MODEL_TENSOR.FFN_UP,
2377 MODEL_TENSOR.FFN_GATE_EXP,
2378 MODEL_TENSOR.FFN_DOWN_EXP,
2379 MODEL_TENSOR.FFN_UP_EXP,
2380 ],
2381 MODEL_ARCH.XVERSE: [
2382 MODEL_TENSOR.TOKEN_EMBD,
2383 MODEL_TENSOR.OUTPUT_NORM,
2384 MODEL_TENSOR.OUTPUT,
2385 MODEL_TENSOR.ROPE_FREQS,
2386 MODEL_TENSOR.ATTN_NORM,
2387 MODEL_TENSOR.ATTN_Q,
2388 MODEL_TENSOR.ATTN_K,
2389 MODEL_TENSOR.ATTN_V,
2390 MODEL_TENSOR.ATTN_OUT,
2391 MODEL_TENSOR.ATTN_ROT_EMBD,
2392 MODEL_TENSOR.FFN_NORM,
2393 MODEL_TENSOR.FFN_GATE,
2394 MODEL_TENSOR.FFN_DOWN,
2395 MODEL_TENSOR.FFN_UP,
2396 ],
2397 MODEL_ARCH.COMMAND_R: [
2398 MODEL_TENSOR.TOKEN_EMBD,
2399 MODEL_TENSOR.OUTPUT_NORM,
2400 MODEL_TENSOR.ATTN_NORM,
2401 MODEL_TENSOR.ATTN_Q,
2402 MODEL_TENSOR.ATTN_K,
2403 MODEL_TENSOR.ATTN_V,
2404 MODEL_TENSOR.ATTN_OUT,
2405 MODEL_TENSOR.FFN_GATE,
2406 MODEL_TENSOR.FFN_DOWN,
2407 MODEL_TENSOR.FFN_UP,
2408 MODEL_TENSOR.ATTN_K_NORM,
2409 MODEL_TENSOR.ATTN_Q_NORM,
2410 ],
2411 MODEL_ARCH.COHERE2: [
2412 MODEL_TENSOR.TOKEN_EMBD,
2413 MODEL_TENSOR.OUTPUT_NORM,
2414 MODEL_TENSOR.ATTN_NORM,
2415 MODEL_TENSOR.ATTN_Q,
2416 MODEL_TENSOR.ATTN_K,
2417 MODEL_TENSOR.ATTN_V,
2418 MODEL_TENSOR.ATTN_OUT,
2419 MODEL_TENSOR.FFN_GATE,
2420 MODEL_TENSOR.FFN_DOWN,
2421 MODEL_TENSOR.FFN_UP,
2422 ],
2423 MODEL_ARCH.DBRX: [
2424 MODEL_TENSOR.TOKEN_EMBD,
2425 MODEL_TENSOR.OUTPUT_NORM,
2426 MODEL_TENSOR.OUTPUT,
2427 MODEL_TENSOR.ATTN_NORM,
2428 MODEL_TENSOR.ATTN_QKV,
2429 MODEL_TENSOR.ATTN_OUT,
2430 MODEL_TENSOR.ATTN_OUT_NORM,
2431 MODEL_TENSOR.FFN_GATE_INP,
2432 MODEL_TENSOR.FFN_GATE_EXP,
2433 MODEL_TENSOR.FFN_DOWN_EXP,
2434 MODEL_TENSOR.FFN_UP_EXP,
2435 ],
2436 MODEL_ARCH.OLMO: [
2437 MODEL_TENSOR.TOKEN_EMBD,
2438 MODEL_TENSOR.OUTPUT,
2439 MODEL_TENSOR.ATTN_Q,
2440 MODEL_TENSOR.ATTN_K,
2441 MODEL_TENSOR.ATTN_V,
2442 MODEL_TENSOR.ATTN_OUT,
2443 MODEL_TENSOR.FFN_GATE,
2444 MODEL_TENSOR.FFN_DOWN,
2445 MODEL_TENSOR.FFN_UP,
2446 ],
2447 MODEL_ARCH.OLMO2: [
2448 MODEL_TENSOR.TOKEN_EMBD,
2449 MODEL_TENSOR.OUTPUT_NORM,
2450 MODEL_TENSOR.OUTPUT,
2451 MODEL_TENSOR.ATTN_Q,
2452 MODEL_TENSOR.ATTN_K,
2453 MODEL_TENSOR.ATTN_V,
2454 MODEL_TENSOR.ATTN_OUT,
2455 MODEL_TENSOR.ATTN_POST_NORM,
2456 MODEL_TENSOR.ATTN_Q_NORM,
2457 MODEL_TENSOR.ATTN_K_NORM,
2458 MODEL_TENSOR.FFN_POST_NORM,
2459 MODEL_TENSOR.FFN_GATE,
2460 MODEL_TENSOR.FFN_DOWN,
2461 MODEL_TENSOR.FFN_UP,
2462 ],
2463 MODEL_ARCH.SEED_OSS: [
2464 MODEL_TENSOR.TOKEN_EMBD,
2465 MODEL_TENSOR.ATTN_NORM,
2466 MODEL_TENSOR.ATTN_Q,
2467 MODEL_TENSOR.ATTN_K,
2468 MODEL_TENSOR.ATTN_V,
2469 MODEL_TENSOR.ATTN_OUT,
2470 MODEL_TENSOR.ATTN_POST_NORM,
2471 MODEL_TENSOR.FFN_GATE,
2472 MODEL_TENSOR.FFN_DOWN,
2473 MODEL_TENSOR.FFN_UP,
2474 MODEL_TENSOR.OUTPUT_NORM,
2475 MODEL_TENSOR.OUTPUT,
2476 ],
2477 MODEL_ARCH.OLMOE: [
2478 MODEL_TENSOR.TOKEN_EMBD,
2479 MODEL_TENSOR.OUTPUT_NORM,
2480 MODEL_TENSOR.OUTPUT,
2481 MODEL_TENSOR.ATTN_OUT,
2482 MODEL_TENSOR.ATTN_Q,
2483 MODEL_TENSOR.ATTN_K,
2484 MODEL_TENSOR.ATTN_V,
2485 MODEL_TENSOR.ATTN_NORM,
2486 MODEL_TENSOR.ATTN_Q_NORM,
2487 MODEL_TENSOR.ATTN_K_NORM,
2488 MODEL_TENSOR.FFN_NORM,
2489 MODEL_TENSOR.FFN_GATE_INP,
2490 MODEL_TENSOR.FFN_GATE_EXP,
2491 MODEL_TENSOR.FFN_UP_EXP,
2492 MODEL_TENSOR.FFN_DOWN_EXP,
2493 ],
2494 MODEL_ARCH.OPENELM: [
2495 MODEL_TENSOR.TOKEN_EMBD,
2496 MODEL_TENSOR.OUTPUT_NORM,
2497 MODEL_TENSOR.ATTN_NORM,
2498 MODEL_TENSOR.ATTN_QKV,
2499 MODEL_TENSOR.ATTN_Q_NORM,
2500 MODEL_TENSOR.ATTN_K_NORM,
2501 MODEL_TENSOR.ATTN_OUT,
2502 MODEL_TENSOR.FFN_NORM,
2503 MODEL_TENSOR.FFN_GATE,
2504 MODEL_TENSOR.FFN_DOWN,
2505 MODEL_TENSOR.FFN_UP,
2506 ],
2507 MODEL_ARCH.ARCTIC: [
2508 MODEL_TENSOR.TOKEN_EMBD,
2509 MODEL_TENSOR.OUTPUT_NORM,
2510 MODEL_TENSOR.OUTPUT,
2511 MODEL_TENSOR.ROPE_FREQS,
2512 MODEL_TENSOR.ATTN_NORM,
2513 MODEL_TENSOR.ATTN_Q,
2514 MODEL_TENSOR.ATTN_K,
2515 MODEL_TENSOR.ATTN_V,
2516 MODEL_TENSOR.ATTN_OUT,
2517 MODEL_TENSOR.ATTN_ROT_EMBD,
2518 MODEL_TENSOR.FFN_GATE_INP,
2519 MODEL_TENSOR.FFN_NORM,
2520 MODEL_TENSOR.FFN_GATE,
2521 MODEL_TENSOR.FFN_DOWN,
2522 MODEL_TENSOR.FFN_UP,
2523 MODEL_TENSOR.FFN_NORM_EXP,
2524 MODEL_TENSOR.FFN_GATE_EXP,
2525 MODEL_TENSOR.FFN_DOWN_EXP,
2526 MODEL_TENSOR.FFN_UP_EXP,
2527 ],
2528 MODEL_ARCH.DEEPSEEK: [
2529 MODEL_TENSOR.TOKEN_EMBD,
2530 MODEL_TENSOR.OUTPUT_NORM,
2531 MODEL_TENSOR.OUTPUT,
2532 MODEL_TENSOR.ROPE_FREQS,
2533 MODEL_TENSOR.ATTN_NORM,
2534 MODEL_TENSOR.ATTN_Q,
2535 MODEL_TENSOR.ATTN_K,
2536 MODEL_TENSOR.ATTN_V,
2537 MODEL_TENSOR.ATTN_OUT,
2538 MODEL_TENSOR.ATTN_ROT_EMBD,
2539 MODEL_TENSOR.FFN_GATE_INP,
2540 MODEL_TENSOR.FFN_NORM,
2541 MODEL_TENSOR.FFN_GATE,
2542 MODEL_TENSOR.FFN_DOWN,
2543 MODEL_TENSOR.FFN_UP,
2544 MODEL_TENSOR.FFN_GATE_EXP,
2545 MODEL_TENSOR.FFN_DOWN_EXP,
2546 MODEL_TENSOR.FFN_UP_EXP,
2547 MODEL_TENSOR.FFN_GATE_SHEXP,
2548 MODEL_TENSOR.FFN_DOWN_SHEXP,
2549 MODEL_TENSOR.FFN_UP_SHEXP,
2550 ],
2551 MODEL_ARCH.DEEPSEEK2: [
2552 MODEL_TENSOR.TOKEN_EMBD,
2553 MODEL_TENSOR.OUTPUT_NORM,
2554 MODEL_TENSOR.OUTPUT,
2555 MODEL_TENSOR.ROPE_FREQS,
2556 MODEL_TENSOR.ATTN_NORM,
2557 MODEL_TENSOR.ATTN_Q,
2558 MODEL_TENSOR.ATTN_Q_A,
2559 MODEL_TENSOR.ATTN_Q_B,
2560 MODEL_TENSOR.ATTN_KV_A_MQA,
2561 MODEL_TENSOR.ATTN_KV_B,
2562 MODEL_TENSOR.ATTN_K_B,
2563 MODEL_TENSOR.ATTN_V_B,
2564 MODEL_TENSOR.ATTN_Q_A_NORM,
2565 MODEL_TENSOR.ATTN_KV_A_NORM,
2566 MODEL_TENSOR.ATTN_OUT,
2567 MODEL_TENSOR.ATTN_ROT_EMBD,
2568 MODEL_TENSOR.FFN_GATE_INP,
2569 MODEL_TENSOR.FFN_NORM,
2570 MODEL_TENSOR.FFN_GATE,
2571 MODEL_TENSOR.FFN_DOWN,
2572 MODEL_TENSOR.FFN_UP,
2573 MODEL_TENSOR.FFN_GATE_EXP,
2574 MODEL_TENSOR.FFN_DOWN_EXP,
2575 MODEL_TENSOR.FFN_UP_EXP,
2576 MODEL_TENSOR.FFN_GATE_SHEXP,
2577 MODEL_TENSOR.FFN_DOWN_SHEXP,
2578 MODEL_TENSOR.FFN_UP_SHEXP,
2579 MODEL_TENSOR.FFN_EXP_PROBS_B,
2580 ],
2581 MODEL_ARCH.ERNIE4_5_MOE: [
2582 MODEL_TENSOR.TOKEN_EMBD,
2583 MODEL_TENSOR.OUTPUT_NORM,
2584 MODEL_TENSOR.OUTPUT,
2585 MODEL_TENSOR.ATTN_NORM,
2586 MODEL_TENSOR.ATTN_Q,
2587 MODEL_TENSOR.ATTN_K,
2588 MODEL_TENSOR.ATTN_V,
2589 MODEL_TENSOR.ATTN_OUT,
2590 MODEL_TENSOR.FFN_NORM,
2591 MODEL_TENSOR.FFN_GATE,
2592 MODEL_TENSOR.FFN_DOWN,
2593 MODEL_TENSOR.FFN_UP,
2594 MODEL_TENSOR.FFN_GATE_INP,
2595 MODEL_TENSOR.FFN_GATE_EXP,
2596 MODEL_TENSOR.FFN_DOWN_EXP,
2597 MODEL_TENSOR.FFN_UP_EXP,
2598 MODEL_TENSOR.FFN_GATE_SHEXP,
2599 MODEL_TENSOR.FFN_DOWN_SHEXP,
2600 MODEL_TENSOR.FFN_UP_SHEXP,
2601 MODEL_TENSOR.FFN_EXP_PROBS_B,
2602 ],
2603 MODEL_ARCH.PLM: [
2604 MODEL_TENSOR.TOKEN_EMBD,
2605 MODEL_TENSOR.OUTPUT,
2606 MODEL_TENSOR.OUTPUT_NORM,
2607 MODEL_TENSOR.ATTN_NORM,
2608 MODEL_TENSOR.ATTN_Q,
2609 MODEL_TENSOR.ATTN_KV_A_MQA,
2610 MODEL_TENSOR.ATTN_KV_A_NORM,
2611 MODEL_TENSOR.ATTN_KV_B,
2612 MODEL_TENSOR.ATTN_OUT,
2613 MODEL_TENSOR.FFN_NORM,
2614 MODEL_TENSOR.FFN_UP,
2615 MODEL_TENSOR.FFN_DOWN,
2616 ],
2617 MODEL_ARCH.CHATGLM : [
2618 MODEL_TENSOR.TOKEN_EMBD,
2619 MODEL_TENSOR.ROPE_FREQS,
2620 MODEL_TENSOR.OUTPUT_NORM,
2621 MODEL_TENSOR.OUTPUT,
2622 MODEL_TENSOR.ATTN_NORM,
2623 MODEL_TENSOR.ATTN_QKV,
2624 MODEL_TENSOR.ATTN_Q,
2625 MODEL_TENSOR.ATTN_K,
2626 MODEL_TENSOR.ATTN_V,
2627 MODEL_TENSOR.ATTN_OUT,
2628 MODEL_TENSOR.FFN_NORM,
2629 MODEL_TENSOR.FFN_DOWN,
2630 MODEL_TENSOR.FFN_UP,
2631 ],
2632 MODEL_ARCH.GLM4 : [
2633 MODEL_TENSOR.TOKEN_EMBD,
2634 MODEL_TENSOR.ROPE_FREQS,
2635 MODEL_TENSOR.OUTPUT_NORM,
2636 MODEL_TENSOR.OUTPUT,
2637 MODEL_TENSOR.ATTN_NORM,
2638 MODEL_TENSOR.ATTN_QKV,
2639 MODEL_TENSOR.ATTN_Q,
2640 MODEL_TENSOR.ATTN_K,
2641 MODEL_TENSOR.ATTN_V,
2642 MODEL_TENSOR.ATTN_OUT,
2643 MODEL_TENSOR.FFN_NORM,
2644 MODEL_TENSOR.FFN_DOWN,
2645 MODEL_TENSOR.FFN_UP,
2646 MODEL_TENSOR.ATTN_POST_NORM,
2647 MODEL_TENSOR.FFN_POST_NORM,
2648 ],
2649 MODEL_ARCH.GLM4_MOE: [
2650 MODEL_TENSOR.TOKEN_EMBD,
2651 MODEL_TENSOR.OUTPUT_NORM,
2652 MODEL_TENSOR.OUTPUT,
2653 MODEL_TENSOR.ATTN_NORM,
2654 MODEL_TENSOR.ATTN_POST_NORM,
2655 MODEL_TENSOR.ATTN_Q,
2656 MODEL_TENSOR.ATTN_K,
2657 MODEL_TENSOR.ATTN_V,
2658 MODEL_TENSOR.ATTN_OUT,
2659 MODEL_TENSOR.ATTN_Q_NORM,
2660 MODEL_TENSOR.ATTN_K_NORM,
2661 MODEL_TENSOR.FFN_GATE,
2662 MODEL_TENSOR.FFN_DOWN,
2663 MODEL_TENSOR.FFN_UP,
2664 MODEL_TENSOR.FFN_GATE_INP,
2665 MODEL_TENSOR.FFN_GATE_EXP,
2666 MODEL_TENSOR.FFN_DOWN_EXP,
2667 MODEL_TENSOR.FFN_UP_EXP,
2668 MODEL_TENSOR.FFN_GATE_SHEXP,
2669 MODEL_TENSOR.FFN_DOWN_SHEXP,
2670 MODEL_TENSOR.FFN_UP_SHEXP,
2671 MODEL_TENSOR.FFN_EXP_PROBS_B,
2672 # NextN/MTP tensors - preserved but unused
2673 MODEL_TENSOR.NEXTN_EH_PROJ,
2674 MODEL_TENSOR.NEXTN_EMBED_TOKENS,
2675 MODEL_TENSOR.NEXTN_ENORM,
2676 MODEL_TENSOR.NEXTN_HNORM,
2677 MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD,
2678 MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM,
2679 ],
2680 MODEL_ARCH.BITNET: [
2681 MODEL_TENSOR.ATTN_Q,
2682 MODEL_TENSOR.ATTN_K,
2683 MODEL_TENSOR.ATTN_V,
2684 MODEL_TENSOR.TOKEN_EMBD,
2685 MODEL_TENSOR.OUTPUT_NORM,
2686 MODEL_TENSOR.ATTN_NORM,
2687 MODEL_TENSOR.ATTN_OUT,
2688 MODEL_TENSOR.FFN_NORM,
2689 MODEL_TENSOR.FFN_GATE,
2690 MODEL_TENSOR.FFN_DOWN,
2691 MODEL_TENSOR.FFN_UP,
2692 MODEL_TENSOR.ATTN_SUB_NORM,
2693 MODEL_TENSOR.FFN_SUB_NORM,
2694 ],
2695 MODEL_ARCH.T5: [
2696 MODEL_TENSOR.TOKEN_EMBD,
2697 MODEL_TENSOR.OUTPUT,
2698 MODEL_TENSOR.DEC_ATTN_NORM,
2699 MODEL_TENSOR.DEC_ATTN_Q,
2700 MODEL_TENSOR.DEC_ATTN_K,
2701 MODEL_TENSOR.DEC_ATTN_V,
2702 MODEL_TENSOR.DEC_ATTN_OUT,
2703 MODEL_TENSOR.DEC_ATTN_REL_B,
2704 MODEL_TENSOR.DEC_CROSS_ATTN_NORM,
2705 MODEL_TENSOR.DEC_CROSS_ATTN_Q,
2706 MODEL_TENSOR.DEC_CROSS_ATTN_K,
2707 MODEL_TENSOR.DEC_CROSS_ATTN_V,
2708 MODEL_TENSOR.DEC_CROSS_ATTN_OUT,
2709 MODEL_TENSOR.DEC_CROSS_ATTN_REL_B,
2710 MODEL_TENSOR.DEC_FFN_NORM,
2711 MODEL_TENSOR.DEC_FFN_GATE,
2712 MODEL_TENSOR.DEC_FFN_DOWN,
2713 MODEL_TENSOR.DEC_FFN_UP,
2714 MODEL_TENSOR.DEC_OUTPUT_NORM,
2715 MODEL_TENSOR.ENC_ATTN_NORM,
2716 MODEL_TENSOR.ENC_ATTN_Q,
2717 MODEL_TENSOR.ENC_ATTN_K,
2718 MODEL_TENSOR.ENC_ATTN_V,
2719 MODEL_TENSOR.ENC_ATTN_OUT,
2720 MODEL_TENSOR.ENC_ATTN_REL_B,
2721 MODEL_TENSOR.ENC_FFN_NORM,
2722 MODEL_TENSOR.ENC_FFN_GATE,
2723 MODEL_TENSOR.ENC_FFN_DOWN,
2724 MODEL_TENSOR.ENC_FFN_UP,
2725 MODEL_TENSOR.ENC_OUTPUT_NORM,
2726 ],
2727 MODEL_ARCH.T5ENCODER: [
2728 MODEL_TENSOR.TOKEN_EMBD,
2729 MODEL_TENSOR.OUTPUT,
2730 MODEL_TENSOR.ENC_ATTN_NORM,
2731 MODEL_TENSOR.ENC_ATTN_Q,
2732 MODEL_TENSOR.ENC_ATTN_K,
2733 MODEL_TENSOR.ENC_ATTN_V,
2734 MODEL_TENSOR.ENC_ATTN_OUT,
2735 MODEL_TENSOR.ENC_ATTN_REL_B,
2736 MODEL_TENSOR.ENC_FFN_NORM,
2737 MODEL_TENSOR.ENC_FFN_GATE,
2738 MODEL_TENSOR.ENC_FFN_DOWN,
2739 MODEL_TENSOR.ENC_FFN_UP,
2740 MODEL_TENSOR.ENC_OUTPUT_NORM,
2741 ],
2742 MODEL_ARCH.JAIS: [
2743 MODEL_TENSOR.TOKEN_EMBD,
2744 MODEL_TENSOR.OUTPUT_NORM,
2745 MODEL_TENSOR.OUTPUT,
2746 MODEL_TENSOR.ATTN_NORM,
2747 MODEL_TENSOR.ATTN_QKV,
2748 MODEL_TENSOR.ATTN_OUT,
2749 MODEL_TENSOR.FFN_NORM,
2750 MODEL_TENSOR.FFN_DOWN,
2751 MODEL_TENSOR.FFN_GATE,
2752 MODEL_TENSOR.FFN_UP,
2753 ],
2754 MODEL_ARCH.NEMOTRON: [
2755 MODEL_TENSOR.TOKEN_EMBD,
2756 MODEL_TENSOR.OUTPUT_NORM,
2757 MODEL_TENSOR.OUTPUT,
2758 MODEL_TENSOR.ROPE_FREQS,
2759 MODEL_TENSOR.ATTN_NORM,
2760 MODEL_TENSOR.ATTN_Q,
2761 MODEL_TENSOR.ATTN_K,
2762 MODEL_TENSOR.ATTN_V,
2763 MODEL_TENSOR.ATTN_OUT,
2764 MODEL_TENSOR.ATTN_ROT_EMBD,
2765 MODEL_TENSOR.FFN_NORM,
2766 MODEL_TENSOR.FFN_DOWN,
2767 MODEL_TENSOR.FFN_UP,
2768 ],
2769 MODEL_ARCH.NEMOTRON_H: [
2770 MODEL_TENSOR.TOKEN_EMBD,
2771 MODEL_TENSOR.OUTPUT_NORM,
2772 MODEL_TENSOR.OUTPUT,
2773 MODEL_TENSOR.ATTN_NORM,
2774 MODEL_TENSOR.SSM_IN,
2775 MODEL_TENSOR.SSM_CONV1D,
2776 MODEL_TENSOR.SSM_DT,
2777 MODEL_TENSOR.SSM_A,
2778 MODEL_TENSOR.SSM_D,
2779 MODEL_TENSOR.SSM_NORM,
2780 MODEL_TENSOR.SSM_OUT,
2781 MODEL_TENSOR.ATTN_Q,
2782 MODEL_TENSOR.ATTN_K,
2783 MODEL_TENSOR.ATTN_V,
2784 MODEL_TENSOR.ATTN_OUT,
2785 MODEL_TENSOR.FFN_DOWN,
2786 MODEL_TENSOR.FFN_UP,
2787 ],
2788 MODEL_ARCH.NEMOTRON_H_MOE: [
2789 MODEL_TENSOR.TOKEN_EMBD,
2790 MODEL_TENSOR.OUTPUT_NORM,
2791 MODEL_TENSOR.OUTPUT,
2792 MODEL_TENSOR.ATTN_NORM,
2793 MODEL_TENSOR.SSM_IN,
2794 MODEL_TENSOR.SSM_CONV1D,
2795 MODEL_TENSOR.SSM_DT,
2796 MODEL_TENSOR.SSM_A,
2797 MODEL_TENSOR.SSM_D,
2798 MODEL_TENSOR.SSM_NORM,
2799 MODEL_TENSOR.SSM_OUT,
2800 MODEL_TENSOR.ATTN_Q,
2801 MODEL_TENSOR.ATTN_K,
2802 MODEL_TENSOR.ATTN_V,
2803 MODEL_TENSOR.ATTN_OUT,
2804 MODEL_TENSOR.FFN_DOWN,
2805 MODEL_TENSOR.FFN_UP,
2806 # experts
2807 MODEL_TENSOR.FFN_GATE_INP,
2808 MODEL_TENSOR.FFN_UP_EXP,
2809 MODEL_TENSOR.FFN_DOWN_EXP,
2810 # shared expert
2811 MODEL_TENSOR.FFN_DOWN_SHEXP,
2812 MODEL_TENSOR.FFN_UP_SHEXP,
2813 MODEL_TENSOR.FFN_EXP_PROBS_B,
2814 ],
2815 MODEL_ARCH.EXAONE: [
2816 MODEL_TENSOR.TOKEN_EMBD,
2817 MODEL_TENSOR.OUTPUT_NORM,
2818 MODEL_TENSOR.OUTPUT,
2819 MODEL_TENSOR.ROPE_FREQS,
2820 MODEL_TENSOR.ATTN_NORM,
2821 MODEL_TENSOR.ATTN_Q,
2822 MODEL_TENSOR.ATTN_K,
2823 MODEL_TENSOR.ATTN_V,
2824 MODEL_TENSOR.ATTN_OUT,
2825 MODEL_TENSOR.ATTN_ROT_EMBD,
2826 MODEL_TENSOR.FFN_NORM,
2827 MODEL_TENSOR.FFN_GATE,
2828 MODEL_TENSOR.FFN_DOWN,
2829 MODEL_TENSOR.FFN_UP,
2830 ],
2831 MODEL_ARCH.EXAONE4: [
2832 MODEL_TENSOR.TOKEN_EMBD,
2833 MODEL_TENSOR.OUTPUT_NORM,
2834 MODEL_TENSOR.OUTPUT,
2835 MODEL_TENSOR.ROPE_FREQS,
2836 MODEL_TENSOR.ATTN_Q,
2837 MODEL_TENSOR.ATTN_Q_NORM,
2838 MODEL_TENSOR.ATTN_K,
2839 MODEL_TENSOR.ATTN_K_NORM,
2840 MODEL_TENSOR.ATTN_V,
2841 MODEL_TENSOR.ATTN_OUT,
2842 MODEL_TENSOR.ATTN_POST_NORM,
2843 MODEL_TENSOR.FFN_GATE,
2844 MODEL_TENSOR.FFN_DOWN,
2845 MODEL_TENSOR.FFN_UP,
2846 MODEL_TENSOR.FFN_POST_NORM,
2847 ],
2848 MODEL_ARCH.EXAONE_MOE: [
2849 MODEL_TENSOR.TOKEN_EMBD,
2850 MODEL_TENSOR.OUTPUT_NORM,
2851 MODEL_TENSOR.OUTPUT,
2852 MODEL_TENSOR.ROPE_FREQS,
2853 MODEL_TENSOR.ATTN_NORM,
2854 MODEL_TENSOR.ATTN_Q,
2855 MODEL_TENSOR.ATTN_Q_NORM,
2856 MODEL_TENSOR.ATTN_K,
2857 MODEL_TENSOR.ATTN_K_NORM,
2858 MODEL_TENSOR.ATTN_V,
2859 MODEL_TENSOR.ATTN_OUT,
2860 MODEL_TENSOR.FFN_NORM,
2861 MODEL_TENSOR.FFN_GATE,
2862 MODEL_TENSOR.FFN_DOWN,
2863 MODEL_TENSOR.FFN_UP,
2864 MODEL_TENSOR.FFN_GATE_INP,
2865 MODEL_TENSOR.FFN_GATE_EXP,
2866 MODEL_TENSOR.FFN_DOWN_EXP,
2867 MODEL_TENSOR.FFN_UP_EXP,
2868 MODEL_TENSOR.FFN_GATE_SHEXP,
2869 MODEL_TENSOR.FFN_DOWN_SHEXP,
2870 MODEL_TENSOR.FFN_UP_SHEXP,
2871 MODEL_TENSOR.FFN_EXP_PROBS_B,
2872 # NextN/MTP tensors - preserved but unused
2873 MODEL_TENSOR.NEXTN_EH_PROJ,
2874 MODEL_TENSOR.NEXTN_EMBED_TOKENS,
2875 MODEL_TENSOR.NEXTN_ENORM,
2876 MODEL_TENSOR.NEXTN_HNORM,
2877 MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD,
2878 MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM,
2879 ],
2880 MODEL_ARCH.GRANITE: [
2881 MODEL_TENSOR.TOKEN_EMBD,
2882 MODEL_TENSOR.OUTPUT_NORM,
2883 MODEL_TENSOR.OUTPUT,
2884 MODEL_TENSOR.ATTN_NORM,
2885 MODEL_TENSOR.ATTN_Q,
2886 MODEL_TENSOR.ATTN_K,
2887 MODEL_TENSOR.ATTN_V,
2888 MODEL_TENSOR.ATTN_OUT,
2889 MODEL_TENSOR.FFN_NORM,
2890 MODEL_TENSOR.FFN_GATE,
2891 MODEL_TENSOR.FFN_DOWN,
2892 MODEL_TENSOR.FFN_UP,
2893 ],
2894 MODEL_ARCH.GRANITE_MOE: [
2895 MODEL_TENSOR.TOKEN_EMBD,
2896 MODEL_TENSOR.OUTPUT_NORM,
2897 MODEL_TENSOR.OUTPUT,
2898 MODEL_TENSOR.ATTN_NORM,
2899 MODEL_TENSOR.ATTN_Q,
2900 MODEL_TENSOR.ATTN_K,
2901 MODEL_TENSOR.ATTN_V,
2902 MODEL_TENSOR.ATTN_OUT,
2903 MODEL_TENSOR.FFN_NORM,
2904 MODEL_TENSOR.FFN_GATE_INP,
2905 MODEL_TENSOR.FFN_GATE_EXP,
2906 MODEL_TENSOR.FFN_DOWN_EXP,
2907 MODEL_TENSOR.FFN_UP_EXP,
2908 MODEL_TENSOR.FFN_GATE_SHEXP,
2909 MODEL_TENSOR.FFN_UP_SHEXP,
2910 MODEL_TENSOR.FFN_DOWN_SHEXP,
2911 ],
2912 MODEL_ARCH.GRANITE_HYBRID: [
2913 MODEL_TENSOR.TOKEN_EMBD,
2914 MODEL_TENSOR.OUTPUT_NORM,
2915 MODEL_TENSOR.OUTPUT,
2916 MODEL_TENSOR.ATTN_NORM,
2917 MODEL_TENSOR.SSM_IN,
2918 MODEL_TENSOR.SSM_CONV1D,
2919 MODEL_TENSOR.SSM_DT,
2920 MODEL_TENSOR.SSM_A,
2921 MODEL_TENSOR.SSM_D,
2922 MODEL_TENSOR.SSM_NORM,
2923 MODEL_TENSOR.SSM_OUT,
2924 MODEL_TENSOR.ATTN_Q,
2925 MODEL_TENSOR.ATTN_K,
2926 MODEL_TENSOR.ATTN_V,
2927 MODEL_TENSOR.ATTN_OUT,
2928 MODEL_TENSOR.FFN_NORM,
2929 # MoE
2930 MODEL_TENSOR.FFN_GATE_INP,
2931 MODEL_TENSOR.FFN_GATE_EXP,
2932 MODEL_TENSOR.FFN_DOWN_EXP,
2933 MODEL_TENSOR.FFN_UP_EXP,
2934 MODEL_TENSOR.FFN_GATE_SHEXP,
2935 MODEL_TENSOR.FFN_UP_SHEXP,
2936 MODEL_TENSOR.FFN_DOWN_SHEXP,
2937 # Dense
2938 MODEL_TENSOR.FFN_GATE,
2939 MODEL_TENSOR.FFN_DOWN,
2940 MODEL_TENSOR.FFN_UP,
2941 ],
2942 MODEL_ARCH.CHAMELEON: [
2943 MODEL_TENSOR.TOKEN_EMBD,
2944 MODEL_TENSOR.OUTPUT_NORM,
2945 MODEL_TENSOR.OUTPUT,
2946 MODEL_TENSOR.ATTN_NORM,
2947 MODEL_TENSOR.ATTN_Q,
2948 MODEL_TENSOR.ATTN_Q_NORM,
2949 MODEL_TENSOR.ATTN_K,
2950 MODEL_TENSOR.ATTN_K_NORM,
2951 MODEL_TENSOR.ATTN_V,
2952 MODEL_TENSOR.ATTN_OUT,
2953 MODEL_TENSOR.FFN_NORM,
2954 MODEL_TENSOR.FFN_GATE,
2955 MODEL_TENSOR.FFN_DOWN,
2956 MODEL_TENSOR.FFN_UP,
2957 ],
2958 MODEL_ARCH.WAVTOKENIZER_DEC: [
2959 MODEL_TENSOR.TOKEN_EMBD,
2960 MODEL_TENSOR.TOKEN_EMBD_NORM,
2961 MODEL_TENSOR.CONV1D,
2962 MODEL_TENSOR.CONVNEXT_DW,
2963 MODEL_TENSOR.CONVNEXT_NORM,
2964 MODEL_TENSOR.CONVNEXT_PW1,
2965 MODEL_TENSOR.CONVNEXT_PW2,
2966 MODEL_TENSOR.CONVNEXT_GAMMA,
2967 MODEL_TENSOR.OUTPUT,
2968 MODEL_TENSOR.OUTPUT_NORM,
2969 MODEL_TENSOR.POSNET_CONV1,
2970 MODEL_TENSOR.POSNET_CONV2,
2971 MODEL_TENSOR.POSNET_NORM,
2972 MODEL_TENSOR.POSNET_NORM1,
2973 MODEL_TENSOR.POSNET_NORM2,
2974 MODEL_TENSOR.POSNET_ATTN_NORM,
2975 MODEL_TENSOR.POSNET_ATTN_Q,
2976 MODEL_TENSOR.POSNET_ATTN_K,
2977 MODEL_TENSOR.POSNET_ATTN_V,
2978 MODEL_TENSOR.POSNET_ATTN_OUT,
2979 ],
2980 MODEL_ARCH.BAILINGMOE: [
2981 MODEL_TENSOR.TOKEN_EMBD,
2982 MODEL_TENSOR.OUTPUT_NORM,
2983 MODEL_TENSOR.OUTPUT,
2984 MODEL_TENSOR.ROPE_FREQS,
2985 MODEL_TENSOR.ATTN_NORM,
2986 MODEL_TENSOR.ATTN_Q,
2987 MODEL_TENSOR.ATTN_K,
2988 MODEL_TENSOR.ATTN_V,
2989 MODEL_TENSOR.ATTN_OUT,
2990 MODEL_TENSOR.FFN_GATE_INP,
2991 MODEL_TENSOR.FFN_NORM,
2992 MODEL_TENSOR.FFN_GATE_EXP,
2993 MODEL_TENSOR.FFN_DOWN_EXP,
2994 MODEL_TENSOR.FFN_UP_EXP,
2995 MODEL_TENSOR.FFN_GATE_SHEXP,
2996 MODEL_TENSOR.FFN_DOWN_SHEXP,
2997 MODEL_TENSOR.FFN_UP_SHEXP,
2998 ],
2999 MODEL_ARCH.BAILINGMOE2: [
3000 MODEL_TENSOR.TOKEN_EMBD,
3001 MODEL_TENSOR.OUTPUT_NORM,
3002 MODEL_TENSOR.OUTPUT,
3003 MODEL_TENSOR.ATTN_NORM,
3004 MODEL_TENSOR.ATTN_Q_NORM,
3005 MODEL_TENSOR.ATTN_K_NORM,
3006 MODEL_TENSOR.ATTN_QKV,
3007 MODEL_TENSOR.ATTN_OUT,
3008 MODEL_TENSOR.FFN_GATE_INP,
3009 MODEL_TENSOR.FFN_EXP_PROBS_B,
3010 MODEL_TENSOR.FFN_NORM,
3011 MODEL_TENSOR.FFN_GATE,
3012 MODEL_TENSOR.FFN_DOWN,
3013 MODEL_TENSOR.FFN_UP,
3014 MODEL_TENSOR.FFN_GATE_EXP,
3015 MODEL_TENSOR.FFN_DOWN_EXP,
3016 MODEL_TENSOR.FFN_UP_EXP,
3017 MODEL_TENSOR.FFN_GATE_SHEXP,
3018 MODEL_TENSOR.FFN_DOWN_SHEXP,
3019 MODEL_TENSOR.FFN_UP_SHEXP,
3020 MODEL_TENSOR.NEXTN_EH_PROJ,
3021 MODEL_TENSOR.NEXTN_EMBED_TOKENS,
3022 MODEL_TENSOR.NEXTN_ENORM,
3023 MODEL_TENSOR.NEXTN_HNORM,
3024 MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD,
3025 MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM,
3026 MODEL_TENSOR.LAYER_OUT_NORM,
3027 ],
3028 MODEL_ARCH.DOTS1: [
3029 MODEL_TENSOR.TOKEN_EMBD,
3030 MODEL_TENSOR.OUTPUT_NORM,
3031 MODEL_TENSOR.OUTPUT,
3032 MODEL_TENSOR.ATTN_NORM,
3033 MODEL_TENSOR.ATTN_Q,
3034 MODEL_TENSOR.ATTN_Q_NORM,
3035 MODEL_TENSOR.ATTN_K,
3036 MODEL_TENSOR.ATTN_K_NORM,
3037 MODEL_TENSOR.ATTN_V,
3038 MODEL_TENSOR.ATTN_OUT,
3039 MODEL_TENSOR.FFN_EXP_PROBS_B,
3040 MODEL_TENSOR.FFN_NORM,
3041 MODEL_TENSOR.FFN_GATE,
3042 MODEL_TENSOR.FFN_GATE_EXP,
3043 MODEL_TENSOR.FFN_GATE_INP,
3044 MODEL_TENSOR.FFN_GATE_SHEXP,
3045 MODEL_TENSOR.FFN_DOWN,
3046 MODEL_TENSOR.FFN_DOWN_EXP,
3047 MODEL_TENSOR.FFN_DOWN_SHEXP,
3048 MODEL_TENSOR.FFN_UP,
3049 MODEL_TENSOR.FFN_UP_EXP,
3050 MODEL_TENSOR.FFN_UP_SHEXP,
3051 ],
3052 MODEL_ARCH.ARCEE: [
3053 MODEL_TENSOR.TOKEN_EMBD,
3054 MODEL_TENSOR.OUTPUT_NORM,
3055 MODEL_TENSOR.OUTPUT,
3056 MODEL_TENSOR.ROPE_FREQS,
3057 MODEL_TENSOR.ATTN_NORM,
3058 MODEL_TENSOR.ATTN_Q,
3059 MODEL_TENSOR.ATTN_K,
3060 MODEL_TENSOR.ATTN_V,
3061 MODEL_TENSOR.ATTN_OUT,
3062 MODEL_TENSOR.ATTN_ROT_EMBD,
3063 MODEL_TENSOR.FFN_NORM,
3064 MODEL_TENSOR.FFN_DOWN,
3065 MODEL_TENSOR.FFN_UP,
3066 ],
3067 MODEL_ARCH.AFMOE: [
3068 MODEL_TENSOR.TOKEN_EMBD,
3069 MODEL_TENSOR.OUTPUT_NORM,
3070 MODEL_TENSOR.OUTPUT,
3071 MODEL_TENSOR.ATTN_NORM,
3072 MODEL_TENSOR.ATTN_POST_NORM,
3073 MODEL_TENSOR.ATTN_Q,
3074 MODEL_TENSOR.ATTN_K,
3075 MODEL_TENSOR.ATTN_V,
3076 MODEL_TENSOR.ATTN_OUT,
3077 MODEL_TENSOR.ATTN_Q_NORM,
3078 MODEL_TENSOR.ATTN_K_NORM,
3079 MODEL_TENSOR.ATTN_GATE,
3080 MODEL_TENSOR.FFN_GATE,
3081 MODEL_TENSOR.FFN_DOWN,
3082 MODEL_TENSOR.FFN_UP,
3083 MODEL_TENSOR.FFN_GATE_INP,
3084 MODEL_TENSOR.FFN_GATE_EXP,
3085 MODEL_TENSOR.FFN_DOWN_EXP,
3086 MODEL_TENSOR.FFN_UP_EXP,
3087 MODEL_TENSOR.FFN_GATE_SHEXP,
3088 MODEL_TENSOR.FFN_UP_SHEXP,
3089 MODEL_TENSOR.FFN_DOWN_SHEXP,
3090 MODEL_TENSOR.FFN_PRE_NORM,
3091 MODEL_TENSOR.FFN_POST_NORM,
3092 MODEL_TENSOR.FFN_EXP_PROBS_B,
3093 ],
3094 MODEL_ARCH.ERNIE4_5: [
3095 MODEL_TENSOR.TOKEN_EMBD,
3096 MODEL_TENSOR.OUTPUT_NORM,
3097 MODEL_TENSOR.OUTPUT,
3098 MODEL_TENSOR.ATTN_NORM,
3099 MODEL_TENSOR.ATTN_Q,
3100 MODEL_TENSOR.ATTN_K,
3101 MODEL_TENSOR.ATTN_V,
3102 MODEL_TENSOR.ATTN_OUT,
3103 MODEL_TENSOR.FFN_NORM,
3104 MODEL_TENSOR.FFN_GATE,
3105 MODEL_TENSOR.FFN_DOWN,
3106 MODEL_TENSOR.FFN_UP,
3107 ],
3108 MODEL_ARCH.FALCON_H1: [
3109 # Token embedding
3110 MODEL_TENSOR.TOKEN_EMBD,
3111
3112 # Input layernorm
3113 MODEL_TENSOR.ATTN_NORM,
3114
3115 # Attention components
3116 MODEL_TENSOR.ATTN_Q, # Query projection
3117 MODEL_TENSOR.ATTN_K, # Key projection
3118 MODEL_TENSOR.ATTN_V, # Value projection
3119 MODEL_TENSOR.ATTN_OUT, # Output projection
3120
3121 # SSM components (Mamba2 specific)
3122 MODEL_TENSOR.SSM_IN, # Input projection for SSM
3123 MODEL_TENSOR.SSM_CONV1D, # Convolution layer
3124 MODEL_TENSOR.SSM_DT, # Delta time projection
3125 MODEL_TENSOR.SSM_A, # A parameter (log form)
3126 MODEL_TENSOR.SSM_D, # D parameter
3127 MODEL_TENSOR.SSM_NORM, # Normalization in SSM
3128 MODEL_TENSOR.SSM_OUT, # Output projection
3129
3130 # Pre-feedforward layernorm
3131 MODEL_TENSOR.FFN_PRE_NORM,
3132
3133 # Feed-forward network components
3134 MODEL_TENSOR.FFN_GATE, # Gate projection (SwiGLU)
3135 MODEL_TENSOR.FFN_DOWN, # Down projection
3136 MODEL_TENSOR.FFN_UP, # Up projection
3137
3138 # Post-feedforward layernorm
3139 MODEL_TENSOR.OUTPUT_NORM, # Final layer norm
3140 MODEL_TENSOR.OUTPUT, # Output projection (lm_head)
3141 ],
3142 MODEL_ARCH.HUNYUAN_MOE: [
3143 MODEL_TENSOR.TOKEN_EMBD,
3144 MODEL_TENSOR.OUTPUT_NORM,
3145 MODEL_TENSOR.OUTPUT,
3146 MODEL_TENSOR.ROPE_FREQS,
3147 MODEL_TENSOR.ATTN_NORM,
3148 MODEL_TENSOR.ATTN_Q,
3149 MODEL_TENSOR.ATTN_Q_NORM,
3150 MODEL_TENSOR.ATTN_K,
3151 MODEL_TENSOR.ATTN_K_NORM,
3152 MODEL_TENSOR.ATTN_V,
3153 MODEL_TENSOR.ATTN_OUT,
3154 MODEL_TENSOR.FFN_GATE_INP,
3155 MODEL_TENSOR.FFN_NORM,
3156 MODEL_TENSOR.FFN_GATE_EXP,
3157 MODEL_TENSOR.FFN_DOWN_EXP,
3158 MODEL_TENSOR.FFN_UP_EXP,
3159 MODEL_TENSOR.FFN_GATE_SHEXP,
3160 MODEL_TENSOR.FFN_DOWN_SHEXP,
3161 MODEL_TENSOR.FFN_UP_SHEXP,
3162 ],
3163 MODEL_ARCH.HUNYUAN_DENSE: [
3164 MODEL_TENSOR.TOKEN_EMBD,
3165 MODEL_TENSOR.OUTPUT_NORM,
3166 MODEL_TENSOR.OUTPUT,
3167 MODEL_TENSOR.ATTN_NORM,
3168 MODEL_TENSOR.ATTN_Q,
3169 MODEL_TENSOR.ATTN_Q_NORM,
3170 MODEL_TENSOR.ATTN_K,
3171 MODEL_TENSOR.ATTN_K_NORM,
3172 MODEL_TENSOR.ATTN_V,
3173 MODEL_TENSOR.ATTN_OUT,
3174 MODEL_TENSOR.FFN_NORM,
3175 MODEL_TENSOR.FFN_GATE,
3176 MODEL_TENSOR.FFN_DOWN,
3177 MODEL_TENSOR.FFN_UP,
3178 ],
3179 MODEL_ARCH.SMOLLM3: [
3180 MODEL_TENSOR.TOKEN_EMBD,
3181 MODEL_TENSOR.OUTPUT_NORM,
3182 MODEL_TENSOR.OUTPUT,
3183 MODEL_TENSOR.ROPE_FREQS,
3184 MODEL_TENSOR.ATTN_NORM,
3185 MODEL_TENSOR.ATTN_Q,
3186 MODEL_TENSOR.ATTN_K,
3187 MODEL_TENSOR.ATTN_V,
3188 MODEL_TENSOR.ATTN_OUT,
3189 MODEL_TENSOR.ATTN_ROT_EMBD,
3190 MODEL_TENSOR.FFN_NORM,
3191 MODEL_TENSOR.FFN_GATE,
3192 MODEL_TENSOR.FFN_DOWN,
3193 MODEL_TENSOR.FFN_UP,
3194 ],
3195 MODEL_ARCH.GPT_OSS: [
3196 MODEL_TENSOR.TOKEN_EMBD,
3197 MODEL_TENSOR.OUTPUT_NORM,
3198 MODEL_TENSOR.OUTPUT,
3199 MODEL_TENSOR.ATTN_NORM,
3200 MODEL_TENSOR.ATTN_POST_NORM,
3201 MODEL_TENSOR.ATTN_Q,
3202 MODEL_TENSOR.ATTN_K,
3203 MODEL_TENSOR.ATTN_V,
3204 MODEL_TENSOR.ATTN_OUT,
3205 MODEL_TENSOR.ATTN_SINKS,
3206 MODEL_TENSOR.FFN_GATE_INP,
3207 MODEL_TENSOR.FFN_GATE_EXP,
3208 MODEL_TENSOR.FFN_DOWN_EXP,
3209 MODEL_TENSOR.FFN_UP_EXP,
3210 ],
3211 MODEL_ARCH.LFM2: [
3212 MODEL_TENSOR.TOKEN_EMBD,
3213 MODEL_TENSOR.TOKEN_EMBD_NORM,
3214 MODEL_TENSOR.SHORTCONV_CONV,
3215 MODEL_TENSOR.SHORTCONV_INPROJ,
3216 MODEL_TENSOR.SHORTCONV_OUTPROJ,
3217 MODEL_TENSOR.FFN_GATE,
3218 MODEL_TENSOR.FFN_DOWN,
3219 MODEL_TENSOR.FFN_UP,
3220 MODEL_TENSOR.FFN_NORM,
3221 MODEL_TENSOR.ATTN_NORM, # operator_norm
3222 MODEL_TENSOR.ATTN_Q_NORM,
3223 MODEL_TENSOR.ATTN_K_NORM,
3224 MODEL_TENSOR.ATTN_Q,
3225 MODEL_TENSOR.ATTN_K,
3226 MODEL_TENSOR.ATTN_V,
3227 MODEL_TENSOR.ATTN_OUT,
3228 MODEL_TENSOR.OUTPUT,
3229 MODEL_TENSOR.DENSE_2_OUT, # LFM2-ColBert-350M
3230 ],
3231 MODEL_ARCH.LFM2MOE: [
3232 MODEL_TENSOR.TOKEN_EMBD,
3233 MODEL_TENSOR.TOKEN_EMBD_NORM,
3234 MODEL_TENSOR.SHORTCONV_CONV,
3235 MODEL_TENSOR.SHORTCONV_INPROJ,
3236 MODEL_TENSOR.SHORTCONV_OUTPROJ,
3237 MODEL_TENSOR.FFN_GATE,
3238 MODEL_TENSOR.FFN_DOWN,
3239 MODEL_TENSOR.FFN_UP,
3240 MODEL_TENSOR.FFN_NORM,
3241 MODEL_TENSOR.ATTN_NORM, # operator_norm
3242 MODEL_TENSOR.ATTN_Q_NORM,
3243 MODEL_TENSOR.ATTN_K_NORM,
3244 MODEL_TENSOR.ATTN_Q,
3245 MODEL_TENSOR.ATTN_K,
3246 MODEL_TENSOR.ATTN_V,
3247 MODEL_TENSOR.ATTN_OUT,
3248 MODEL_TENSOR.FFN_GATE_INP,
3249 MODEL_TENSOR.FFN_GATE_EXP,
3250 MODEL_TENSOR.FFN_DOWN_EXP,
3251 MODEL_TENSOR.FFN_UP_EXP,
3252 MODEL_TENSOR.FFN_EXP_PROBS_B,
3253 ],
3254 MODEL_ARCH.SMALLTHINKER: [
3255 MODEL_TENSOR.TOKEN_EMBD,
3256 MODEL_TENSOR.OUTPUT_NORM,
3257 MODEL_TENSOR.OUTPUT,
3258 MODEL_TENSOR.ATTN_NORM,
3259 MODEL_TENSOR.ATTN_Q,
3260 MODEL_TENSOR.ATTN_K,
3261 MODEL_TENSOR.ATTN_V,
3262 MODEL_TENSOR.ATTN_OUT,
3263 MODEL_TENSOR.FFN_NORM,
3264 MODEL_TENSOR.FFN_GATE,
3265 MODEL_TENSOR.FFN_DOWN,
3266 MODEL_TENSOR.FFN_UP,
3267 MODEL_TENSOR.FFN_GATE_INP,
3268 MODEL_TENSOR.FFN_GATE_EXP,
3269 MODEL_TENSOR.FFN_DOWN_EXP,
3270 MODEL_TENSOR.FFN_UP_EXP,
3271 ],
3272 MODEL_ARCH.APERTUS: [
3273 MODEL_TENSOR.TOKEN_EMBD,
3274 MODEL_TENSOR.OUTPUT_NORM,
3275 MODEL_TENSOR.OUTPUT,
3276 MODEL_TENSOR.ROPE_FREQS,
3277 MODEL_TENSOR.ATTN_NORM,
3278 MODEL_TENSOR.ATTN_Q,
3279 MODEL_TENSOR.ATTN_K,
3280 MODEL_TENSOR.ATTN_V,
3281 MODEL_TENSOR.ATTN_OUT,
3282 MODEL_TENSOR.ATTN_ROT_EMBD,
3283 MODEL_TENSOR.ATTN_Q_NORM,
3284 MODEL_TENSOR.ATTN_K_NORM,
3285 MODEL_TENSOR.FFN_NORM,
3286 MODEL_TENSOR.FFN_GATE,
3287 MODEL_TENSOR.FFN_DOWN,
3288 MODEL_TENSOR.FFN_UP,
3289 ],
3290 MODEL_ARCH.LLADA_MOE: [
3291 MODEL_TENSOR.TOKEN_EMBD,
3292 MODEL_TENSOR.OUTPUT_NORM,
3293 MODEL_TENSOR.OUTPUT,
3294 MODEL_TENSOR.ATTN_OUT,
3295 MODEL_TENSOR.ATTN_Q,
3296 MODEL_TENSOR.ATTN_K,
3297 MODEL_TENSOR.ATTN_V,
3298 MODEL_TENSOR.ATTN_NORM,
3299 MODEL_TENSOR.ATTN_Q_NORM,
3300 MODEL_TENSOR.ATTN_K_NORM,
3301 MODEL_TENSOR.FFN_NORM,
3302 MODEL_TENSOR.FFN_GATE_INP,
3303 MODEL_TENSOR.FFN_GATE_EXP,
3304 MODEL_TENSOR.FFN_UP_EXP,
3305 MODEL_TENSOR.FFN_DOWN_EXP,
3306 ],
3307 MODEL_ARCH.GROVEMOE: [
3308 MODEL_TENSOR.TOKEN_EMBD,
3309 MODEL_TENSOR.OUTPUT_NORM,
3310 MODEL_TENSOR.OUTPUT,
3311 MODEL_TENSOR.ATTN_NORM,
3312 MODEL_TENSOR.ATTN_Q,
3313 MODEL_TENSOR.ATTN_Q_NORM,
3314 MODEL_TENSOR.ATTN_K,
3315 MODEL_TENSOR.ATTN_K_NORM,
3316 MODEL_TENSOR.ATTN_V,
3317 MODEL_TENSOR.ATTN_OUT,
3318 MODEL_TENSOR.FFN_NORM,
3319 MODEL_TENSOR.FFN_GATE_INP,
3320 MODEL_TENSOR.FFN_GATE_EXP,
3321 MODEL_TENSOR.FFN_DOWN_EXP,
3322 MODEL_TENSOR.FFN_UP_EXP,
3323 MODEL_TENSOR.FFN_GATE_CHEXP,
3324 MODEL_TENSOR.FFN_DOWN_CHEXP,
3325 MODEL_TENSOR.FFN_UP_CHEXP,
3326 ],
3327 MODEL_ARCH.MINIMAXM2: [
3328 MODEL_TENSOR.TOKEN_EMBD,
3329 MODEL_TENSOR.OUTPUT_NORM,
3330 MODEL_TENSOR.OUTPUT,
3331 MODEL_TENSOR.ATTN_NORM,
3332 MODEL_TENSOR.ATTN_Q,
3333 MODEL_TENSOR.ATTN_Q_NORM,
3334 MODEL_TENSOR.ATTN_K,
3335 MODEL_TENSOR.ATTN_K_NORM,
3336 MODEL_TENSOR.ATTN_V,
3337 MODEL_TENSOR.ATTN_OUT,
3338 MODEL_TENSOR.FFN_NORM,
3339 MODEL_TENSOR.FFN_GATE_INP,
3340 MODEL_TENSOR.FFN_GATE_EXP,
3341 MODEL_TENSOR.FFN_DOWN_EXP,
3342 MODEL_TENSOR.FFN_UP_EXP,
3343 MODEL_TENSOR.FFN_EXP_PROBS_B,
3344 ],
3345 MODEL_ARCH.COGVLM: [
3346 MODEL_TENSOR.TOKEN_EMBD,
3347 MODEL_TENSOR.OUTPUT_NORM,
3348 MODEL_TENSOR.OUTPUT,
3349 MODEL_TENSOR.ATTN_NORM,
3350 MODEL_TENSOR.ATTN_QKV,
3351 MODEL_TENSOR.ATTN_OUT,
3352 MODEL_TENSOR.FFN_NORM,
3353 MODEL_TENSOR.FFN_GATE,
3354 MODEL_TENSOR.FFN_DOWN,
3355 MODEL_TENSOR.FFN_UP,
3356 MODEL_TENSOR.VISEXP_ATTN_QKV,
3357 MODEL_TENSOR.VISEXP_ATTN_OUT,
3358 MODEL_TENSOR.VISEXP_GATE,
3359 MODEL_TENSOR.VISEXP_UP,
3360 MODEL_TENSOR.VISEXP_DOWN,
3361 ],
3362 MODEL_ARCH.RND1: [
3363 MODEL_TENSOR.TOKEN_EMBD,
3364 MODEL_TENSOR.OUTPUT_NORM,
3365 MODEL_TENSOR.OUTPUT,
3366 MODEL_TENSOR.ATTN_NORM,
3367 MODEL_TENSOR.ATTN_Q,
3368 MODEL_TENSOR.ATTN_Q_NORM,
3369 MODEL_TENSOR.ATTN_K,
3370 MODEL_TENSOR.ATTN_K_NORM,
3371 MODEL_TENSOR.ATTN_V,
3372 MODEL_TENSOR.ATTN_OUT,
3373 MODEL_TENSOR.FFN_NORM,
3374 MODEL_TENSOR.FFN_GATE_INP,
3375 MODEL_TENSOR.FFN_GATE_EXP,
3376 MODEL_TENSOR.FFN_DOWN_EXP,
3377 MODEL_TENSOR.FFN_UP_EXP,
3378 ],
3379 MODEL_ARCH.PANGU_EMBED: [
3380 MODEL_TENSOR.TOKEN_EMBD,
3381 MODEL_TENSOR.OUTPUT_NORM,
3382 MODEL_TENSOR.OUTPUT,
3383 MODEL_TENSOR.ATTN_NORM,
3384 MODEL_TENSOR.ATTN_Q,
3385 MODEL_TENSOR.ATTN_K,
3386 MODEL_TENSOR.ATTN_V,
3387 MODEL_TENSOR.ATTN_OUT,
3388 MODEL_TENSOR.FFN_NORM,
3389 MODEL_TENSOR.FFN_GATE,
3390 MODEL_TENSOR.FFN_DOWN,
3391 MODEL_TENSOR.FFN_UP,
3392 ],
3393 MODEL_ARCH.MISTRAL3: [
3394 MODEL_TENSOR.TOKEN_EMBD,
3395 MODEL_TENSOR.OUTPUT_NORM,
3396 MODEL_TENSOR.OUTPUT,
3397 MODEL_TENSOR.ROPE_FREQS,
3398 MODEL_TENSOR.ATTN_NORM,
3399 MODEL_TENSOR.ATTN_Q,
3400 MODEL_TENSOR.ATTN_K,
3401 MODEL_TENSOR.ATTN_V,
3402 MODEL_TENSOR.ATTN_OUT,
3403 MODEL_TENSOR.ATTN_ROT_EMBD,
3404 MODEL_TENSOR.FFN_GATE_INP,
3405 MODEL_TENSOR.FFN_NORM,
3406 MODEL_TENSOR.FFN_GATE,
3407 MODEL_TENSOR.FFN_DOWN,
3408 MODEL_TENSOR.FFN_UP,
3409 MODEL_TENSOR.FFN_GATE_EXP,
3410 MODEL_TENSOR.FFN_DOWN_EXP,
3411 MODEL_TENSOR.FFN_UP_EXP,
3412 ],
3413 MODEL_ARCH.MIMO2: [
3414 MODEL_TENSOR.TOKEN_EMBD,
3415 MODEL_TENSOR.OUTPUT_NORM,
3416 MODEL_TENSOR.OUTPUT,
3417 MODEL_TENSOR.ATTN_NORM,
3418 MODEL_TENSOR.ATTN_Q,
3419 MODEL_TENSOR.ATTN_K,
3420 MODEL_TENSOR.ATTN_V,
3421 MODEL_TENSOR.ATTN_SINKS,
3422 MODEL_TENSOR.ATTN_OUT,
3423 MODEL_TENSOR.FFN_NORM,
3424 MODEL_TENSOR.FFN_GATE,
3425 MODEL_TENSOR.FFN_DOWN,
3426 MODEL_TENSOR.FFN_UP,
3427 MODEL_TENSOR.FFN_GATE_INP,
3428 MODEL_TENSOR.FFN_GATE_EXP,
3429 MODEL_TENSOR.FFN_DOWN_EXP,
3430 MODEL_TENSOR.FFN_UP_EXP,
3431 MODEL_TENSOR.FFN_EXP_PROBS_B,
3432 ],
3433 MODEL_ARCH.STEP35: [
3434 MODEL_TENSOR.TOKEN_EMBD,
3435 MODEL_TENSOR.OUTPUT_NORM,
3436 MODEL_TENSOR.OUTPUT,
3437 MODEL_TENSOR.ROPE_FREQS,
3438 MODEL_TENSOR.ATTN_NORM,
3439 MODEL_TENSOR.ATTN_Q,
3440 MODEL_TENSOR.ATTN_Q_NORM,
3441 MODEL_TENSOR.ATTN_K,
3442 MODEL_TENSOR.ATTN_K_NORM,
3443 MODEL_TENSOR.ATTN_V,
3444 MODEL_TENSOR.ATTN_GATE,
3445 MODEL_TENSOR.ATTN_OUT,
3446 MODEL_TENSOR.FFN_NORM,
3447 MODEL_TENSOR.FFN_GATE,
3448 MODEL_TENSOR.FFN_DOWN,
3449 MODEL_TENSOR.FFN_UP,
3450 MODEL_TENSOR.FFN_GATE_INP,
3451 MODEL_TENSOR.FFN_GATE_EXP,
3452 MODEL_TENSOR.FFN_DOWN_EXP,
3453 MODEL_TENSOR.FFN_UP_EXP,
3454 MODEL_TENSOR.FFN_UP_SHEXP,
3455 MODEL_TENSOR.FFN_GATE_SHEXP,
3456 MODEL_TENSOR.FFN_DOWN_SHEXP,
3457 MODEL_TENSOR.FFN_EXP_PROBS_B,
3458 ],
3459 MODEL_ARCH.LLAMA_EMBED: [
3460 MODEL_TENSOR.TOKEN_EMBD,
3461 MODEL_TENSOR.OUTPUT_NORM,
3462 MODEL_TENSOR.OUTPUT,
3463 MODEL_TENSOR.ROPE_FREQS,
3464 MODEL_TENSOR.ATTN_NORM,
3465 MODEL_TENSOR.ATTN_Q,
3466 MODEL_TENSOR.ATTN_K,
3467 MODEL_TENSOR.ATTN_V,
3468 MODEL_TENSOR.ATTN_OUT,
3469 MODEL_TENSOR.ATTN_ROT_EMBD,
3470 MODEL_TENSOR.FFN_GATE_INP,
3471 MODEL_TENSOR.FFN_NORM,
3472 MODEL_TENSOR.FFN_GATE,
3473 MODEL_TENSOR.FFN_DOWN,
3474 MODEL_TENSOR.FFN_UP,
3475 MODEL_TENSOR.FFN_GATE_EXP,
3476 MODEL_TENSOR.FFN_DOWN_EXP,
3477 MODEL_TENSOR.FFN_UP_EXP,
3478 ],
3479 MODEL_ARCH.MAINCODER: [
3480 MODEL_TENSOR.TOKEN_EMBD,
3481 MODEL_TENSOR.OUTPUT_NORM,
3482 MODEL_TENSOR.OUTPUT,
3483 MODEL_TENSOR.ATTN_NORM,
3484 MODEL_TENSOR.ATTN_Q,
3485 MODEL_TENSOR.ATTN_Q_NORM,
3486 MODEL_TENSOR.ATTN_K,
3487 MODEL_TENSOR.ATTN_K_NORM,
3488 MODEL_TENSOR.ATTN_V,
3489 MODEL_TENSOR.ATTN_OUT,
3490 MODEL_TENSOR.FFN_NORM,
3491 MODEL_TENSOR.FFN_GATE,
3492 MODEL_TENSOR.FFN_DOWN,
3493 MODEL_TENSOR.FFN_UP,
3494 ],
3495 MODEL_ARCH.KIMI_LINEAR: [
3496 MODEL_TENSOR.TOKEN_EMBD,
3497 MODEL_TENSOR.OUTPUT_NORM,
3498 MODEL_TENSOR.OUTPUT,
3499 MODEL_TENSOR.ATTN_NORM,
3500 MODEL_TENSOR.ATTN_Q,
3501 MODEL_TENSOR.ATTN_K,
3502 MODEL_TENSOR.ATTN_V,
3503 MODEL_TENSOR.ATTN_OUT,
3504 MODEL_TENSOR.ATTN_Q_A,
3505 MODEL_TENSOR.ATTN_Q_B,
3506 MODEL_TENSOR.ATTN_KV_A_MQA,
3507 MODEL_TENSOR.ATTN_KV_B,
3508 MODEL_TENSOR.ATTN_K_B,
3509 MODEL_TENSOR.ATTN_V_B,
3510 MODEL_TENSOR.ATTN_Q_A_NORM,
3511 MODEL_TENSOR.ATTN_KV_A_NORM,
3512 MODEL_TENSOR.FFN_NORM,
3513 MODEL_TENSOR.FFN_GATE,
3514 MODEL_TENSOR.FFN_DOWN,
3515 MODEL_TENSOR.FFN_UP,
3516 MODEL_TENSOR.FFN_GATE_INP,
3517 MODEL_TENSOR.FFN_GATE_EXP,
3518 MODEL_TENSOR.FFN_DOWN_EXP,
3519 MODEL_TENSOR.FFN_UP_EXP,
3520 MODEL_TENSOR.SSM_CONV1D_Q,
3521 MODEL_TENSOR.SSM_CONV1D_K,
3522 MODEL_TENSOR.SSM_CONV1D_V,
3523 MODEL_TENSOR.SSM_F_A,
3524 MODEL_TENSOR.SSM_F_B,
3525 MODEL_TENSOR.SSM_BETA,
3526 MODEL_TENSOR.SSM_A,
3527 MODEL_TENSOR.SSM_G_A,
3528 MODEL_TENSOR.SSM_G_B,
3529 MODEL_TENSOR.SSM_DT,
3530 MODEL_TENSOR.SSM_NORM,
3531 MODEL_TENSOR.FFN_EXP_PROBS_B,
3532 MODEL_TENSOR.FFN_GATE_SHEXP,
3533 MODEL_TENSOR.FFN_DOWN_SHEXP,
3534 MODEL_TENSOR.FFN_UP_SHEXP,
3535 ],
3536 # TODO
3537}
3538
# tensors that will not be serialized
# (mostly the rotary-embedding scratch tensors, which are recomputed at load time)
MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
    MODEL_ARCH.LLAMA: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.DECI: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.BAICHUAN: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.QWEN: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.CODESHELL: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.ORION: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.STARCODER2: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.XVERSE: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.DEEPSEEK: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.DEEPSEEK2: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.CHATGLM: [MODEL_TENSOR.ROPE_FREQS],
    MODEL_ARCH.NEMOTRON: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
    MODEL_ARCH.BAILINGMOE: [MODEL_TENSOR.ROPE_FREQS],
    MODEL_ARCH.PANGU_EMBED: [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD],
}
3596
3597#
3598# types
3599#
3600
3601
class TokenType(IntEnum):
    """Per-token type ids for vocabulary entries.

    NOTE(review): values look like they mirror llama.cpp's token type enum /
    SentencePiece token classes — confirm against llama.h before renumbering.
    """
    NORMAL = 1
    UNKNOWN = 2
    CONTROL = 3
    USER_DEFINED = 4
    UNUSED = 5
    BYTE = 6
3609
3610
class RopeScalingType(Enum):
    """RoPE context-extension scaling scheme names.

    The string values are presumably the serialized form written for the
    rope scaling-type metadata key — verify against the writer before changing.
    """
    NONE = 'none'
    LINEAR = 'linear'
    YARN = 'yarn'
    LONGROPE = 'longrope'
3616
3617
class PoolingType(IntEnum):
    """Pooling mode for reducing token states to one embedding vector.

    NOTE(review): integer values appear to match llama.cpp's pooling-type
    enum — confirm before relying on specific numbers.
    """
    NONE = 0
    MEAN = 1
    CLS = 2
    LAST = 3
    RANK = 4
3624
3625
class GGMLQuantizationType(IntEnum):
    """Tensor data/quantization formats.

    Values appear to be the on-disk ggml type ids (`ggml_type` in ggml.h);
    keep them in sync with upstream and never renumber. Per-format block and
    byte sizes live in GGML_QUANT_SIZES.
    """
    F32 = 0
    F16 = 1
    Q4_0 = 2
    Q4_1 = 3
    # ids 4 and 5 are skipped — presumably the historic Q4_2/Q4_3 formats
    # whose support was removed upstream
    Q5_0 = 6
    Q5_1 = 7
    Q8_0 = 8
    Q8_1 = 9
    Q2_K = 10
    Q3_K = 11
    Q4_K = 12
    Q5_K = 13
    Q6_K = 14
    Q8_K = 15
    IQ2_XXS = 16
    IQ2_XS = 17
    IQ3_XXS = 18
    IQ1_S = 19
    IQ4_NL = 20
    IQ3_S = 21
    IQ2_S = 22
    IQ4_XS = 23
    I8 = 24
    I16 = 25
    I32 = 26
    I64 = 27
    F64 = 28
    IQ1_M = 29
    BF16 = 30
    # ids 31-33 and 36-38 are skipped — reserved/removed upstream
    # (TODO confirm against ggml.h)
    TQ1_0 = 34
    TQ2_0 = 35
    MXFP4 = 39
3659
3660
class ExpertGatingFuncType(IntEnum):
    """Activation applied to MoE expert-routing scores (per the enum names)."""
    SOFTMAX = 1
    SIGMOID = 2
3664
3665
3666# TODO: add GGMLFileType from ggml_ftype in ggml.h
3667
3668
3669# from llama_ftype in llama.h
3670# ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
class LlamaFileType(IntEnum):
    """Overall file quantization type (stored under `general.file_type`).

    Mirrors `llama_ftype` in llama.h — all values must stay identical to the
    C enum. Commented-out members are ids retired upstream; do not reuse them.
    """
    ALL_F32 = 0
    MOSTLY_F16 = 1 # except 1d tensors
    MOSTLY_Q4_0 = 2 # except 1d tensors
    MOSTLY_Q4_1 = 3 # except 1d tensors
    # MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
    # MOSTLY_Q4_2 = 5 # support has been removed
    # MOSTLY_Q4_3 = 6 # support has been removed
    MOSTLY_Q8_0 = 7 # except 1d tensors
    MOSTLY_Q5_0 = 8 # except 1d tensors
    MOSTLY_Q5_1 = 9 # except 1d tensors
    MOSTLY_Q2_K = 10 # except 1d tensors
    MOSTLY_Q3_K_S = 11 # except 1d tensors
    MOSTLY_Q3_K_M = 12 # except 1d tensors
    MOSTLY_Q3_K_L = 13 # except 1d tensors
    MOSTLY_Q4_K_S = 14 # except 1d tensors
    MOSTLY_Q4_K_M = 15 # except 1d tensors
    MOSTLY_Q5_K_S = 16 # except 1d tensors
    MOSTLY_Q5_K_M = 17 # except 1d tensors
    MOSTLY_Q6_K = 18 # except 1d tensors
    MOSTLY_IQ2_XXS = 19 # except 1d tensors
    MOSTLY_IQ2_XS = 20 # except 1d tensors
    MOSTLY_Q2_K_S = 21 # except 1d tensors
    MOSTLY_IQ3_XS = 22 # except 1d tensors
    MOSTLY_IQ3_XXS = 23 # except 1d tensors
    MOSTLY_IQ1_S = 24 # except 1d tensors
    MOSTLY_IQ4_NL = 25 # except 1d tensors
    MOSTLY_IQ3_S = 26 # except 1d tensors
    MOSTLY_IQ3_M = 27 # except 1d tensors
    MOSTLY_IQ2_S = 28 # except 1d tensors
    MOSTLY_IQ2_M = 29 # except 1d tensors
    MOSTLY_IQ4_XS = 30 # except 1d tensors
    MOSTLY_IQ1_M = 31 # except 1d tensors
    MOSTLY_BF16 = 32 # except 1d tensors
    # MOSTLY_Q4_0_4_4 = 33 # removed from gguf files, use Q4_0 and runtime repack
    # MOSTLY_Q4_0_4_8 = 34 # removed from gguf files, use Q4_0 and runtime repack
    # MOSTLY_Q4_0_8_8 = 35 # removed from gguf files, use Q4_0 and runtime repack
    MOSTLY_TQ1_0 = 36 # except 1d tensors
    MOSTLY_TQ2_0 = 37 # except 1d tensors

    GUESSED = 1024 # not specified in the model file
3712
3713
class GGUFEndian(IntEnum):
    """Byte order of a GGUF file."""
    LITTLE = 0
    BIG = 1
3717
3718
class GGUFValueType(IntEnum):
    """On-disk type tags for GGUF metadata key/value entries."""
    UINT8 = 0
    INT8 = 1
    UINT16 = 2
    INT16 = 3
    UINT32 = 4
    INT32 = 5
    FLOAT32 = 6
    BOOL = 7
    STRING = 8
    ARRAY = 9
    UINT64 = 10
    INT64 = 11
    FLOAT64 = 12

    @staticmethod
    def get_type(val: Any) -> GGUFValueType:
        """Map a Python value to the GGUF type tag used to serialize it.

        Raises ValueError for values with no GGUF representation. The bool
        check deliberately precedes the int check, since bool subclasses int.
        """
        if isinstance(val, (str, bytes, bytearray)):
            return GGUFValueType.STRING
        if isinstance(val, list):
            return GGUFValueType.ARRAY
        if isinstance(val, float):
            return GGUFValueType.FLOAT32
        if isinstance(val, bool):
            return GGUFValueType.BOOL
        if isinstance(val, int):
            return GGUFValueType.INT32
        # TODO: need help with 64-bit types in Python
        raise ValueError(f"Unknown type: {type(val)}")
3749
3750
class VisionProjectorType:
    """String identifiers for multimodal projector variants.

    Mostly vision projectors; entries marked below are audio or omni
    (audio+vision) projectors. The string values are serialized as-is, so
    they must not be changed.
    """
    GEMMA3 = "gemma3"
    GEMMA3NV = "gemma3nv"
    GEMMA3NA = "gemma3na"
    IDEFICS3 = "idefics3"
    PIXTRAL = "pixtral"
    LLAMA4 = "llama4"
    QWEN2VL = "qwen2vl_merger"
    QWEN25VL = "qwen2.5vl_merger"
    QWEN3VL = "qwen3vl_merger"
    ULTRAVOX = "ultravox"
    INTERNVL = "internvl"
    QWEN2A = "qwen2a" # audio
    GLMA = "glma" # audio
    QWEN25O = "qwen2.5o" # omni
    VOXTRAL = "voxtral"
    LFM2 = "lfm2"
    KIMIVL = "kimivl"
    KIMIK25 = "kimik25"
    LIGHTONOCR = "lightonocr"
    COGVLM = "cogvlm"
    JANUS_PRO = "janus_pro"
    LFM2A = "lfm2a" # audio
    MUSIC_FLAMINGO = "musicflamingo" # audio
    GLM4V = "glm4v"
    YOUTUVL = "youtuvl"
3777
3778
# Items here are (block size, type size)
QK_K = 256  # element count of a K-quant super-block (the 256-wide block size below)
# NOTE(review): the second tuple item is bytes per block, spelled out as a sum of
# the block's on-disk fields; values must stay in sync with ggml's type traits.
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    GGMLQuantizationType.F32: (1, 4),
    GGMLQuantizationType.F16: (1, 2),
    GGMLQuantizationType.Q4_0: (32, 2 + 16),
    GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
    GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
    GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
    GGMLQuantizationType.Q8_0: (32, 2 + 32),
    GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
    GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
    GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
    GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
    GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
    GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
    GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
    GGMLQuantizationType.IQ2_XS: (256, 2 + QK_K // 4 + QK_K // 32),
    GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
    GGMLQuantizationType.IQ1_S: (256, 2 + QK_K // 8 + QK_K // 16),
    GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
    GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
    GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
    GGMLQuantizationType.I8: (1, 1),
    GGMLQuantizationType.I16: (1, 2),
    GGMLQuantizationType.I32: (1, 4),
    GGMLQuantizationType.I64: (1, 8),
    GGMLQuantizationType.F64: (1, 8),
    GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
    GGMLQuantizationType.BF16: (1, 2),
    GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
    GGMLQuantizationType.TQ2_0: (256, 2 + 64),
    GGMLQuantizationType.MXFP4: (32, 1 + 16),
}
3815
3816
# Aliases for backward compatibility.
# Prefer the structured `Keys.*` constants in new code; these flat module-level
# names are kept only so older importers keep working.

# general
KEY_GENERAL_ARCHITECTURE = Keys.General.ARCHITECTURE
KEY_GENERAL_QUANTIZATION_VERSION = Keys.General.QUANTIZATION_VERSION
KEY_GENERAL_ALIGNMENT = Keys.General.ALIGNMENT
KEY_GENERAL_NAME = Keys.General.NAME
KEY_GENERAL_AUTHOR = Keys.General.AUTHOR
KEY_GENERAL_URL = Keys.General.URL
KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
KEY_GENERAL_LICENSE = Keys.General.LICENSE
KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE

# LLM
KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE
KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH
KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH
KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT
KEY_FEED_FORWARD_LENGTH = Keys.LLM.FEED_FORWARD_LENGTH
KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL
KEY_TENSOR_DATA_LAYOUT = Keys.LLM.TENSOR_DATA_LAYOUT

# attention
KEY_ATTENTION_HEAD_COUNT = Keys.Attention.HEAD_COUNT
KEY_ATTENTION_HEAD_COUNT_KV = Keys.Attention.HEAD_COUNT_KV
KEY_ATTENTION_MAX_ALIBI_BIAS = Keys.Attention.MAX_ALIBI_BIAS
KEY_ATTENTION_CLAMP_KQV = Keys.Attention.CLAMP_KQV
KEY_ATTENTION_LAYERNORM_EPS = Keys.Attention.LAYERNORM_EPS
KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.Attention.LAYERNORM_RMS_EPS

# RoPE
KEY_ROPE_DIMENSION_COUNT = Keys.Rope.DIMENSION_COUNT
KEY_ROPE_FREQ_BASE = Keys.Rope.FREQ_BASE
KEY_ROPE_SCALING_TYPE = Keys.Rope.SCALING_TYPE
KEY_ROPE_SCALING_FACTOR = Keys.Rope.SCALING_FACTOR
KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED

# SSM
KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
KEY_SSM_GROUP_COUNT = Keys.SSM.GROUP_COUNT
KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS

# KDA
KEY_KDA_HEAD_DIM = Keys.KDA.HEAD_DIM

# tokenization
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
KEY_TOKENIZER_PRE = Keys.Tokenizer.PRE
KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV

# fill-in-the-middle special token ids
KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID

# deprecated — presumably superseded by the FIM_* ids above; verify before removing
KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID