summaryrefslogtreecommitdiff
path: root/llama.cpp/src/models/models.h
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/src/models/models.h
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/src/models/models.h')
-rw-r--r--llama.cpp/src/models/models.h723
1 files changed, 723 insertions, 0 deletions
diff --git a/llama.cpp/src/models/models.h b/llama.cpp/src/models/models.h
new file mode 100644
index 0000000..3c66d32
--- /dev/null
+++ b/llama.cpp/src/models/models.h
@@ -0,0 +1,723 @@
1#pragma once
2
3#include "../llama-model.h"
4#include "../llama-graph.h"
5
6// TODO: remove in follow-up PR - move to .cpp files
7#include "../llama-memory-recurrent.h"
8#include <cmath>
9
10struct llm_graph_context_mamba : public llm_graph_context {
11 llm_graph_context_mamba(const llm_graph_params & params);
12
13 virtual ~llm_graph_context_mamba() = default;
14
15 ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
16 ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il) const;
17
18};
19
20// Base class for RWKV-related models
21struct llm_build_rwkv6_base : public llm_graph_context {
22 const llama_model & model;
23
24 llm_build_rwkv6_base(const llama_model & model, const llm_graph_params & params);
25
26 virtual ~llm_build_rwkv6_base() = default;
27
28 ggml_tensor * build_rwkv6_channel_mix(const llama_layer * layer,
29 ggml_tensor * cur,
30 ggml_tensor * x_prev,
31 llm_arch arch) const;
32
33 ggml_tensor * build_rwkv6_time_mix(llm_graph_input_rs * inp,
34 ggml_tensor * cur,
35 ggml_tensor * x_prev,
36 const llama_ubatch & ubatch,
37 int il) const;
38};
39
40// Base class for RWKV7-related models
41struct llm_build_rwkv7_base : public llm_graph_context {
42 const llama_model & model;
43
44 llm_build_rwkv7_base(const llama_model & model, const llm_graph_params & params);
45
46 virtual ~llm_build_rwkv7_base() = default;
47
48 // RWKV7-specific graph building methods
49 ggml_tensor * build_rwkv7_channel_mix(const llama_layer * layer,
50 ggml_tensor * cur,
51 ggml_tensor * x_prev,
52 llm_arch arch) const;
53 ggml_tensor * build_rwkv7_time_mix(llm_graph_input_rs * inp,
54 ggml_tensor * cur,
55 ggml_tensor * x_prev,
56 ggml_tensor *& first_layer_value,
57 const llama_ubatch & ubatch,
58 int il) const;
59};
60
61struct llm_build_afmoe : public llm_graph_context {
62 llm_build_afmoe(const llama_model & model, const llm_graph_params & params);
63};
64
65struct llm_build_apertus : public llm_graph_context {
66 llm_build_apertus(const llama_model & model, const llm_graph_params & params);
67};
68
69struct llm_build_arcee : public llm_graph_context {
70 llm_build_arcee(const llama_model & model, const llm_graph_params & params);
71};
72
73struct llm_build_arctic : public llm_graph_context {
74 llm_build_arctic(const llama_model & model, const llm_graph_params & params);
75};
76
77struct llm_build_arwkv7 : public llm_build_rwkv7_base {
78 llm_build_arwkv7(const llama_model & model, const llm_graph_params & params);
79};
80
81struct llm_build_baichuan : public llm_graph_context {
82 llm_build_baichuan(const llama_model & model, const llm_graph_params & params);
83};
84
85struct llm_build_bailingmoe2 : public llm_graph_context {
86 llm_build_bailingmoe2(const llama_model & model, const llm_graph_params & params);
87};
88
89struct llm_build_bailingmoe : public llm_graph_context {
90 llm_build_bailingmoe(const llama_model & model, const llm_graph_params & params);
91};
92
93struct llm_build_bert : public llm_graph_context {
94 llm_build_bert(const llama_model & model, const llm_graph_params & params);
95};
96
97struct llm_build_bitnet : public llm_graph_context {
98 llm_build_bitnet(const llama_model & model, const llm_graph_params & params);
99};
100
101struct llm_build_bloom : public llm_graph_context {
102 llm_build_bloom(const llama_model & model, const llm_graph_params & params);
103};
104
105struct llm_build_chameleon : public llm_graph_context {
106 llm_build_chameleon(const llama_model & model, const llm_graph_params & params);
107};
108
109struct llm_build_chatglm : public llm_graph_context {
110 llm_build_chatglm(const llama_model & model, const llm_graph_params & params);
111};
112
113struct llm_build_codeshell : public llm_graph_context {
114 llm_build_codeshell(const llama_model & model, const llm_graph_params & params);
115};
116
117struct llm_build_cogvlm : public llm_graph_context {
118 llm_build_cogvlm(const llama_model & model, const llm_graph_params & params);
119};
120
121struct llm_build_cohere2_iswa : public llm_graph_context {
122 llm_build_cohere2_iswa(const llama_model & model, const llm_graph_params & params);
123};
124
125struct llm_build_command_r : public llm_graph_context {
126 llm_build_command_r(const llama_model & model, const llm_graph_params & params);
127};
128
129struct llm_build_dbrx : public llm_graph_context {
130 llm_build_dbrx(const llama_model & model, const llm_graph_params & params);
131};
132
133struct llm_build_deci : public llm_graph_context {
134 llm_build_deci(const llama_model & model, const llm_graph_params & params);
135};
136
137struct llm_build_deepseek2 : public llm_graph_context {
138 llm_build_deepseek2(const llama_model & model, const llm_graph_params & params);
139};
140
141struct llm_build_deepseek : public llm_graph_context {
142 llm_build_deepseek(const llama_model & model, const llm_graph_params & params);
143};
144
145struct llm_build_dots1 : public llm_graph_context {
146 llm_build_dots1(const llama_model & model, const llm_graph_params & params);
147};
148
149struct llm_build_dream : public llm_graph_context {
150 llm_build_dream(const llama_model & model, const llm_graph_params & params);
151};
152
153struct llm_build_ernie4_5 : public llm_graph_context {
154 llm_build_ernie4_5(const llama_model & model, const llm_graph_params & params);
155};
156
157struct llm_build_ernie4_5_moe : public llm_graph_context {
158 llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params);
159};
160
161template <bool iswa>
162struct llm_build_exaone4 : public llm_graph_context {
163 llm_build_exaone4(const llama_model & model, const llm_graph_params & params);
164};
165
166struct llm_build_exaone : public llm_graph_context {
167 llm_build_exaone(const llama_model & model, const llm_graph_params & params);
168};
169
170struct llm_build_exaone_moe : public llm_graph_context {
171 llm_build_exaone_moe(const llama_model & model, const llm_graph_params & params);
172};
173
174struct llm_build_falcon : public llm_graph_context {
175 llm_build_falcon(const llama_model & model, const llm_graph_params & params);
176};
177
178struct llm_build_falcon_h1 : public llm_graph_context_mamba {
179 llm_build_falcon_h1(const llama_model & model, const llm_graph_params & params);
180};
181
182struct llm_build_gemma2_iswa : public llm_graph_context {
183 llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params);
184};
185
186template <bool iswa>
187struct llm_build_gemma3 : public llm_graph_context {
188 llm_build_gemma3(const llama_model & model, const llm_graph_params & params);
189};
190
191struct llm_build_gemma3n_iswa : public llm_graph_context {
192 const llama_model & model;
193
194 const int64_t n_embd_head;
195 const int64_t n_embd_altup;
196 const int64_t n_altup;
197 const int i_altup_act;
198 const int n_layer_sparsity = 10; // number of layers using activation sparsity
199 const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
200
201 llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params);
202 ggml_tensor * calc_magnitude(ggml_tensor * x);
203 ggml_tensor * view_2d_slice(ggml_tensor * x, int idx);
204 ggml_tensor * get_per_layer_inputs();
205 ggml_tensor * project_per_layer_inputs(ggml_tensor * inputs_embeds, ggml_tensor * inp_per_layer);
206 ggml_tensor * gaussian_topk(ggml_tensor * x);
207 ggml_tensor * altup_compute_router_modalities(ggml_tensor * x, int il);
208 ggml_tensor * altup_predict(ggml_tensor * cur, int il);
209 ggml_tensor * laurel(ggml_tensor * cur, int il);
210 ggml_tensor * altup_correct(ggml_tensor * predictions, ggml_tensor * activated, int il);
211};
212
213struct llm_build_gemma_embedding : public llm_graph_context {
214 llm_build_gemma_embedding(const llama_model & model, const llm_graph_params & params);
215};
216
217struct llm_build_gemma : public llm_graph_context {
218 llm_build_gemma(const llama_model & model, const llm_graph_params & params);
219};
220
221struct llm_build_glm4 : public llm_graph_context {
222 llm_build_glm4(const llama_model & model, const llm_graph_params & params);
223};
224
225struct llm_build_glm4_moe : public llm_graph_context {
226 llm_build_glm4_moe(const llama_model & model, const llm_graph_params & params);
227};
228
229struct llm_build_gpt2 : public llm_graph_context {
230 llm_build_gpt2(const llama_model & model, const llm_graph_params & params);
231};
232
233struct llm_build_gptneox : public llm_graph_context {
234 llm_build_gptneox(const llama_model & model, const llm_graph_params & params);
235};
236
237struct llm_build_granite : public llm_graph_context {
238 llm_build_granite(const llama_model & model, const llm_graph_params & params);
239
240private:
241 ggml_tensor * build_attention_layer(
242 ggml_tensor * cur,
243 ggml_tensor * inp_pos,
244 llm_graph_input_attn_kv * inp_attn,
245 const llama_model & model,
246 const int64_t n_embd_head,
247 const int il);
248
249 ggml_tensor * build_layer_ffn(
250 ggml_tensor * cur,
251 ggml_tensor * inpSA,
252 const llama_model & model,
253 const int il);
254};
255
256struct llm_build_granite_hybrid : public llm_graph_context_mamba {
257 llm_build_granite_hybrid(const llama_model & model, const llm_graph_params & params);
258 ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
259 ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn,
260 const llama_model & model,const int64_t n_embd_head, const int il);
261};
262
263struct llm_build_grok : public llm_graph_context {
264 llm_build_grok(const llama_model & model, const llm_graph_params & params);
265};
266
267struct llm_build_grovemoe : public llm_graph_context {
268 llm_build_grovemoe(const llama_model & model, const llm_graph_params & params);
269};
270
271struct llm_build_hunyuan_dense : public llm_graph_context {
272 llm_build_hunyuan_dense(const llama_model & model, const llm_graph_params & params);
273};
274
275struct llm_build_hunyuan_moe : public llm_graph_context {
276 llm_build_hunyuan_moe(const llama_model & model, const llm_graph_params & params);
277};
278
279struct llm_build_internlm2 : public llm_graph_context {
280 llm_build_internlm2(const llama_model & model, const llm_graph_params & params);
281};
282
283struct llm_build_jais : public llm_graph_context {
284 llm_build_jais(const llama_model & model, const llm_graph_params & params);
285};
286
287struct llm_build_jamba : public llm_graph_context_mamba {
288 llm_build_jamba(const llama_model & model, const llm_graph_params & params);
289};
290
291struct llm_build_kimi_linear : public llm_graph_context_mamba {
292 llm_build_kimi_linear(const llama_model & model, const llm_graph_params & params);
293
294 std::pair<ggml_tensor *, ggml_tensor *> build_kda_autoregressive(
295 ggml_tensor * q,
296 ggml_tensor * k,
297 ggml_tensor * v,
298 ggml_tensor * gk,
299 ggml_tensor * beta,
300 ggml_tensor * state,
301 int il);
302
303 std::pair<ggml_tensor *, ggml_tensor *> build_kda_chunking(
304 ggml_tensor * q,
305 ggml_tensor * k,
306 ggml_tensor * v,
307 ggml_tensor * gk,
308 ggml_tensor * beta,
309 ggml_tensor * state,
310 ggml_tensor * causal_mask,
311 ggml_tensor * identity,
312 ggml_tensor * diag_mask,
313 int il);
314
315 const llama_model & model;
316};
317
318struct llm_build_lfm2 : public llm_graph_context {
319 const llama_model & model;
320
321 llm_build_lfm2(const llama_model & model, const llm_graph_params & params);
322 ggml_tensor * build_moe_feed_forward(ggml_tensor * cur, int il) const;
323 ggml_tensor * build_dense_feed_forward(ggml_tensor * cur, int il) const;
324 ggml_tensor * build_attn_block(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, int il) const;
325 ggml_tensor * build_shortconv_block(ggml_tensor * cur, llm_graph_input_rs * inp_recr, int il);
326
327};
328
329struct llm_build_llada : public llm_graph_context {
330 llm_build_llada(const llama_model & model, const llm_graph_params & params);
331};
332
333struct llm_build_llada_moe : public llm_graph_context {
334 llm_build_llada_moe(const llama_model & model, const llm_graph_params & params);
335};
336
337template <bool embed>
338struct llm_build_llama : public llm_graph_context {
339 llm_build_llama(const llama_model & model, const llm_graph_params & params);
340};
341
342struct llm_build_llama_iswa : public llm_graph_context {
343 llm_build_llama_iswa(const llama_model & model, const llm_graph_params & params);
344};
345
346struct llm_build_maincoder : public llm_graph_context {
347 llm_build_maincoder(const llama_model & model, const llm_graph_params & params);
348};
349
350struct llm_build_mamba : public llm_graph_context_mamba {
351 llm_build_mamba(const llama_model & model, const llm_graph_params & params);
352};
353
354struct llm_build_mimo2_iswa : public llm_graph_context {
355 llm_build_mimo2_iswa(const llama_model & model, const llm_graph_params & params);
356};
357
358struct llm_build_minicpm3 : public llm_graph_context {
359 llm_build_minicpm3(const llama_model & model, const llm_graph_params & params);
360};
361
362struct llm_build_minimax_m2 : public llm_graph_context {
363 llm_build_minimax_m2(const llama_model & model, const llm_graph_params & params);
364};
365
366struct llm_build_mistral3 : public llm_graph_context {
367 llm_build_mistral3(const llama_model & model, const llm_graph_params & params);
368};
369
370struct llm_build_modern_bert : public llm_graph_context {
371 llm_build_modern_bert(const llama_model & model, const llm_graph_params & params);
372};
373
374struct llm_build_mpt : public llm_graph_context {
375 llm_build_mpt(const llama_model & model, const llm_graph_params & params);
376};
377
378struct llm_build_nemotron : public llm_graph_context {
379 llm_build_nemotron(const llama_model & model, const llm_graph_params & params);
380};
381
382struct llm_build_nemotron_h : public llm_graph_context_mamba {
383 llm_build_nemotron_h(const llama_model & model, const llm_graph_params & params);
384 ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il);
385 ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn,
386 const llama_model & model, const int64_t n_embd_head, const int il);
387};
388
389struct llm_build_neo_bert : public llm_graph_context {
390 llm_build_neo_bert(const llama_model & model, const llm_graph_params & params);
391};
392
393template <bool iswa>
394struct llm_build_olmo2 : public llm_graph_context {
395 llm_build_olmo2(const llama_model & model, const llm_graph_params & params);
396};
397
398struct llm_build_olmoe : public llm_graph_context {
399 llm_build_olmoe(const llama_model & model, const llm_graph_params & params);
400};
401
402struct llm_build_olmo : public llm_graph_context {
403 llm_build_olmo(const llama_model & model, const llm_graph_params & params);
404};
405
406struct llm_build_openai_moe_iswa : public llm_graph_context {
407 llm_build_openai_moe_iswa(const llama_model & model, const llm_graph_params & params);
408};
409
410struct llm_build_openelm : public llm_graph_context {
411 llm_build_openelm(const llama_model & model, const llm_graph_params & params);
412};
413
414struct llm_build_orion : public llm_graph_context {
415 llm_build_orion(const llama_model & model, const llm_graph_params & params);
416};
417
418struct llm_build_pangu_embedded : public llm_graph_context {
419 llm_build_pangu_embedded(const llama_model & model, const llm_graph_params & params);
420};
421
422struct llm_build_phi2 : public llm_graph_context {
423 llm_build_phi2(const llama_model & model, const llm_graph_params & params);
424};
425
426template<bool iswa>
427struct llm_build_phi3 : public llm_graph_context {
428 llm_build_phi3(const llama_model & model, const llm_graph_params & params);
429};
430
431struct llm_build_plamo2 : public llm_graph_context_mamba {
432 llm_build_plamo2(const llama_model & model, const llm_graph_params & params);
433 private:
434 ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
435 ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, ggml_tensor * inp_pos, ggml_tensor * cur,
436 const llama_model & model, int il);
437};
438
439struct llm_build_plamo : public llm_graph_context {
440 llm_build_plamo(const llama_model & model, const llm_graph_params & params);
441};
442
443template <bool iswa>
444struct llm_build_plamo3 : public llm_graph_context {
445 llm_build_plamo3(const llama_model & model, const llm_graph_params & params);
446};
447
448struct llm_build_plm : public llm_graph_context {
449 llm_build_plm(const llama_model & model, const llm_graph_params & params);
450};
451
452struct llm_build_qwen2 : public llm_graph_context {
453 llm_build_qwen2(const llama_model & model, const llm_graph_params & params);
454};
455
456struct llm_build_qwen2moe : public llm_graph_context {
457 llm_build_qwen2moe(const llama_model & model, const llm_graph_params & params);
458};
459
460struct llm_build_qwen2vl : public llm_graph_context {
461 llm_build_qwen2vl(const llama_model & model, const llm_graph_params & params);
462};
463
464struct llm_build_qwen3 : public llm_graph_context {
465 llm_build_qwen3(const llama_model & model, const llm_graph_params & params);
466};
467
468struct llm_build_qwen3moe : public llm_graph_context {
469 llm_build_qwen3moe(const llama_model & model, const llm_graph_params & params);
470};
471
472struct llm_build_qwen3vl : public llm_graph_context {
473 llm_build_qwen3vl(const llama_model & model, const llm_graph_params & params);
474};
475
476struct llm_build_qwen3vlmoe : public llm_graph_context {
477 llm_build_qwen3vlmoe(const llama_model & model, const llm_graph_params & params);
478};
479
480struct llm_build_qwen3next : public llm_graph_context_mamba {
481 llm_build_qwen3next(const llama_model & model, const llm_graph_params & params);
482private:
483 ggml_tensor * build_layer_attn(
484 llm_graph_input_attn_kv * inp_attn,
485 ggml_tensor * cur,
486 ggml_tensor * inp_pos,
487 int il);
488
489 ggml_tensor * build_layer_attn_linear(
490 llm_graph_input_rs * inp,
491 ggml_tensor * cur,
492 ggml_tensor * causal_mask,
493 ggml_tensor * identity,
494 ggml_tensor * diag_mask,
495 int il);
496
497 ggml_tensor * build_layer_ffn(
498 ggml_tensor * cur,
499 int il);
500
501 // returns pair of output and new state
502 std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
503 ggml_tensor * q,
504 ggml_tensor * k,
505 ggml_tensor * v,
506 ggml_tensor * g,
507 ggml_tensor * beta,
508 ggml_tensor * state,
509 ggml_tensor * causal_mask,
510 ggml_tensor * identity,
511 ggml_tensor * diag_mask,
512 int il);
513
514 // returns pair of output and new state
515 std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_autoregressive(
516 ggml_tensor * q,
517 ggml_tensor * k,
518 ggml_tensor * v,
519 ggml_tensor * g,
520 ggml_tensor * beta,
521 ggml_tensor * state,
522 int il);
523
524 ggml_tensor * build_norm_gated(
525 ggml_tensor * input,
526 ggml_tensor * weights,
527 ggml_tensor * gate,
528 int layer);
529
530 // returns pair of qkv, z
531 std::pair<ggml_tensor *, ggml_tensor *> build_qkvz(
532 ggml_tensor * input,
533 int il);
534
535 const llama_model & model;
536};
537
538struct llm_build_qwen35 : public llm_graph_context_mamba {
539 llm_build_qwen35(const llama_model & model, const llm_graph_params & params);
540private:
541 ggml_tensor * build_layer_attn(
542 llm_graph_input_attn_kv * inp_attn,
543 ggml_tensor * cur,
544 ggml_tensor * inp_pos,
545 int * sections,
546 int il);
547
548 ggml_tensor * build_layer_attn_linear(
549 llm_graph_input_rs * inp,
550 ggml_tensor * cur,
551 ggml_tensor * causal_mask,
552 ggml_tensor * identity,
553 ggml_tensor * diag_mask,
554 int il);
555
556 ggml_tensor * build_layer_ffn(
557 ggml_tensor * cur,
558 int il);
559
560 // returns pair of output and new state
561 std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
562 ggml_tensor * q,
563 ggml_tensor * k,
564 ggml_tensor * v,
565 ggml_tensor * g,
566 ggml_tensor * beta,
567 ggml_tensor * state,
568 ggml_tensor * causal_mask,
569 ggml_tensor * identity,
570 ggml_tensor * diag_mask,
571 int il);
572
573 // returns pair of output and new state
574 std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_autoregressive(
575 ggml_tensor * q,
576 ggml_tensor * k,
577 ggml_tensor * v,
578 ggml_tensor * g,
579 ggml_tensor * beta,
580 ggml_tensor * state,
581 int il);
582
583 ggml_tensor * build_norm_gated(
584 ggml_tensor * input,
585 ggml_tensor * weights,
586 ggml_tensor * gate,
587 int layer);
588
589 // returns pair of qkv, z
590 std::pair<ggml_tensor *, ggml_tensor *> build_qkvz(
591 ggml_tensor * input,
592 int il);
593
594 const llama_model & model;
595};
596
597struct llm_build_qwen35moe : public llm_graph_context_mamba {
598 llm_build_qwen35moe(const llama_model & model, const llm_graph_params & params);
599private:
600 ggml_tensor * build_layer_attn(
601 llm_graph_input_attn_kv * inp_attn,
602 ggml_tensor * cur,
603 ggml_tensor * inp_pos,
604 int * sections,
605 int il);
606
607 ggml_tensor * build_layer_attn_linear(
608 llm_graph_input_rs * inp,
609 ggml_tensor * cur,
610 ggml_tensor * causal_mask,
611 ggml_tensor * identity,
612 ggml_tensor * diag_mask,
613 int il);
614
615 ggml_tensor * build_layer_ffn(
616 ggml_tensor * cur,
617 int il);
618
619 // returns pair of output and new state
620 std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
621 ggml_tensor * q,
622 ggml_tensor * k,
623 ggml_tensor * v,
624 ggml_tensor * g,
625 ggml_tensor * beta,
626 ggml_tensor * state,
627 ggml_tensor * causal_mask,
628 ggml_tensor * identity,
629 ggml_tensor * diag_mask,
630 int il);
631
632 // returns pair of output and new state
633 std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_autoregressive(
634 ggml_tensor * q,
635 ggml_tensor * k,
636 ggml_tensor * v,
637 ggml_tensor * g,
638 ggml_tensor * beta,
639 ggml_tensor * state,
640 int il);
641
642 ggml_tensor * build_norm_gated(
643 ggml_tensor * input,
644 ggml_tensor * weights,
645 ggml_tensor * gate,
646 int layer);
647
648 // returns pair of qkv, z
649 std::pair<ggml_tensor *, ggml_tensor *> build_qkvz(
650 ggml_tensor * input,
651 int il);
652
653 const llama_model & model;
654};
655
656struct llm_build_qwen : public llm_graph_context {
657 llm_build_qwen(const llama_model & model, const llm_graph_params & params);
658};
659
660struct llm_build_refact : public llm_graph_context {
661 llm_build_refact(const llama_model & model, const llm_graph_params & params);
662};
663
664struct llm_build_rnd1 : public llm_graph_context {
665 llm_build_rnd1(const llama_model & model, const llm_graph_params & params);
666};
667
668struct llm_build_rwkv6 : public llm_build_rwkv6_base {
669 llm_build_rwkv6(const llama_model & model, const llm_graph_params & params);
670};
671
672struct llm_build_rwkv6qwen2 : public llm_build_rwkv6_base {
673 llm_build_rwkv6qwen2(const llama_model & model, const llm_graph_params & params);
674};
675
676struct llm_build_rwkv7 : public llm_build_rwkv7_base {
677 llm_build_rwkv7(const llama_model & model, const llm_graph_params & params);
678};
679
680struct llm_build_seed_oss : public llm_graph_context {
681 llm_build_seed_oss(const llama_model & model, const llm_graph_params & params);
682};
683
684template <bool iswa>
685struct llm_build_smallthinker : public llm_graph_context {
686 llm_build_smallthinker(const llama_model & model, const llm_graph_params & params);
687};
688
689struct llm_build_smollm3 : public llm_graph_context {
690 llm_build_smollm3(const llama_model & model, const llm_graph_params & params);
691};
692
693struct llm_build_stablelm : public llm_graph_context {
694 llm_build_stablelm(const llama_model & model, const llm_graph_params & params);
695};
696
697struct llm_build_starcoder2 : public llm_graph_context {
698 llm_build_starcoder2(const llama_model & model, const llm_graph_params & params);
699};
700
701struct llm_build_starcoder : public llm_graph_context {
702 llm_build_starcoder(const llama_model & model, const llm_graph_params & params);
703};
704
705struct llm_build_step35_iswa : public llm_graph_context {
706 llm_build_step35_iswa(const llama_model & model, const llm_graph_params & params);
707};
708
709struct llm_build_t5_dec : public llm_graph_context {
710 llm_build_t5_dec(const llama_model & model, const llm_graph_params & params);
711};
712
713struct llm_build_t5_enc : public llm_graph_context {
714 llm_build_t5_enc(const llama_model & model, const llm_graph_params & params);
715};
716
717struct llm_build_wavtokenizer_dec : public llm_graph_context {
718 llm_build_wavtokenizer_dec(const llama_model & model, const llm_graph_params & params);
719};
720
721struct llm_build_xverse : public llm_graph_context {
722 llm_build_xverse(const llama_model & model, const llm_graph_params & params);
723};