path: root/llama.cpp/src/models/arwkv7.cpp
author    Mitja Felicijan <mitja.felicijan@gmail.com>    2026-02-12 20:57:17 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com>    2026-02-12 20:57:17 +0100
commit    b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree      211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/src/models/arwkv7.cpp
Engage!
Diffstat (limited to 'llama.cpp/src/models/arwkv7.cpp')
-rw-r--r--  llama.cpp/src/models/arwkv7.cpp  86
1 file changed, 86 insertions, 0 deletions
diff --git a/llama.cpp/src/models/arwkv7.cpp b/llama.cpp/src/models/arwkv7.cpp
new file mode 100644
index 0000000..107a3be
--- /dev/null
+++ b/llama.cpp/src/models/arwkv7.cpp
@@ -0,0 +1,86 @@
#include "models.h"


llm_build_arwkv7::llm_build_arwkv7(const llama_model & model, const llm_graph_params & params) : llm_build_rwkv7_base(model, params) {
    GGML_ASSERT(n_embd == hparams.n_embd_r());

    ggml_tensor * cur;
    ggml_tensor * inpL;
    ggml_tensor * v_first = nullptr;

    inpL = build_inp_embd(model.tok_embd);

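    // recurrent-state input, used below to load/store the per-sequence token-shift and wkv state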
    auto * rs_inp = build_rs_inp();

    const auto n_embd = hparams.n_embd;
    const auto n_seq_tokens = ubatch.n_seq_tokens;
    const auto n_seqs = ubatch.n_seqs;

    ggml_tensor * inp_out_ids = build_inp_out_ids();

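    // layer loop: RWKV7 time-mix (attention) block followed by a gated SiLU feed-forward block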
    for (int il = 0; il < n_layer; ++il) {
        const llama_layer * layer = &model.layers[il];
        inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs);

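        // token-shift state for this layer: the normed embedding of each sequence's previously processed token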
        ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, ubatch, il);

        ggml_tensor * att_norm = build_norm(inpL, layer->attn_norm, layer->attn_norm_b, LLM_NORM_RMS, il);
        cb(att_norm, "attn_norm", il);

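        // x_prev: for every position, the previous token's normed embedding (token shift across the ubatch boundary)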
        ggml_tensor * x_prev = ggml_concat(
            ctx0,
            token_shift,
            ggml_view_3d(ctx0, att_norm, n_embd, n_seq_tokens - 1, n_seqs, att_norm->nb[1], att_norm->nb[2], 0),
            1
        );

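        // RWKV7 time mix; v_first is filled by the first layer and reused by later layers (value residual)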
        cur = build_rwkv7_time_mix(rs_inp, att_norm, x_prev, v_first, ubatch, il);

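        // the last token of this ubatch becomes the token-shift state for the next ubatch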
        token_shift = ggml_view_3d(ctx0, att_norm, n_embd, 1, n_seqs, att_norm->nb[1], att_norm->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(att_norm));
        ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il));

        ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL);
        cb(ffn_inp, "ffn_inp", il);

        cur = ggml_reshape_2d(ctx0, cur, n_embd, n_tokens);
        ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens);

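        // on the final layer, keep only the rows whose outputs were requested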
        if (il == n_layer - 1 && inp_out_ids) {
            cur = ggml_get_rows(ctx0, cur, inp_out_ids);
            ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
        }
        // feed-forward network
        cur = build_norm(ffn_inp,
                model.layers[il].ffn_norm, NULL,
                LLM_NORM_RMS, il);
        cb(cur, "ffn_norm", il);

        cur = build_ffn(cur,
                model.layers[il].ffn_up, NULL, NULL,
                model.layers[il].ffn_gate, NULL, NULL,
                model.layers[il].ffn_down, NULL, NULL,
                NULL,
                LLM_FFN_SILU, LLM_FFN_PAR, il);
        cb(cur, "ffn_out", il);

        cur = ggml_add(ctx0, cur, ffn_inp);

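        // apply any loaded per-layer control vector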
        cur = build_cvec(cur, il);
        cb(cur, "l_out", il);

        // input for next layer
        inpL = cur;
    }
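    // final RMS norm over the last layer's output, then projection to vocabulary logits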
    cur = inpL;
    cur = build_norm(cur, model.output_norm, model.output_norm_b, LLM_NORM_RMS, -1);

    cb(cur, "result_norm", -1);
    res->t_embd = cur;

    cur = build_lora_mm(model.output, cur);

    cb(cur, "result_output", -1);
    res->t_logits = cur;

    ggml_build_forward_expand(gf, cur);
}