// Fused top-k Mixture-of-Experts (MoE) gating support for the ggml CUDA backend.
// Declarations only; the implementation lives in the matching .cu file.
#pragma once

#include "common.cuh"
#include "ggml.h"

// NOTE(review): the original patch rendering dropped the angle-bracket argument
// of this include (bare "#include"). <initializer_list> matches upstream
// llama.cpp usage for this header — confirm against the source tree.
#include <initializer_list>

// Option flags selecting which gating steps the fused top-k MoE kernel applies.
// All flags are value-initialized to false by default ({}); field names mirror
// the graph operations they presumably replace — verify exact semantics against
// the kernel implementation.
struct ggml_cuda_topk_moe_args {
    bool sigmoid{};          // sigmoid gating of the logits
    bool softmax{};          // softmax gating of the logits before top-k
    bool delayed_softmax{};  // softmax applied after top-k selection instead
    bool prob_bias{};        // per-expert probability bias term
    bool norm{};             // normalization of the selected expert weights
    bool scale{};            // scaling of the selected expert weights
};

// Runs the fused top-k MoE gating operation on the CUDA backend.
// logits/clamp/scale/bias are read-only inputs (const); weights and ids are
// non-const and presumably receive the selected expert weights and indices —
// confirm against the implementation. Behavior is controlled by `args`.
void ggml_cuda_op_topk_moe(ggml_backend_cuda_context &     ctx,
                           const ggml_tensor *             logits,
                           ggml_tensor *                   weights,
                           ggml_tensor *                   ids,
                           const ggml_tensor *             clamp,
                           const ggml_tensor *             scale,
                           const ggml_tensor *             bias,
                           const ggml_cuda_topk_moe_args & args);

// Returns true when the graph fragment rooted at these tensors can be handled
// by the fused top-k MoE path instead of individual ops.
bool ggml_cuda_should_use_topk_moe(const ggml_tensor * gating_op,
                                   const ggml_tensor * weights,
                                   const ggml_tensor * logits,
                                   const ggml_tensor * ids);