summary refs log tree commit diff
path: root/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
diff options
context:
space:
mode:
author Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
commit b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
download llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py')
-rwxr-xr-x llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py 99
1 files changed, 99 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py b/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
new file mode 100755
index 0000000..e382df1
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+
+from glob import glob
+import os
+
+# K/Q head sizes. One fattn-tile instance file is written per entry, and the
+# fattn-mma-f16 generator iterates the same list while skipping some sizes.
+HEAD_SIZES_KQ = [40, 64, 72, 80, 96, 112, 128, 256, 576]
+
+# Types used for K and V. One fattn-vec instance file is written for every
+# ordered (type_k, type_v) pair drawn from this list.
+TYPES_KV = ["GGML_TYPE_F16", "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0"]
+
+# Full contents of one fattn-tile instance file.
+# Placeholders filled via str.format: head_size_kq, head_size_v.
+SOURCE_FATTN_TILE = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../fattn-tile.cuh"
+
+DECL_FATTN_TILE_CASE({head_size_kq}, {head_size_v});
+"""
+
+# Full contents of one fattn-vec instance file.
+# Placeholders filled via str.format: type_k, type_v. The first macro argument
+# is hard-coded to 64, 128 and 256 rather than taken from HEAD_SIZES_KQ.
+SOURCE_FATTN_VEC = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../fattn-vec.cuh"
+
+DECL_FATTN_VEC_CASE( 64, {type_k}, {type_v});
+DECL_FATTN_VEC_CASE(128, {type_k}, {type_v});
+DECL_FATTN_VEC_CASE(256, {type_k}, {type_v});
+"""
+
+# Header written once at the top of every fattn-mma-f16 instance file
+# (no placeholders; written without calling str.format).
+SOURCE_FATTN_MMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../fattn-mma-f16.cuh"
+
+"""
+
+# One line appended to a fattn-mma-f16 instance file per emitted head size.
+# Placeholders filled via str.format: head_size_kq, head_size_v, ncols1, ncols2.
+SOURCE_FATTN_MMA_CASE = "DECL_FATTN_MMA_F16_CASE({head_size_kq}, {head_size_v}, {ncols1}, {ncols2});\n"
+
+# Types for which an mmq instance file is generated, one file per entry.
+TYPES_MMQ = [
+ "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
+ "GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K",
+ "GGML_TYPE_IQ2_XXS", "GGML_TYPE_IQ2_XS", "GGML_TYPE_IQ2_S", "GGML_TYPE_IQ3_XXS", "GGML_TYPE_IQ3_S",
+ "GGML_TYPE_IQ1_S", "GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS", "GGML_TYPE_MXFP4"
+]
+
+# Full contents of one mmq instance file.
+# Placeholder filled via str.format: type (an entry of TYPES_MMQ).
+SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE({type});
+"""
+
+# Full contents of one mmf instance file.
+# Placeholder filled via str.format: type. The generating loop passes an
+# integer from 1 to 16 here (the same value appears as "ncols" in the file
+# name), not a GGML type name.
+SOURCE_MMF = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmf.cuh"
+
+DECL_MMF_CASE({type});
+"""
+
+
+def get_short_name(long_quant_name: str) -> str:
+ """Return the lower-cased type name with "GGML_TYPE_" removed.
+
+ Every occurrence of the substring "GGML_TYPE_" is dropped (not only a
+ leading one) and the remainder is lower-cased, e.g.
+ "GGML_TYPE_Q4_0" -> "q4_0". The result is used to build instance file names.
+ """
+ return long_quant_name.replace("GGML_TYPE_", "").lower()
+
+
+# Delete every *.cu file in the current working directory before regenerating.
+# NOTE(review): the pattern is relative to the working directory, not to this
+# script's location -- presumably the script is meant to be run from its own
+# directory; confirm before running it elsewhere.
+for filename in glob("*.cu"):
+ os.remove(filename)
+
+# fattn-tile: one file per K/Q head size. The V head size equals the K/Q head
+# size, except that a K/Q head size of 576 is paired with a V head size of 512.
+for head_size_kq in HEAD_SIZES_KQ:
+ head_size_v = head_size_kq if head_size_kq != 576 else 512
+ with open(f"fattn-tile-instance-dkq{head_size_kq}-dv{head_size_v}.cu", "w") as f:
+ f.write(SOURCE_FATTN_TILE.format(head_size_kq=head_size_kq, head_size_v=head_size_v))
+
+# fattn-vec: one file per ordered (K type, V type) pair from TYPES_KV, named
+# with the short lower-case form of both types.
+for type_k in TYPES_KV:
+ for type_v in TYPES_KV:
+ with open(f"fattn-vec-instance-{get_short_name(type_k)}-{get_short_name(type_v)}.cu", "w") as f:
+ f.write(SOURCE_FATTN_VEC.format(type_k=type_k, type_v=type_v))
+
+# fattn-mma-f16: one file per (ncols1, ncols2) pair, where ncols1 * ncols2 is
+# one of the totals 8, 16, 32 or 64.
+for ncols in [8, 16, 32, 64]:
+ for ncols2 in [1, 2, 4, 8, 16, 32]:
+ # ncols2 cannot exceed the total column count.
+ if ncols2 > ncols:
+ continue
+ ncols1 = ncols // ncols2
+ with open(f"fattn-mma-f16-instance-ncols1_{ncols1}-ncols2_{ncols2}.cu", "w") as f:
+ # Every file starts with the shared header, even if no case follows.
+ f.write(SOURCE_FATTN_MMA_START)
+
+ for head_size_kq in HEAD_SIZES_KQ:
+ # Head sizes 40 and 72 get no MMA instance.
+ if head_size_kq == 40:
+ continue
+ if head_size_kq == 72:
+ continue
+ # ncols2 values of 16 and 32 are emitted only for head size 576.
+ if head_size_kq != 576 and ncols2 in (16, 32):
+ continue
+ # Head size 576 is emitted only for ncols2 values of 4, 16 and 32.
+ if head_size_kq == 576 and ncols2 not in (4, 16, 32):
+ continue
+ # Same K/Q -> V head-size pairing as for the tile instances.
+ head_size_v = head_size_kq if head_size_kq != 576 else 512
+ f.write(SOURCE_FATTN_MMA_CASE.format(ncols1=ncols1, ncols2=ncols2, head_size_kq=head_size_kq, head_size_v=head_size_v))
+
+# mmq: one file per entry of TYPES_MMQ, named with the short lower-case type.
+# NOTE(review): the loop variable `type` shadows the Python builtin for the
+# rest of the module; harmless here because the builtin is not used afterwards.
+for type in TYPES_MMQ:
+ with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f:
+ f.write(SOURCE_MMQ.format(type=type))
+
+# mmf: one file per integer from 1 to 16 inclusive. Here `type` is that
+# integer (used as "ncols" in the file name), not a GGML type name.
+for type in range(1, 17):
+ with open(f"mmf-instance-ncols_{type}.cu", "w") as f:
+ f.write(SOURCE_MMF.format(type=type))