summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-cpu/common.h
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-cpu/common.h
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cpu/common.h')
-rw-r--r--llama.cpp/ggml/src/ggml-cpu/common.h95
1 file changed, 95 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cpu/common.h b/llama.cpp/ggml/src/ggml-cpu/common.h
new file mode 100644
index 0000000..1057b5b
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-cpu/common.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include "ggml.h"
+#include "traits.h"
+#include "ggml-cpu-impl.h"
+#include "ggml-impl.h"
+#include "simd-mappings.h"
+
+#define GGML_FA_TILE_Q 32
+#define GGML_FA_TILE_KV 16
+
+#ifdef __cplusplus
+
+#include <utility>
+
+// convenience functions/macros for use in template calls
+// note: these won't be required after the 'traits' lookup table is used.
// Convert a single float to ggml half-precision (fp16).
// Thin wrapper over the GGML_CPU_FP32_TO_FP16 macro so the conversion can be
// used where a real function (e.g. a constexpr function pointer in
// type_conversion_table) is required.
static inline ggml_fp16_t f32_to_f16(float x) {
    return GGML_CPU_FP32_TO_FP16(x);
}
+
// Convert a single ggml half-precision (fp16) value to float.
// Function wrapper over GGML_CPU_FP16_TO_FP32 for use in template calls.
static inline float f16_to_f32(ggml_fp16_t x) {
    return GGML_CPU_FP16_TO_FP32(x);
}
+
// Convert a single float to bfloat16.
// Function wrapper over GGML_FP32_TO_BF16 for use in template calls.
static inline ggml_bf16_t f32_to_bf16(float x) {
    return GGML_FP32_TO_BF16(x);
}
+
// Convert a single bfloat16 value to float.
// Function wrapper over GGML_BF16_TO_FP32 for use in template calls.
static inline float bf16_to_f32(ggml_bf16_t x) {
    return GGML_BF16_TO_FP32(x);
}
+
// Widen an int32 to float.
// note: values with magnitude above 2^24 may lose precision — same as the
// implicit conversion this replaces.
static inline float i32_to_f32(int32_t x) {
    return static_cast<float>(x);
}
+
// Narrow a float to int32, truncating toward zero (identical to the
// implicit float->int conversion this makes explicit).
// note: out-of-range inputs are undefined behavior, as before.
static inline int32_t f32_to_i32(float x) {
    return static_cast<int32_t>(x);
}
+
// Identity conversion; lets float participate in the same
// type_conversion_table machinery as fp16/bf16/int32.
static inline float f32_to_f32(float x) {
    return x;
}
+
+// TODO - merge this into the traits table, after using row-based conversions
+template <class T>
+struct type_conversion_table;
+
+template <>
+struct type_conversion_table<ggml_fp16_t> {
+ static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
+ static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
+};
+
+template <>
+struct type_conversion_table<float> {
+ static constexpr float (*to_f32)(float) = f32_to_f32;
+ static constexpr float (*from_f32)(float) = f32_to_f32;
+};
+
+template <>
+struct type_conversion_table<ggml_bf16_t> {
+ static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
+ static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
+};
+
+template <>
+struct type_conversion_table<int32_t> {
+ static constexpr float (*to_f32)(int32_t) = i32_to_f32;
+ static constexpr int32_t (*from_f32)(float) = f32_to_i32;
+};
+
+static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
+ const int64_t ith = params->ith;
+ const int64_t nth = params->nth;
+
+ const int64_t nr = ggml_nrows(src0);
+
+ // rows per thread
+ const int64_t dr = (nr + nth - 1)/nth;
+
+ // row range for this thread
+ const int64_t ir0 = dr*ith;
+ const int64_t ir1 = MIN(ir0 + dr, nr);
+
+ return {ir0, ir1};
+}
+
// Flash-attention tile dimensions as compile-time constants, mirroring the
// GGML_FA_TILE_Q / GGML_FA_TILE_KV macros above for use in C++ templates.
struct ggml_fa_tile_config {
    // rows of the query tile
    static constexpr size_t Q = GGML_FA_TILE_Q;
    // rows of the key/value tile
    static constexpr size_t KV = GGML_FA_TILE_KV;
};
+
+#endif