diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-cpu/common.h | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cpu/common.h')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-cpu/common.h | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cpu/common.h b/llama.cpp/ggml/src/ggml-cpu/common.h new file mode 100644 index 0000000..1057b5b --- /dev/null +++ b/llama.cpp/ggml/src/ggml-cpu/common.h @@ -0,0 +1,95 @@ +#pragma once + +#include "ggml.h" +#include "traits.h" +#include "ggml-cpu-impl.h" +#include "ggml-impl.h" +#include "simd-mappings.h" + +#define GGML_FA_TILE_Q 32 +#define GGML_FA_TILE_KV 16 + +#ifdef __cplusplus + +#include <utility> + +// convenience functions/macros for use in template calls +// note: these won't be required after the 'traits' lookup table is used. +static inline ggml_fp16_t f32_to_f16(float x) { + return GGML_CPU_FP32_TO_FP16(x); +} + +static inline float f16_to_f32(ggml_fp16_t x) { + return GGML_CPU_FP16_TO_FP32(x); +} + +static inline ggml_bf16_t f32_to_bf16(float x) { + return GGML_FP32_TO_BF16(x); +} + +static inline float bf16_to_f32(ggml_bf16_t x) { + return GGML_BF16_TO_FP32(x); +} + +static inline float i32_to_f32(int32_t x) { + return x; +} + +static inline int32_t f32_to_i32(float x) { + return x; +} + +static inline float f32_to_f32(float x) { + return x; +} + +// TODO - merge this into the traits table, after using row-based conversions +template <class T> +struct type_conversion_table; + +template <> +struct type_conversion_table<ggml_fp16_t> { + static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32; + static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16; +}; + +template <> +struct type_conversion_table<float> { + static constexpr float (*to_f32)(float) = f32_to_f32; + static constexpr float (*from_f32)(float) = f32_to_f32; +}; + +template <> +struct type_conversion_table<ggml_bf16_t> { + static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32; + static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16; +}; + +template <> +struct type_conversion_table<int32_t> { + static constexpr float (*to_f32)(int32_t) = i32_to_f32; + static constexpr int32_t (*from_f32)(float) = f32_to_i32; +}; + +static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) { + const int64_t ith = params->ith; + const int64_t nth = params->nth; + + const int64_t nr = ggml_nrows(src0); + + // rows per thread + const int64_t dr = (nr + nth - 1)/nth; + + // row range for this thread + const int64_t ir0 = dr*ith; + const int64_t ir1 = MIN(ir0 + dr, nr); + + return {ir0, ir1}; +} + +struct ggml_fa_tile_config { + static constexpr size_t Q = GGML_FA_TILE_Q; + static constexpr size_t KV = GGML_FA_TILE_KV; +}; + +#endif |
