llmnpc - llama.cpp/ggml/src/ggml-cpu/common.h

Path: llmnpc / llama.cpp / ggml / src / ggml-cpu / common.h (raw)
 1#pragma once
 2
 3#include "ggml.h"
 4#include "traits.h"
 5#include "ggml-cpu-impl.h"
 6#include "ggml-impl.h"
 7#include "simd-mappings.h"
 8
 9#define GGML_FA_TILE_Q  32
10#define GGML_FA_TILE_KV 16
11
12#ifdef __cplusplus
13
14#include <utility>
15
16// convenience functions/macros for use in template calls
17// note: these won't be required after the 'traits' lookup table is used.
18static inline ggml_fp16_t f32_to_f16(float x) {
19    return GGML_CPU_FP32_TO_FP16(x);
20}
21
22static inline float f16_to_f32(ggml_fp16_t x) {
23    return GGML_CPU_FP16_TO_FP32(x);
24}
25
26static inline ggml_bf16_t f32_to_bf16(float x) {
27    return GGML_FP32_TO_BF16(x);
28}
29
30static inline float bf16_to_f32(ggml_bf16_t x) {
31    return GGML_BF16_TO_FP32(x);
32}
33
// Converts a 32-bit signed integer to float.
// The conversion is the standard integral-to-floating conversion: integers
// that a float cannot represent exactly are rounded to a nearby float.
static inline float i32_to_f32(int32_t x) {
    return static_cast<float>(x);
}
37
// Converts a float to a 32-bit signed integer.
// The fractional part is discarded (truncation toward zero). As with any
// C++ floating-to-integral conversion, the result is undefined when the
// truncated value does not fit in int32_t (including NaN and infinities).
static inline int32_t f32_to_i32(float x) {
    return static_cast<int32_t>(x);
}
41
// Identity conversion: returns its argument unchanged. It gives float the
// same to_f32/from_f32 shape as the other element types in
// type_conversion_table.
static inline float f32_to_f32(float x) {
    const float passthrough = x;
    return passthrough;
}
45
// Maps an element type T to its scalar conversion routines (to_f32 and
// from_f32). The primary template is only declared here; each supported
// type provides an explicit specialization.
// TODO - merge this into the traits table, after using row-based conversions
template <typename T>
struct type_conversion_table;
49
50template <>
51struct type_conversion_table<ggml_fp16_t> {
52    static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
53    static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
54};
55
56template <>
57struct type_conversion_table<float> {
58    static constexpr float (*to_f32)(float) = f32_to_f32;
59    static constexpr float (*from_f32)(float) = f32_to_f32;
60};
61
62template <>
63struct type_conversion_table<ggml_bf16_t> {
64    static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
65    static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
66};
67
68template <>
69struct type_conversion_table<int32_t> {
70    static constexpr float (*to_f32)(int32_t) = i32_to_f32;
71    static constexpr int32_t (*from_f32)(float) = f32_to_i32;
72};
73
74static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
75    const int64_t ith = params->ith;
76    const int64_t nth = params->nth;
77
78    const int64_t nr  = ggml_nrows(src0);
79
80    // rows per thread
81    const int64_t dr = (nr + nth - 1)/nth;
82
83    // row range for this thread
84    const int64_t ir0 = dr*ith;
85    const int64_t ir1 = MIN(ir0 + dr, nr);
86
87    return {ir0, ir1};
88}
89
90struct ggml_fa_tile_config {
91    static constexpr size_t Q  = GGML_FA_TILE_Q;
92    static constexpr size_t KV = GGML_FA_TILE_KV;
93};
94
95#endif