1#pragma once
2
3#include "ggml.h"
4#include "ggml-backend.h"
5
6#ifdef __cplusplus
7extern "C" {
8#endif
9
    // the compute plan that needs to be prepared for ggml_graph_compute()
    // since https://github.com/ggml-org/ggml/issues/287
    struct ggml_cplan {
        size_t work_size;  // size of work buffer, calculated by `ggml_graph_plan()`
        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`

        int n_threads;                       // number of threads to use for the computation
        struct ggml_threadpool * threadpool; // optional; NULL means the compute falls back to its default threading (see ggml_graph_plan)

        // abort ggml_graph_compute when true
        ggml_abort_callback abort_callback;
        void * abort_callback_data;          // opaque user pointer handed to abort_callback

        // use only reference implementations
        bool use_ref;
    };
26
    // numa strategies
    // NOTE(review): the per-strategy semantics are not visible in this header —
    // names suggest: distribute threads across nodes / isolate to one node /
    // honor the numactl-provided cpuset / mirror data per node. Confirm against
    // the ggml_numa_init() implementation before relying on them.
    enum ggml_numa_strategy {
        GGML_NUMA_STRATEGY_DISABLED   = 0, // no NUMA-specific handling
        GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
        GGML_NUMA_STRATEGY_ISOLATE    = 2,
        GGML_NUMA_STRATEGY_NUMACTL    = 3,
        GGML_NUMA_STRATEGY_MIRROR     = 4,
        GGML_NUMA_STRATEGY_COUNT           // number of strategies above; not a valid strategy itself
    };

    GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
    GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
39
    // create a new tensor initialized with `value`
    // (presumably a 1-element scalar tensor, per the names — confirm in the implementation)
    GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
    GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

    // set the tensor's contents to `value`; returns `tensor` to allow call chaining
    GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
    GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);

    // single-element accessors by flat (1d) index
    // NOTE(review): the set_* variants take a `const struct ggml_tensor *` yet
    // modify the element — const covers the descriptor, not the data buffer
    GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
    GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);

    // single-element accessors by 4-dimensional index
    GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
    GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);

    GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
    GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);

    GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
    GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
57
    // threadpool lifecycle: create from params, release with ggml_threadpool_free()
    GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
    GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
    // number of threads in the pool
    GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
    // suspend / resume the pool's worker threads
    GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
    GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
63
    // ggml_graph_plan() has to be called before ggml_graph_compute()
    // when plan.work_size > 0, caller must allocate memory for plan.work_data
    GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
                  const struct ggml_cgraph * cgraph,
                                       int   n_threads, /* = GGML_DEFAULT_N_THREADS */
                    struct ggml_threadpool * threadpool /* = NULL */ );
    // run the graph using the prepared plan; result is the computation status (enum ggml_status)
    GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);

    // same as ggml_graph_compute() but the work data is allocated as a part of the context
    // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
    GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
75
    //
    // system info
    //
    // feature predicates: non-zero when the CPU feature is available
    // (whether "available" means compiled-in or runtime-detected is up to the
    // implementation — not visible from this header)

    // x86
    GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
    GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
    GGML_BACKEND_API int ggml_cpu_has_avx        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
    GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
    GGML_BACKEND_API int ggml_cpu_has_bmi2       (void);
    GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
    GGML_BACKEND_API int ggml_cpu_has_fma        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
    GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void);
    // ARM
    GGML_BACKEND_API int ggml_cpu_has_neon       (void);
    GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
    GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
    GGML_BACKEND_API int ggml_cpu_has_dotprod    (void);
    GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
    GGML_BACKEND_API int ggml_cpu_has_sve        (void);
    GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void); // sve vector length in bytes
    GGML_BACKEND_API int ggml_cpu_has_sme        (void);
    // other
    GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
    GGML_BACKEND_API int ggml_cpu_get_rvv_vlen   (void); // risc-v vector length in bytes
    GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
    GGML_BACKEND_API int ggml_cpu_has_vxe        (void);
    GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
    GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);
110
    // Internal types and functions exposed for tests and benchmarks

    // vector dot-product kernel:
    //   n    - number of elements per row
    //   s/bs - output value(s) and their stride
    //   x/bx - first operand row(s) and stride
    //   y/by - second operand row(s) and stride
    //   nrc  - number of result rows to compute at once (see `nrows` below)
    // NOTE(review): stride units (elements vs bytes) are not specified here —
    // confirm against the kernel implementations before calling directly.
    typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                    const void * GGML_RESTRICT y, size_t by, int nrc);

    // per-type CPU kernel table
    struct ggml_type_traits_cpu {
        ggml_from_float_t from_float;   // conversion from f32 into this type (per the typedef name)
        ggml_vec_dot_t    vec_dot;      // dot-product kernel for this type
        enum ggml_type    vec_dot_type; // type vec_dot expects for its y operand — confirm against impl
        int64_t           nrows; // number of rows to process simultaneously
    };

    // look up the CPU traits for a type; the returned pointer is owned by ggml — do not free
    GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);

    // one-time initialization of CPU backend internals; call before using the kernels above
    GGML_BACKEND_API void ggml_cpu_init(void);
126
    //
    // CPU backend
    //

    // create a new CPU backend instance
    GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);

    // true if `backend` is a CPU backend; the setters below require one
    GGML_BACKEND_API bool ggml_backend_is_cpu                 (ggml_backend_t backend);
    GGML_BACKEND_API void ggml_backend_cpu_set_n_threads      (ggml_backend_t backend_cpu, int n_threads);
    // use an externally managed threadpool instead of the backend's own threads
    GGML_BACKEND_API void ggml_backend_cpu_set_threadpool     (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
    // abort graph computation when the callback returns true (see struct ggml_cplan)
    GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);

    // use only reference implementations (see ggml_cplan.use_ref)
    GGML_BACKEND_API void ggml_backend_cpu_set_use_ref(ggml_backend_t backend_cpu, bool use_ref);

    // backend-registry entry for the CPU backend
    GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
141
142 GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
143 GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t);
144 GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
145 GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
146 GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
147 GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t *, float *, int64_t);
148
149#ifdef __cplusplus
150}
151#endif