1#pragma once
2
3#include "ggml.h"
4#include "ggml-backend.h"
5
6#ifdef __cplusplus
7extern "C" {
8#endif
9
    // the compute plan that needs to be prepared for ggml_graph_compute()
    // since https://github.com/ggml-org/ggml/issues/287
    struct ggml_cplan {
        size_t work_size;  // size of work buffer, calculated by `ggml_graph_plan()`
        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`

        int n_threads;                       // number of threads to use for the computation
        struct ggml_threadpool * threadpool; // optional; NULL means the compute falls back to its default threading (see ggml_graph_plan)

        // abort ggml_graph_compute when true
        ggml_abort_callback abort_callback;
        void * abort_callback_data;          // opaque user pointer handed to abort_callback

        // use only reference implementations
        bool use_ref;
    };
26
    // numa strategies
    // NOTE(review): the per-strategy semantics are not visible in this header —
    // names suggest: distribute threads across nodes / isolate to one node /
    // honor the numactl-provided cpuset / mirror data per node. Confirm against
    // the ggml_numa_init() implementation before relying on them.
    enum ggml_numa_strategy {
        GGML_NUMA_STRATEGY_DISABLED   = 0, // no NUMA-specific handling
        GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
        GGML_NUMA_STRATEGY_ISOLATE    = 2,
        GGML_NUMA_STRATEGY_NUMACTL    = 3,
        GGML_NUMA_STRATEGY_MIRROR     = 4,
        GGML_NUMA_STRATEGY_COUNT           // number of strategies above; not a valid strategy itself
    };

    GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
    GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
39
    // create a new tensor initialized with `value`
    // (presumably a 1-element scalar tensor, per the names — confirm in the implementation)
    GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
    GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

    // set the tensor's contents to `value`; returns `tensor` to allow call chaining
    GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
    GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);

    // single-element accessors by flat (1d) index
    // NOTE(review): the set_* variants take a `const struct ggml_tensor *` yet
    // modify the element — const covers the descriptor, not the data buffer
    GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
    GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);

    // single-element accessors by 4-dimensional index
    GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
    GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);

    GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
    GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);

    GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
    GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
57
    // threadpool lifecycle: create from params, release with ggml_threadpool_free()
    GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
    GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
    // number of threads in the pool
    GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
    // suspend / resume the pool's worker threads
    GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
    GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
63
    // ggml_graph_plan() has to be called before ggml_graph_compute()
    // when plan.work_size > 0, caller must allocate memory for plan.work_data
    GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
                  const struct ggml_cgraph * cgraph,
                                       int   n_threads, /* = GGML_DEFAULT_N_THREADS */
                    struct ggml_threadpool * threadpool /* = NULL */ );
    // run the graph using the prepared plan; result is the computation status (enum ggml_status)
    GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);

    // same as ggml_graph_compute() but the work data is allocated as a part of the context
    // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
    GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
75
    //
    // system info
    //
    // feature predicates: non-zero when the CPU feature is available
    // (whether "available" means compiled-in or runtime-detected is up to the
    // implementation — not visible from this header)

    // x86
    GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
    GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
    GGML_BACKEND_API int ggml_cpu_has_avx        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
    GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
    GGML_BACKEND_API int ggml_cpu_has_bmi2       (void);
    GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
    GGML_BACKEND_API int ggml_cpu_has_fma        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
    GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void);
    // ARM
    GGML_BACKEND_API int ggml_cpu_has_neon       (void);
    GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
    GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
    GGML_BACKEND_API int ggml_cpu_has_dotprod    (void);
    GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
    GGML_BACKEND_API int ggml_cpu_has_sve        (void);
    GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void); // sve vector length in bytes
    GGML_BACKEND_API int ggml_cpu_has_sme        (void);
    // other
    GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
    GGML_BACKEND_API int ggml_cpu_get_rvv_vlen   (void); // risc-v vector length in bytes
    GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
    GGML_BACKEND_API int ggml_cpu_has_vxe        (void);
    GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
    GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);
110
    // Internal types and functions exposed for tests and benchmarks

    // vector dot-product kernel:
    //   n    - number of elements per row
    //   s/bs - output value(s) and their stride
    //   x/bx - first operand row(s) and stride
    //   y/by - second operand row(s) and stride
    //   nrc  - number of result rows to compute at once (see `nrows` below)
    // NOTE(review): stride units (elements vs bytes) are not specified here —
    // confirm against the kernel implementations before calling directly.
    typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                    const void * GGML_RESTRICT y, size_t by, int nrc);

    // per-type CPU kernel table
    struct ggml_type_traits_cpu {
        ggml_from_float_t from_float;   // conversion from f32 into this type (per the typedef name)
        ggml_vec_dot_t    vec_dot;      // dot-product kernel for this type
        enum ggml_type    vec_dot_type; // type vec_dot expects for its y operand — confirm against impl
        int64_t           nrows; // number of rows to process simultaneously
    };

    // look up the CPU traits for a type; the returned pointer is owned by ggml — do not free
    GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);

    // one-time initialization of CPU backend internals; call before using the kernels above
    GGML_BACKEND_API void ggml_cpu_init(void);
126
    //
    // CPU backend
    //

    // create a new CPU backend instance
    GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);

    // true if `backend` is a CPU backend; the setters below require one
    GGML_BACKEND_API bool ggml_backend_is_cpu                 (ggml_backend_t backend);
    GGML_BACKEND_API void ggml_backend_cpu_set_n_threads      (ggml_backend_t backend_cpu, int n_threads);
    // use an externally managed threadpool instead of the backend's own threads
    GGML_BACKEND_API void ggml_backend_cpu_set_threadpool     (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
    // abort graph computation when the callback returns true (see struct ggml_cplan)
    GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);

    // use only reference implementations (see ggml_cplan.use_ref)
    GGML_BACKEND_API void ggml_backend_cpu_set_use_ref(ggml_backend_t backend_cpu, bool use_ref);

    // backend-registry entry for the CPU backend
    GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
141
142 GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
143 GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t);
144 GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
145 GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
146 GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
147 GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t *, float *, int64_t);
148
149#ifdef __cplusplus
150}
151#endif