#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-cpu.h"
#include "repack.h"
#include "traits.h"
#include "ggml-impl.h"
#include "amx/amx.h"

#include <cctype>
#include <cstdio>
#include <cstring>
#include <new>
#include <string>
#include <vector>
12
13#ifdef GGML_USE_CPU_HBM
14# include "hbm.h"
15#endif
16
17#ifdef GGML_USE_CPU_KLEIDIAI
18# include "kleidiai/kleidiai.h"
19#endif
20
21#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
22# include "spacemit/ime.h"
23#endif
24
25#if defined(_WIN32)
26# define WIN32_LEAN_AND_MEAN
27# ifndef NOMINMAX
28# define NOMINMAX
29# endif
30# include <windows.h>
31#else
32# include <unistd.h>
33#endif
34
35#if defined(__APPLE__)
36# include <sys/sysctl.h>
37# include <sys/types.h>
38#endif
39
40// ggml-backend interface
41
42std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
43 static std::vector<ggml_backend_buffer_type_t> bufts = []() {
44 std::vector<ggml_backend_buffer_type_t> bufts;
45
46#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
47 if (ggml_backend_amx_buffer_type()) {
48 bufts.push_back(ggml_backend_amx_buffer_type());
49 }
50#endif
51
52#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
53 if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
54 bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
55 }
56#endif
57
58#ifdef GGML_USE_CPU_KLEIDIAI
59 if (ggml_backend_cpu_kleidiai_buffer_type()) {
60 bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
61 }
62#endif
63
64#ifdef GGML_USE_CPU_REPACK
65 if (ggml_backend_cpu_repack_buffer_type()) {
66 bufts.push_back(ggml_backend_cpu_repack_buffer_type());
67 }
68#endif
69
70 return bufts;
71 }();
72
73 return bufts;
74}
75
76static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
77 static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
78 std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
79 bufts.push_back(nullptr);
80 return bufts;
81 }();
82
83 return extra_bufts.data();
84
85 GGML_UNUSED(device);
86}
87
88static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
89 for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
90 if (extra == buft) {
91 return true;
92 }
93 }
94 return false;
95}
96
97// CPU backend - backend (stream)
98
// Per-stream state for a CPU backend instance.
struct ggml_backend_cpu_context {
    int n_threads;                  // thread count passed to ggml_graph_plan
    ggml_threadpool_t threadpool;   // optional threadpool; NULL selects the default behavior

    uint8_t * work_data;            // scratch buffer reused across graph computations
    size_t work_size;               // current capacity of work_data, in bytes

    ggml_abort_callback abort_callback; // user callback that can abort a computation early
    void * abort_callback_data;         // opaque pointer forwarded to abort_callback

    bool use_ref; // use reference implementation
};
111
112static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
113 return "CPU";
114
115 GGML_UNUSED(backend);
116}
117
118static void ggml_backend_cpu_free(ggml_backend_t backend) {
119 struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
120 delete[] cpu_ctx->work_data;
121 delete cpu_ctx;
122 delete backend;
123}
124
// A pre-built execution plan: the cplan produced by ggml_graph_plan plus a
// shallow copy of the graph it was created for (see FIXME in plan_create).
struct ggml_backend_plan_cpu {
    struct ggml_cplan cplan;
    struct ggml_cgraph cgraph;
};
129
130static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, const struct ggml_cgraph * cgraph) {
131 struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
132
133 struct ggml_backend_plan_cpu * cpu_plan = new ggml_backend_plan_cpu;
134
135 cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
136 cpu_plan->cgraph = *cgraph; // FIXME: deep copy
137
138 if (cpu_plan->cplan.work_size > 0) {
139 cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
140 if (cpu_plan->cplan.work_data == NULL) {
141 delete cpu_plan;
142 return NULL;
143 }
144 }
145
146 cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
147 cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
148 cpu_plan->cplan.use_ref = cpu_ctx->use_ref;
149
150 return cpu_plan;
151}
152
153static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
154 struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
155
156 delete[] cpu_plan->cplan.work_data;
157 delete cpu_plan;
158
159 GGML_UNUSED(backend);
160}
161
162static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
163 struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
164
165 return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
166
167 GGML_UNUSED(backend);
168}
169
170static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
171 struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
172
173 struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
174
175 if (cpu_ctx->work_size < cplan.work_size) {
176 delete[] cpu_ctx->work_data;
177 cpu_ctx->work_data = new uint8_t[cplan.work_size];
178 if (cpu_ctx->work_data == NULL) {
179 cpu_ctx->work_size = 0;
180 return GGML_STATUS_ALLOC_FAILED;
181 }
182 cpu_ctx->work_size = cplan.work_size;
183 }
184 cplan.work_data = (uint8_t *)cpu_ctx->work_data;
185
186 cplan.abort_callback = cpu_ctx->abort_callback;
187 cplan.abort_callback_data = cpu_ctx->abort_callback_data;
188 cplan.use_ref = cpu_ctx->use_ref;
189
190 return ggml_graph_compute(cgraph, &cplan);
191}
192
// Stream-level interface of the CPU backend. Entries left NULL are optional
// operations (async transfers, events, graph updates) that the CPU backend
// does not implement.
static const struct ggml_backend_i ggml_backend_cpu_i = {
    /* .get_name                = */ ggml_backend_cpu_get_name,
    /* .free                    = */ ggml_backend_cpu_free,
    /* .set_tensor_async        = */ NULL,
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL,
    /* .graph_plan_create       = */ ggml_backend_cpu_graph_plan_create,
    /* .graph_plan_free         = */ ggml_backend_cpu_graph_plan_free,
    /* .graph_plan_update       = */ NULL,
    /* .graph_plan_compute      = */ ggml_backend_cpu_graph_plan_compute,
    /* .graph_compute           = */ ggml_backend_cpu_graph_compute,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
    /* .graph_optimize          = */ NULL,
};
209
// Stable GUID identifying the CPU backend (used by ggml_backend_is_cpu).
static ggml_guid_t ggml_backend_cpu_guid(void) {
    static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
    return &guid;
}
214
215ggml_backend_t ggml_backend_cpu_init(void) {
216 // initialize CPU backend now to avoid slowing the first graph computation
217 ggml_cpu_init();
218
219 struct ggml_backend_cpu_context * ctx = new ggml_backend_cpu_context;
220 if (ctx == NULL) {
221 return NULL;
222 }
223
224 ctx->n_threads = GGML_DEFAULT_N_THREADS;
225 ctx->threadpool = NULL;
226 ctx->work_data = NULL;
227 ctx->work_size = 0;
228 ctx->abort_callback = NULL;
229 ctx->abort_callback_data = NULL;
230 ctx->use_ref = false;
231
232 ggml_backend_t cpu_backend = new ggml_backend {
233 /* .guid = */ ggml_backend_cpu_guid(),
234 /* .iface = */ ggml_backend_cpu_i,
235 /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
236 /* .context = */ ctx,
237 };
238
239 if (cpu_backend == NULL) {
240 delete ctx;
241 return NULL;
242 }
243
244 return cpu_backend;
245}
246
247bool ggml_backend_is_cpu(ggml_backend_t backend) {
248 return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
249}
250
251void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
252 GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
253
254 struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
255 ctx->n_threads = n_threads;
256}
257
258void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
259 GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
260
261 struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
262
263 if (ctx->threadpool && ctx->threadpool != threadpool) {
264 // already had a different threadpool, pause/suspend it before switching
265 ggml_threadpool_pause(ctx->threadpool);
266 }
267 ctx->threadpool = threadpool;
268}
269
270void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
271 GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
272
273 struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
274 ctx->abort_callback = abort_callback;
275 ctx->abort_callback_data = abort_callback_data;
276}
277
278void ggml_backend_cpu_set_use_ref(ggml_backend_t backend_cpu, bool use_ref) {
279 GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
280
281 struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
282 ctx->use_ref = use_ref;
283}
284
285// CPU backend - device
286
// Device-level context: holds a human-readable CPU description queried from
// the OS at construction time ("CPU" if no platform query is available).
struct ggml_backend_cpu_device_context {
    std::string description = "CPU";

    ggml_backend_cpu_device_context() {
#ifdef __APPLE__
        size_t len = 0;
        if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
            description.resize(len);
            sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
        }
#elif defined(__linux__)
        FILE * f = fopen("/proc/cpuinfo", "r");
        if (f) {
            char buf[1024];
            while (fgets(buf, sizeof(buf), f)) {
                if (strncmp(buf, "model name", 10) == 0) {
                    char * p = strchr(buf, ':');
                    if (p) {
                        p++;
                        // cast to unsigned char: passing a (possibly negative) plain
                        // char to isspace is undefined behavior (CERT STR37-C)
                        while (std::isspace((unsigned char) *p)) {
                            p++;
                        }
                        // trim trailing whitespace; the *p guard prevents reading
                        // p[-1] if the value turns out to be empty
                        while (*p && std::isspace((unsigned char) p[strlen(p) - 1])) {
                            p[strlen(p) - 1] = '\0';
                        }
                        description = p;
                        break;
                    }
                }
            }
            fclose(f);
        }
#elif defined(_WIN32)
        HKEY hKey;
        if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                        TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
                        0,
                        KEY_READ,
                        &hKey) == ERROR_SUCCESS) {
            DWORD cpu_brand_size = 0;
            // first query obtains the required buffer size, second reads the value
            if (RegQueryValueExA(hKey,
                                "ProcessorNameString",
                                NULL,
                                NULL,
                                NULL,
                                &cpu_brand_size) == ERROR_SUCCESS) {
                description.resize(cpu_brand_size);
                if (RegQueryValueExA(hKey,
                                    "ProcessorNameString",
                                    NULL,
                                    NULL,
                                    (LPBYTE)&description[0], // NOLINT
                                    &cpu_brand_size) == ERROR_SUCCESS) {
                    // the registry value includes a NUL terminator - strip it
                    if (description.find('\0') != std::string::npos) {
                        description.resize(description.find('\0'));
                    }
                }
            }
            RegCloseKey(hKey);
        }
#endif
    }
};
350
351static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
352 return "CPU";
353
354 GGML_UNUSED(dev);
355}
356
357static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t dev) {
358 struct ggml_backend_cpu_device_context * ctx = (struct ggml_backend_cpu_device_context *)dev->context;
359
360 return ctx->description.c_str();
361}
362
363static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
364#ifdef _WIN32
365 MEMORYSTATUSEX status;
366 status.dwLength = sizeof(status);
367 GlobalMemoryStatusEx(&status);
368 *total = status.ullTotalPhys;
369 *free = status.ullAvailPhys;
370#else
371 long pages = sysconf(_SC_PHYS_PAGES);
372 long page_size = sysconf(_SC_PAGE_SIZE);
373 *total = pages * page_size;
374
375 // "free" system memory is ill-defined, for practical purposes assume that all of it is free:
376 *free = *total;
377#endif // _WIN32
378
379 GGML_UNUSED(dev);
380}
381
382static enum ggml_backend_dev_type ggml_backend_cpu_device_get_type(ggml_backend_dev_t dev) {
383 return GGML_BACKEND_DEVICE_TYPE_CPU;
384
385 GGML_UNUSED(dev);
386}
387
// Fills props by delegating to the individual device query functions.
static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_cpu_device_get_name(dev);
    props->description = ggml_backend_cpu_device_get_description(dev);
    props->type        = ggml_backend_cpu_device_get_type(dev);
    ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
    // the CPU backend is synchronous and can wrap host pointers directly
    props->caps = {
        /* .async                 = */ false,
        /* .host_buffer           = */ false,
        /* .buffer_from_host_ptr  = */ true,
        /* .events                = */ false,
    };
}
400
401static ggml_backend_t ggml_backend_cpu_device_init_backend(ggml_backend_dev_t dev, const char * params) {
402 return ggml_backend_cpu_init();
403
404 GGML_UNUSED(dev);
405 GGML_UNUSED(params);
406}
407
408static ggml_backend_buffer_type_t ggml_backend_cpu_device_get_buffer_type(ggml_backend_dev_t dev) {
409 return ggml_backend_cpu_buffer_type();
410
411 GGML_UNUSED(dev);
412}
413
414static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
415 return ggml_backend_cpu_buffer_from_ptr(ptr, size);
416
417 GGML_UNUSED(dev);
418 GGML_UNUSED(max_tensor_size);
419}
420
// Reports whether the CPU backend can execute the given operation.
// Sources living in an extra buffer type delegate the decision to that
// buffer type; otherwise a per-op type check is applied.
static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
    const struct ggml_tensor * src0 = op->src[0];
    const struct ggml_tensor * src1 = op->src[1];

    // layout-only / no-op operations are always supported
    if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
        return true;
    }

    // check extra buffer types
    // note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
    for (int i = 0; i < 4; i++) {
        if (op->src[i] && op->src[i]->buffer &&
            ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
            auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
            return buf_extra->supports_op(dev, op);
        }
    }

    switch (op->op) {
        case GGML_OP_CPY:
        case GGML_OP_SET_ROWS:
            return
                op->type != GGML_TYPE_IQ3_XXS &&
                op->type != GGML_TYPE_IQ3_S   &&
                op->type != GGML_TYPE_IQ2_XXS &&
                op->type != GGML_TYPE_IQ2_XS  &&
                op->type != GGML_TYPE_IQ2_S   &&
                op->type != GGML_TYPE_IQ1_S   &&
                op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
        case GGML_OP_MUL_MAT:
            // src1 must be F32 or already converted to src0's vec_dot type
            return src1->type == GGML_TYPE_F32 || src1->type == ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
        case GGML_OP_SOFT_MAX_BACK: {
            if (op->src[0]->type != GGML_TYPE_F32 || op->src[1]->type != GGML_TYPE_F32) {
                return false;
            }
            // max_bias is the second float in op_params; only the unbiased case is supported
            float max_bias = 0.0f;

            memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));

            return max_bias == 0.0f;
        }
        case GGML_OP_IM2COL_BACK:
            return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
        case GGML_OP_GET_ROWS_BACK:
            return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
        case GGML_OP_OUT_PROD:
            return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
                src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
        default:
            return true;
    }
}
473
474static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
475 return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
476 GGML_UNUSED(dev);
477}
478
// Device-level interface of the CPU backend. NULL entries are optional
// features (host buffers, op offloading, events) not provided by the CPU device.
static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
    /* .get_name             = */ ggml_backend_cpu_device_get_name,
    /* .get_description      = */ ggml_backend_cpu_device_get_description,
    /* .get_memory           = */ ggml_backend_cpu_device_get_memory,
    /* .get_type             = */ ggml_backend_cpu_device_get_type,
    /* .get_props            = */ ggml_backend_cpu_device_get_props,
    /* .init_backend         = */ ggml_backend_cpu_device_init_backend,
    /* .get_buffer_type      = */ ggml_backend_cpu_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,
    /* .supports_op          = */ ggml_backend_cpu_device_supports_op,
    /* .supports_buft        = */ ggml_backend_cpu_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
496
497// CPU backend - backend (reg)
498
499static const char * ggml_backend_cpu_reg_get_name(ggml_backend_reg_t reg) {
500 return "CPU";
501
502 GGML_UNUSED(reg);
503}
504
505static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) {
506 return 1;
507
508 GGML_UNUSED(reg);
509}
510
// Returns the single CPU device (index must be 0).
// The device and its context are function-local statics, so they are created
// once and shared by all callers.
static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);

    static ggml_backend_cpu_device_context ctx;
    static ggml_backend_device ggml_backend_cpu_device = {
        /* .iface   = */ ggml_backend_cpu_device_i,
        /* .reg     = */ reg,
        /* .context = */ &ctx,
    };

    return &ggml_backend_cpu_device;
}
523
// This is intended to replace the ggml_cpu_has_* functions when loading the CPU backend dynamically,
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
// Returns a NULL-terminated list of (name, value) pairs describing the CPU
// features detected at runtime plus compile-time options. Built once on the
// first call and cached for the process lifetime.
static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
    static std::vector<ggml_backend_feature> features = []() {
        // make sure runtime CPU feature detection has run before querying
        ggml_cpu_init();

        std::vector<ggml_backend_feature> features;
        if (ggml_cpu_has_sse3()) {
            features.push_back({ "SSE3", "1" });
        }
        if (ggml_cpu_has_ssse3()) {
            features.push_back({ "SSSE3", "1" });
        }
        if (ggml_cpu_has_avx()) {
            features.push_back({ "AVX", "1" });
        }
        if (ggml_cpu_has_avx_vnni()) {
            features.push_back({ "AVX_VNNI", "1" });
        }
        if (ggml_cpu_has_avx2()) {
            features.push_back({ "AVX2", "1" });
        }
        if (ggml_cpu_has_f16c()) {
            features.push_back({ "F16C", "1" });
        }
        if (ggml_cpu_has_fma()) {
            features.push_back({ "FMA", "1" });
        }
        if (ggml_cpu_has_bmi2()) {
            features.push_back({ "BMI2", "1" });
        }
        if (ggml_cpu_has_avx512()) {
            features.push_back({ "AVX512", "1" });
        }
        if (ggml_cpu_has_avx512_vbmi()) {
            features.push_back({ "AVX512_VBMI", "1" });
        }
        if (ggml_cpu_has_avx512_vnni()) {
            features.push_back({ "AVX512_VNNI", "1" });
        }
        if (ggml_cpu_has_avx512_bf16()) {
            features.push_back({ "AVX512_BF16", "1" });
        }
        if (ggml_cpu_has_amx_int8()) {
            features.push_back({ "AMX_INT8", "1" });
        }
        if (ggml_cpu_has_neon()) {
            features.push_back({ "NEON", "1" });
        }
        if (ggml_cpu_has_arm_fma()) {
            features.push_back({ "ARM_FMA", "1" });
        }
        if (ggml_cpu_has_fp16_va()) {
            features.push_back({ "FP16_VA", "1" });
        }
        if (ggml_cpu_has_matmul_int8()) {
            features.push_back({ "MATMUL_INT8", "1" });
        }
        if (ggml_cpu_has_sve()) {
            features.push_back({ "SVE", "1" });
        }
        if (ggml_cpu_has_dotprod()) {
            features.push_back({ "DOTPROD", "1" });
        }
        if (ggml_cpu_get_sve_cnt() > 0) {
            // static storage: the feature entry stores a pointer into this string
            static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
            features.push_back({ "SVE_CNT", sve_cnt.c_str() });
        }
        if (ggml_cpu_has_sme()) {
            features.push_back({ "SME", "1" });
        }
        if (ggml_cpu_has_riscv_v()) {
            features.push_back({ "RISCV_V", "1" });
        }
        if (ggml_cpu_get_rvv_vlen() > 0) {
            // static storage: the feature entry stores a pointer into this string
            static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
            features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
        }
        if (ggml_cpu_has_vsx()) {
            features.push_back({ "VSX", "1" });
        }
        if (ggml_cpu_has_vxe()) {
            features.push_back({ "VXE", "1" });
        }
        if (ggml_cpu_has_wasm_simd()) {
            features.push_back({ "WASM_SIMD", "1" });
        }
        if (ggml_cpu_has_llamafile()) {
            features.push_back({ "LLAMAFILE", "1" });
        }
        // compile-time options
    #ifdef GGML_USE_ACCELERATE
        features.push_back({ "ACCELERATE", "1" });
    #endif
    #ifdef GGML_USE_CPU_HBM
        features.push_back({ "CPU_HBM", "1" });
    #endif
    #ifdef GGML_USE_OPENMP
        features.push_back({ "OPENMP", "1" });
    #endif
    #ifdef GGML_USE_CPU_KLEIDIAI
        features.push_back({ "KLEIDIAI", "1" });
    #endif
    #ifdef GGML_USE_CPU_REPACK
        features.push_back({ "REPACK", "1" });
    #endif

        // sentinel terminator expected by callers
        features.push_back({ nullptr, nullptr });

        return features;
    }();

    return features.data();

    GGML_UNUSED(reg);
}
639
// Resolves backend-specific entry points by name (used when the CPU backend
// is loaded dynamically). Returns NULL for unknown names.
// NOTE: the string keys are part of the public ABI - do not rename them.
static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
    if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
        // assign through a typed function pointer first so the cast is checked
        ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
        return (void *)fct;
    }
    if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
        ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
        return (void *)fct;
    }
    if (strcmp(name, "ggml_backend_get_features") == 0) {
        return (void *)ggml_backend_cpu_get_features;
    }
    if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
        return (void *)ggml_backend_cpu_set_abort_callback;
    }
    if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
        return (void *)ggml_numa_init;
    }
    if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
        return (void *)ggml_is_numa;
    }
    if (strcmp(name, "ggml_backend_cpu_set_use_ref") == 0) {
        return (void *)ggml_backend_cpu_set_use_ref;
    }

    // threadpool - TODO: move to ggml-base
    if (strcmp(name, "ggml_threadpool_new") == 0) {
        return (void *)ggml_threadpool_new;
    }
    if (strcmp(name, "ggml_threadpool_free") == 0) {
        return (void *)ggml_threadpool_free;
    }
    if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
        return (void *)ggml_backend_cpu_set_threadpool;
    }

    return NULL;

    GGML_UNUSED(reg);
}
680
// Registry-level interface of the CPU backend.
static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
    /* .get_name         = */ ggml_backend_cpu_reg_get_name,
    /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
    /* .get_device       = */ ggml_backend_cpu_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
};
687
// Entry point: returns the (static) CPU backend registry object.
ggml_backend_reg_t ggml_backend_cpu_reg(void) {
    // init CPU feature detection
    ggml_cpu_init();

    static struct ggml_backend_reg ggml_backend_cpu_reg = {
        /* .api_version = */ GGML_BACKEND_API_VERSION,
        /* .iface       = */ ggml_backend_cpu_reg_i,
        /* .context     = */ NULL,
    };

    return &ggml_backend_cpu_reg;
}
700
// Exports the registration entry point when the CPU backend is built as a
// dynamically loaded module.
GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)