summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp')
-rw-r--r--llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp701
1 files changed, 701 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp b/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp
new file mode 100644
index 0000000..ddf1737
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -0,0 +1,701 @@
+#include "ggml-backend.h"
+#include "ggml-backend-impl.h"
+#include "ggml-cpu.h"
+#include "repack.h"
+#include "traits.h"
+#include "ggml-impl.h"
+#include "amx/amx.h"
+
+#include <cctype>
+#include <string>
+#include <vector>
+
+#ifdef GGML_USE_CPU_HBM
+# include "hbm.h"
+#endif
+
+#ifdef GGML_USE_CPU_KLEIDIAI
+# include "kleidiai/kleidiai.h"
+#endif
+
+#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
+# include "spacemit/ime.h"
+#endif
+
+#if defined(_WIN32)
+# define WIN32_LEAN_AND_MEAN
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# include <windows.h>
+#else
+# include <unistd.h>
+#endif
+
+#if defined(__APPLE__)
+# include <sys/sysctl.h>
+# include <sys/types.h>
+#endif
+
+// ggml-backend interface
+
+std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
+ static std::vector<ggml_backend_buffer_type_t> bufts = []() {
+ std::vector<ggml_backend_buffer_type_t> bufts;
+
+#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
+ if (ggml_backend_amx_buffer_type()) {
+ bufts.push_back(ggml_backend_amx_buffer_type());
+ }
+#endif
+
+#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
+ if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
+ bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
+ }
+#endif
+
+#ifdef GGML_USE_CPU_KLEIDIAI
+ if (ggml_backend_cpu_kleidiai_buffer_type()) {
+ bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
+ }
+#endif
+
+#ifdef GGML_USE_CPU_REPACK
+ if (ggml_backend_cpu_repack_buffer_type()) {
+ bufts.push_back(ggml_backend_cpu_repack_buffer_type());
+ }
+#endif
+
+ return bufts;
+ }();
+
+ return bufts;
+}
+
+static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
+ static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
+ std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
+ bufts.push_back(nullptr);
+ return bufts;
+ }();
+
+ return extra_bufts.data();
+
+ GGML_UNUSED(device);
+}
+
+static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
+ for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
+ if (extra == buft) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// CPU backend - backend (stream)
+
+struct ggml_backend_cpu_context {
+ int n_threads;
+ ggml_threadpool_t threadpool;
+
+ uint8_t * work_data;
+ size_t work_size;
+
+ ggml_abort_callback abort_callback;
+ void * abort_callback_data;
+
+ bool use_ref; // use reference implementation
+};
+
+static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
+ return "CPU";
+
+ GGML_UNUSED(backend);
+}
+
+static void ggml_backend_cpu_free(ggml_backend_t backend) {
+ struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+ delete[] cpu_ctx->work_data;
+ delete cpu_ctx;
+ delete backend;
+}
+
+struct ggml_backend_plan_cpu {
+ struct ggml_cplan cplan;
+ struct ggml_cgraph cgraph;
+};
+
+static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, const struct ggml_cgraph * cgraph) {
+ struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+ struct ggml_backend_plan_cpu * cpu_plan = new ggml_backend_plan_cpu;
+
+ cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
+ cpu_plan->cgraph = *cgraph; // FIXME: deep copy
+
+ if (cpu_plan->cplan.work_size > 0) {
+ cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
+ if (cpu_plan->cplan.work_data == NULL) {
+ delete cpu_plan;
+ return NULL;
+ }
+ }
+
+ cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
+ cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+ cpu_plan->cplan.use_ref = cpu_ctx->use_ref;
+
+ return cpu_plan;
+}
+
+static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+ struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+ delete[] cpu_plan->cplan.work_data;
+ delete cpu_plan;
+
+ GGML_UNUSED(backend);
+}
+
+static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+ struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+ return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
+
+ GGML_UNUSED(backend);
+}
+
+static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+ struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+ struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
+
+ if (cpu_ctx->work_size < cplan.work_size) {
+ delete[] cpu_ctx->work_data;
+ cpu_ctx->work_data = new uint8_t[cplan.work_size];
+ if (cpu_ctx->work_data == NULL) {
+ cpu_ctx->work_size = 0;
+ return GGML_STATUS_ALLOC_FAILED;
+ }
+ cpu_ctx->work_size = cplan.work_size;
+ }
+ cplan.work_data = (uint8_t *)cpu_ctx->work_data;
+
+ cplan.abort_callback = cpu_ctx->abort_callback;
+ cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+ cplan.use_ref = cpu_ctx->use_ref;
+
+ return ggml_graph_compute(cgraph, &cplan);
+}
+
+static const struct ggml_backend_i ggml_backend_cpu_i = {
+ /* .get_name = */ ggml_backend_cpu_get_name,
+ /* .free = */ ggml_backend_cpu_free,
+ /* .set_tensor_async = */ NULL,
+ /* .get_tensor_async = */ NULL,
+ /* .cpy_tensor_async = */ NULL,
+ /* .synchronize = */ NULL,
+ /* .graph_plan_create = */ ggml_backend_cpu_graph_plan_create,
+ /* .graph_plan_free = */ ggml_backend_cpu_graph_plan_free,
+ /* .graph_plan_update = */ NULL,
+ /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
+ /* .graph_compute = */ ggml_backend_cpu_graph_compute,
+ /* .event_record = */ NULL,
+ /* .event_wait = */ NULL,
+ /* .graph_optimize = */ NULL,
+};
+
+static ggml_guid_t ggml_backend_cpu_guid(void) {
+ static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
+ return &guid;
+}
+
+ggml_backend_t ggml_backend_cpu_init(void) {
+ // initialize CPU backend now to avoid slowing the first graph computation
+ ggml_cpu_init();
+
+ struct ggml_backend_cpu_context * ctx = new ggml_backend_cpu_context;
+ if (ctx == NULL) {
+ return NULL;
+ }
+
+ ctx->n_threads = GGML_DEFAULT_N_THREADS;
+ ctx->threadpool = NULL;
+ ctx->work_data = NULL;
+ ctx->work_size = 0;
+ ctx->abort_callback = NULL;
+ ctx->abort_callback_data = NULL;
+ ctx->use_ref = false;
+
+ ggml_backend_t cpu_backend = new ggml_backend {
+ /* .guid = */ ggml_backend_cpu_guid(),
+ /* .iface = */ ggml_backend_cpu_i,
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
+ /* .context = */ ctx,
+ };
+
+ if (cpu_backend == NULL) {
+ delete ctx;
+ return NULL;
+ }
+
+ return cpu_backend;
+}
+
+bool ggml_backend_is_cpu(ggml_backend_t backend) {
+ return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
+}
+
+void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+ ctx->n_threads = n_threads;
+}
+
+void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+
+ if (ctx->threadpool && ctx->threadpool != threadpool) {
+ // already had a different threadpool, pause/suspend it before switching
+ ggml_threadpool_pause(ctx->threadpool);
+ }
+ ctx->threadpool = threadpool;
+}
+
+void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+ ctx->abort_callback = abort_callback;
+ ctx->abort_callback_data = abort_callback_data;
+}
+
+void ggml_backend_cpu_set_use_ref(ggml_backend_t backend_cpu, bool use_ref) {
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+ ctx->use_ref = use_ref;
+}
+
+// CPU backend - device
+
+struct ggml_backend_cpu_device_context {
+ std::string description = "CPU";
+
+ ggml_backend_cpu_device_context() {
+#ifdef __APPLE__
+ size_t len = 0;
+ if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
+ description.resize(len);
+ sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
+ }
+#elif defined(__linux__)
+ FILE * f = fopen("/proc/cpuinfo", "r");
+ if (f) {
+ char buf[1024];
+ while (fgets(buf, sizeof(buf), f)) {
+ if (strncmp(buf, "model name", 10) == 0) {
+ char * p = strchr(buf, ':');
+ if (p) {
+ p++;
+ while (std::isspace(*p)) {
+ p++;
+ }
+ while (std::isspace(p[strlen(p) - 1])) {
+ p[strlen(p) - 1] = '\0';
+ }
+ description = p;
+ break;
+ }
+ }
+ }
+ fclose(f);
+ }
+#elif defined(_WIN32)
+ HKEY hKey;
+ if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
+ TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
+ 0,
+ KEY_READ,
+ &hKey) == ERROR_SUCCESS) {
+ DWORD cpu_brand_size = 0;
+ if (RegQueryValueExA(hKey,
+ "ProcessorNameString",
+ NULL,
+ NULL,
+ NULL,
+ &cpu_brand_size) == ERROR_SUCCESS) {
+ description.resize(cpu_brand_size);
+ if (RegQueryValueExA(hKey,
+ "ProcessorNameString",
+ NULL,
+ NULL,
+ (LPBYTE)&description[0], // NOLINT
+ &cpu_brand_size) == ERROR_SUCCESS) {
+ if (description.find('\0') != std::string::npos) {
+ description.resize(description.find('\0'));
+ }
+ }
+ }
+ RegCloseKey(hKey);
+ }
+#endif
+ }
+};
+
+static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
+ return "CPU";
+
+ GGML_UNUSED(dev);
+}
+
+static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t dev) {
+ struct ggml_backend_cpu_device_context * ctx = (struct ggml_backend_cpu_device_context *)dev->context;
+
+ return ctx->description.c_str();
+}
+
+static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+#ifdef _WIN32
+ MEMORYSTATUSEX status;
+ status.dwLength = sizeof(status);
+ GlobalMemoryStatusEx(&status);
+ *total = status.ullTotalPhys;
+ *free = status.ullAvailPhys;
+#else
+ long pages = sysconf(_SC_PHYS_PAGES);
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ *total = pages * page_size;
+
+ // "free" system memory is ill-defined, for practical purposes assume that all of it is free:
+ *free = *total;
+#endif // _WIN32
+
+ GGML_UNUSED(dev);
+}
+
+static enum ggml_backend_dev_type ggml_backend_cpu_device_get_type(ggml_backend_dev_t dev) {
+ return GGML_BACKEND_DEVICE_TYPE_CPU;
+
+ GGML_UNUSED(dev);
+}
+
+static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+ props->name = ggml_backend_cpu_device_get_name(dev);
+ props->description = ggml_backend_cpu_device_get_description(dev);
+ props->type = ggml_backend_cpu_device_get_type(dev);
+ ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
+ props->caps = {
+ /* .async = */ false,
+ /* .host_buffer = */ false,
+ /* .buffer_from_host_ptr = */ true,
+ /* .events = */ false,
+ };
+}
+
+static ggml_backend_t ggml_backend_cpu_device_init_backend(ggml_backend_dev_t dev, const char * params) {
+ return ggml_backend_cpu_init();
+
+ GGML_UNUSED(dev);
+ GGML_UNUSED(params);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_cpu_device_get_buffer_type(ggml_backend_dev_t dev) {
+ return ggml_backend_cpu_buffer_type();
+
+ GGML_UNUSED(dev);
+}
+
+static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
+ return ggml_backend_cpu_buffer_from_ptr(ptr, size);
+
+ GGML_UNUSED(dev);
+ GGML_UNUSED(max_tensor_size);
+}
+
+static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
+ const struct ggml_tensor * src0 = op->src[0];
+ const struct ggml_tensor * src1 = op->src[1];
+
+ if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
+ return true;
+ }
+
+ // check extra buffer types
+ // note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
+ for (int i = 0; i < 4; i++) {
+ if (op->src[i] && op->src[i]->buffer &&
+ ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
+ auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
+ return buf_extra->supports_op(dev, op);
+ }
+ }
+
+ switch (op->op) {
+ case GGML_OP_CPY:
+ case GGML_OP_SET_ROWS:
+ return
+ op->type != GGML_TYPE_IQ3_XXS &&
+ op->type != GGML_TYPE_IQ3_S &&
+ op->type != GGML_TYPE_IQ2_XXS &&
+ op->type != GGML_TYPE_IQ2_XS &&
+ op->type != GGML_TYPE_IQ2_S &&
+ op->type != GGML_TYPE_IQ1_S &&
+ op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
+ case GGML_OP_MUL_MAT:
+ return src1->type == GGML_TYPE_F32 || src1->type == ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
+ case GGML_OP_SOFT_MAX_BACK: {
+ if (op->src[0]->type != GGML_TYPE_F32 || op->src[1]->type != GGML_TYPE_F32) {
+ return false;
+ }
+ float max_bias = 0.0f;
+
+ memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));
+
+ return max_bias == 0.0f;
+ }
+ case GGML_OP_IM2COL_BACK:
+ return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
+ case GGML_OP_GET_ROWS_BACK:
+ return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
+ case GGML_OP_OUT_PROD:
+ return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
+ src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
+ default:
+ return true;
+ }
+}
+
+static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+ return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
+ GGML_UNUSED(dev);
+}
+
+static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
+ /* .get_name = */ ggml_backend_cpu_device_get_name,
+ /* .get_description = */ ggml_backend_cpu_device_get_description,
+ /* .get_memory = */ ggml_backend_cpu_device_get_memory,
+ /* .get_type = */ ggml_backend_cpu_device_get_type,
+ /* .get_props = */ ggml_backend_cpu_device_get_props,
+ /* .init_backend = */ ggml_backend_cpu_device_init_backend,
+ /* .get_buffer_type = */ ggml_backend_cpu_device_get_buffer_type,
+ /* .get_host_buffer_type = */ NULL,
+ /* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,
+ /* .supports_op = */ ggml_backend_cpu_device_supports_op,
+ /* .supports_buft = */ ggml_backend_cpu_device_supports_buft,
+ /* .offload_op = */ NULL,
+ /* .event_new = */ NULL,
+ /* .event_free = */ NULL,
+ /* .event_synchronize = */ NULL,
+};
+
+// CPU backend - backend (reg)
+
+static const char * ggml_backend_cpu_reg_get_name(ggml_backend_reg_t reg) {
+ return "CPU";
+
+ GGML_UNUSED(reg);
+}
+
+static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) {
+ return 1;
+
+ GGML_UNUSED(reg);
+}
+
+static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) {
+ GGML_ASSERT(index == 0);
+
+ static ggml_backend_cpu_device_context ctx;
+ static ggml_backend_device ggml_backend_cpu_device = {
+ /* .iface = */ ggml_backend_cpu_device_i,
+ /* .reg = */ reg,
+ /* .context = */ &ctx,
+ };
+
+ return &ggml_backend_cpu_device;
+}
+
+// This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
+// and additionally to allow other backends to expose their own list of features that applications can query using the same API
+static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
+ static std::vector<ggml_backend_feature> features = []() {
+ ggml_cpu_init();
+
+ std::vector<ggml_backend_feature> features;
+ if (ggml_cpu_has_sse3()) {
+ features.push_back({ "SSE3", "1" });
+ }
+ if (ggml_cpu_has_ssse3()) {
+ features.push_back({ "SSSE3", "1" });
+ }
+ if (ggml_cpu_has_avx()) {
+ features.push_back({ "AVX", "1" });
+ }
+ if (ggml_cpu_has_avx_vnni()) {
+ features.push_back({ "AVX_VNNI", "1" });
+ }
+ if (ggml_cpu_has_avx2()) {
+ features.push_back({ "AVX2", "1" });
+ }
+ if (ggml_cpu_has_f16c()) {
+ features.push_back({ "F16C", "1" });
+ }
+ if (ggml_cpu_has_fma()) {
+ features.push_back({ "FMA", "1" });
+ }
+ if (ggml_cpu_has_bmi2()) {
+ features.push_back({ "BMI2", "1" });
+ }
+ if (ggml_cpu_has_avx512()) {
+ features.push_back({ "AVX512", "1" });
+ }
+ if (ggml_cpu_has_avx512_vbmi()) {
+ features.push_back({ "AVX512_VBMI", "1" });
+ }
+ if (ggml_cpu_has_avx512_vnni()) {
+ features.push_back({ "AVX512_VNNI", "1" });
+ }
+ if (ggml_cpu_has_avx512_bf16()) {
+ features.push_back({ "AVX512_BF16", "1" });
+ }
+ if (ggml_cpu_has_amx_int8()) {
+ features.push_back({ "AMX_INT8", "1" });
+ }
+ if (ggml_cpu_has_neon()) {
+ features.push_back({ "NEON", "1" });
+ }
+ if (ggml_cpu_has_arm_fma()) {
+ features.push_back({ "ARM_FMA", "1" });
+ }
+ if (ggml_cpu_has_fp16_va()) {
+ features.push_back({ "FP16_VA", "1" });
+ }
+ if (ggml_cpu_has_matmul_int8()) {
+ features.push_back({ "MATMUL_INT8", "1" });
+ }
+ if (ggml_cpu_has_sve()) {
+ features.push_back({ "SVE", "1" });
+ }
+ if (ggml_cpu_has_dotprod()) {
+ features.push_back({ "DOTPROD", "1" });
+ }
+ if (ggml_cpu_get_sve_cnt() > 0) {
+ static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
+ features.push_back({ "SVE_CNT", sve_cnt.c_str() });
+ }
+ if (ggml_cpu_has_sme()) {
+ features.push_back({ "SME", "1" });
+ }
+ if (ggml_cpu_has_riscv_v()) {
+ features.push_back({ "RISCV_V", "1" });
+ }
+ if (ggml_cpu_get_rvv_vlen() > 0) {
+ static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
+ features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
+ }
+ if (ggml_cpu_has_vsx()) {
+ features.push_back({ "VSX", "1" });
+ }
+ if (ggml_cpu_has_vxe()) {
+ features.push_back({ "VXE", "1" });
+ }
+ if (ggml_cpu_has_wasm_simd()) {
+ features.push_back({ "WASM_SIMD", "1" });
+ }
+ if (ggml_cpu_has_llamafile()) {
+ features.push_back({ "LLAMAFILE", "1" });
+ }
+ #ifdef GGML_USE_ACCELERATE
+ features.push_back({ "ACCELERATE", "1" });
+ #endif
+ #ifdef GGML_USE_CPU_HBM
+ features.push_back({ "CPU_HBM", "1" });
+ #endif
+ #ifdef GGML_USE_OPENMP
+ features.push_back({ "OPENMP", "1" });
+ #endif
+ #ifdef GGML_USE_CPU_KLEIDIAI
+ features.push_back({ "KLEIDIAI", "1" });
+ #endif
+ #ifdef GGML_USE_CPU_REPACK
+ features.push_back({ "REPACK", "1" });
+ #endif
+
+ features.push_back({ nullptr, nullptr });
+
+ return features;
+ }();
+
+ return features.data();
+
+ GGML_UNUSED(reg);
+}
+
+static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
+ if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
+ ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
+ return (void *)fct;
+ }
+ if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
+ ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
+ return (void *)fct;
+ }
+ if (strcmp(name, "ggml_backend_get_features") == 0) {
+ return (void *)ggml_backend_cpu_get_features;
+ }
+ if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
+ return (void *)ggml_backend_cpu_set_abort_callback;
+ }
+ if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
+ return (void *)ggml_numa_init;
+ }
+ if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
+ return (void *)ggml_is_numa;
+ }
+ if (strcmp(name, "ggml_backend_cpu_set_use_ref") == 0) {
+ return (void *)ggml_backend_cpu_set_use_ref;
+ }
+
+ // threadpool - TODO: move to ggml-base
+ if (strcmp(name, "ggml_threadpool_new") == 0) {
+ return (void *)ggml_threadpool_new;
+ }
+ if (strcmp(name, "ggml_threadpool_free") == 0) {
+ return (void *)ggml_threadpool_free;
+ }
+ if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
+ return (void *)ggml_backend_cpu_set_threadpool;
+ }
+
+ return NULL;
+
+ GGML_UNUSED(reg);
+}
+
+static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
+ /* .get_name = */ ggml_backend_cpu_reg_get_name,
+ /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
+ /* .get_device = */ ggml_backend_cpu_reg_get_device,
+ /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
+};
+
+ggml_backend_reg_t ggml_backend_cpu_reg(void) {
+ // init CPU feature detection
+ ggml_cpu_init();
+
+ static struct ggml_backend_reg ggml_backend_cpu_reg = {
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
+ /* .iface = */ ggml_backend_cpu_reg_i,
+ /* .context = */ NULL,
+ };
+
+ return &ggml_backend_cpu_reg;
+}
+
+GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)