aboutsummaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-blas
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-blas
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-blas')
-rw-r--r--llama.cpp/ggml/src/ggml-blas/CMakeLists.txt101
-rw-r--r--llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp518
2 files changed, 619 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt b/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt
new file mode 100644
index 0000000..c27dc17
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt
@@ -0,0 +1,101 @@
1if (GGML_STATIC)
2 set(BLA_STATIC ON)
3endif()
4#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
5# set(BLA_SIZEOF_INTEGER 8)
6#endif()
7
8set(BLA_VENDOR ${GGML_BLAS_VENDOR})
9find_package(BLAS)
10
11if (BLAS_FOUND)
12 message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
13
14 ggml_add_backend_library(ggml-blas
15 ggml-blas.cpp
16 )
17
18 if (${GGML_BLAS_VENDOR} MATCHES "Apple")
19 add_compile_definitions(ACCELERATE_NEW_LAPACK)
20 add_compile_definitions(ACCELERATE_LAPACK_ILP64)
21 add_compile_definitions(GGML_BLAS_USE_ACCELERATE)
22 elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
23 # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
24 # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
25 find_package(PkgConfig REQUIRED)
26 if (${GGML_BLAS_VENDOR} MATCHES "Generic")
27 pkg_check_modules(DepBLAS blas)
28 elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
29 # As of openblas v0.3.22, the 64-bit is named openblas64.pc
30 pkg_check_modules(DepBLAS openblas64)
31 if (NOT DepBLAS_FOUND)
32 pkg_check_modules(DepBLAS openblas)
33 endif()
34 elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
35 pkg_check_modules(DepBLAS blis)
36 elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
37 pkg_check_modules(DepBLAS blas-atlas)
38 elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
39 pkg_check_modules(DepBLAS flexiblas_api)
40 elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
41 # all Intel* libraries share the same include path
42 pkg_check_modules(DepBLAS mkl-sdl)
43 elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
44 # this doesn't provide pkg-config
45 # suggest to assign BLAS_INCLUDE_DIRS on your own
46 if ("${NVHPC_VERSION}" STREQUAL "")
47 message(WARNING "Better to set NVHPC_VERSION")
48 else()
49 set(DepBLAS_FOUND ON)
50 set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
51 endif()
52 endif()
53 if (DepBLAS_FOUND)
54 set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
55 else()
56 message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
57 " detected by pkgconfig, trying to find cblas.h from possible paths...")
58 find_path(BLAS_INCLUDE_DIRS
59 NAMES cblas.h
60 HINTS
61 /usr/include
62 /usr/local/include
63 /usr/include/openblas
64 /opt/homebrew/opt/openblas/include
65 /usr/local/opt/openblas/include
66 /usr/include/x86_64-linux-gnu/openblas/include
67 )
68 endif()
69 endif()
70
71 message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
72
73 target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
74
75 if ("${GGML_BLAS_VENDOR}" STREQUAL "")
76 message(WARNING "GGML_BLAS_VENDOR is not set; some methods may not link properly.")
77 endif()
78
79 if ("${GGML_BLAS_VENDOR}" MATCHES "Intel" OR ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND "${GGML_BLAS_VENDOR}" MATCHES "Generic"))
80 add_compile_definitions(GGML_BLAS_USE_MKL)
81 endif()
82
83 if ("${GGML_BLAS_VENDOR}" MATCHES "OpenBLAS")
84 add_compile_definitions(GGML_BLAS_USE_OPENBLAS)
85 endif()
86
87 if ("${GGML_BLAS_VENDOR}" MATCHES "FLAME" OR "${GGML_BLAS_VENDOR}" MATCHES "AOCL" OR "${GGML_BLAS_VENDOR}" MATCHES "AOCL_mt")
88 add_compile_definitions(GGML_BLAS_USE_BLIS)
89 endif()
90
91 if ("${GGML_BLAS_VENDOR}" MATCHES "NVPL")
92 add_compile_definitions(GGML_BLAS_USE_NVPL)
93 endif()
94
95 target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
96 target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS})
97else()
98 message(FATAL_ERROR "BLAS not found, please refer to "
99 "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
100 " to set correct GGML_BLAS_VENDOR")
101endif()
diff --git a/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp b/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp
new file mode 100644
index 0000000..2e9ddf2
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp
@@ -0,0 +1,518 @@
1#include "ggml-impl.h"
2#include "ggml-blas.h"
3#include "ggml-backend-impl.h"
4
5#include <future>
6#include <vector>
7#include <cstring>
8
9#if defined(GGML_BLAS_USE_ACCELERATE)
10# include <Accelerate/Accelerate.h>
11#elif defined(GGML_BLAS_USE_MKL)
12# include <mkl.h>
13#elif defined(GGML_BLAS_USE_BLIS)
14# include <blis.h>
15#elif defined(GGML_BLAS_USE_NVPL)
16# include <nvpl_blas.h>
17#else
18# include <cblas.h>
19#endif
20
21struct ggml_backend_blas_context {
22 int n_threads = GGML_DEFAULT_N_THREADS;
23 std::unique_ptr<char[]> work_data;
24 size_t work_size = 0;
25#ifndef GGML_USE_OPENMP
26 std::vector<std::future<void>> tasks;
27#endif
28};
29
30static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
31 const struct ggml_tensor * src0 = dst->src[0];
32 const struct ggml_tensor * src1 = dst->src[1];
33
34 GGML_TENSOR_BINARY_OP_LOCALS
35
36 const enum ggml_type type = src0->type;
37
38 GGML_ASSERT(ne0 == ne01);
39 GGML_ASSERT(ne1 == ne11);
40 GGML_ASSERT(ne2 == ne12);
41 GGML_ASSERT(ne3 == ne13);
42
43 // we don't support permuted src0 or src1
44 GGML_ASSERT(nb00 == ggml_type_size(type));
45 GGML_ASSERT(nb10 == ggml_type_size(src1->type));
46
47 // dst cannot be transposed or permuted
48 GGML_ASSERT(nb0 == sizeof(float));
49 GGML_ASSERT(nb0 <= nb1);
50 GGML_ASSERT(nb1 <= nb2);
51 GGML_ASSERT(nb2 <= nb3);
52
53 // broadcast factors
54 const int64_t r2 = ne12/ne02;
55 const int64_t r3 = ne13/ne03;
56
57 const int64_t ne_plane = ne01*ne00;
58 const size_t desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float);
59
60 if (ctx->work_size < desired_wsize) {
61 ctx->work_data.reset(new char[desired_wsize]);
62 ctx->work_size = desired_wsize;
63 }
64 void * wdata = ctx->work_data.get();
65
66 // convert src0 to float
67 if (type != GGML_TYPE_F32) {
68 const auto * type_traits = ggml_get_type_traits(type);
69 ggml_to_float_t const to_float = type_traits->to_float;
70
71 for (int64_t i03 = 0; i03 < ne03; i03++) {
72 for (int64_t i02 = 0; i02 < ne02; i02++) {
73 const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
74 float * const wplane = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
75
76 const int min_cols_per_thread = 4096;
77 const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1);
78 const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1);
79
80#ifdef GGML_USE_OPENMP
81 #pragma omp parallel for num_threads(n_threads)
82 for (int64_t i01 = 0; i01 < ne01; i01++) {
83 to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
84 }
85#else
86 for (int i = 1; i < n_threads; i++) {
87 const int64_t start = i*ne01/n_threads;
88 const int64_t end = (i + 1)*ne01/n_threads;
89 if (start < end) {
90 ctx->tasks.push_back(std::async(std::launch::async, [=]() {
91 for (int64_t i01 = start; i01 < end; i01++) {
92 to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
93 }
94 }));
95 }
96 }
97 {
98 // reuse the current thread for the first task
99 const int64_t start = 0;
100 const int64_t end = ne01/n_threads;
101 for (int64_t i01 = start; i01 < end; i01++) {
102 to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
103 }
104 }
105#endif
106 }
107 }
108
109#ifndef GGML_USE_OPENMP
110 // wait for all tasks to finish
111 for (auto & task : ctx->tasks) {
112 task.get();
113 }
114 ctx->tasks.clear();
115#endif
116 }
117
118#if defined(GGML_BLAS_USE_OPENBLAS)
119 openblas_set_num_threads(ctx->n_threads);
120#elif defined(GGML_BLAS_USE_BLIS)
121 bli_thread_set_num_threads(ctx->n_threads);
122#elif defined(GGML_BLAS_USE_NVPL)
123 nvpl_blas_set_num_threads(ctx->n_threads);
124#endif
125
126 for (int64_t i13 = 0; i13 < ne13; i13++) {
127 for (int64_t i12 = 0; i12 < ne12; i12++) {
128 const int64_t i03 = i13/r3;
129 const int64_t i02 = i12/r2;
130
131 const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
132 const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
133 float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
134
135 if (type != GGML_TYPE_F32) {
136 x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
137 }
138
139 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
140 ne1, ne01, ne10,
141 1.0f, y, ne10,
142 x, ne00,
143 0.0f, d, ne01);
144 }
145 }
146}
147
148static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
149 const struct ggml_tensor * src0 = dst->src[0];
150 const struct ggml_tensor * src1 = dst->src[1];
151
152 GGML_TENSOR_BINARY_OP_LOCALS
153
154 GGML_ASSERT(ne0 == ne00);
155 GGML_ASSERT(ne1 == ne10);
156 GGML_ASSERT(ne2 == ne02);
157 GGML_ASSERT(ne02 == ne12);
158 GGML_ASSERT(ne3 == ne13);
159 GGML_ASSERT(ne03 == ne13);
160
161 // we don't support permuted src0 or src1
162 GGML_ASSERT(nb00 == sizeof(float));
163
164 // dst cannot be transposed or permuted
165 GGML_ASSERT(nb0 == sizeof(float));
166 // GGML_ASSERT(nb0 <= nb1);
167 // GGML_ASSERT(nb1 <= nb2);
168 // GGML_ASSERT(nb2 <= nb3);
169
170 // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
171 // src0: (k,n)
172 // src1: (k,m)
173 // dst: (m,n)
174 //
175 // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
176 // Also expressed as (major,minor)
177 // a: (m,k): so src1 transposed
178 // b: (k,n): so src0
179 // c: (m,n)
180 //
181 // However, if ggml_is_transposed(src1) is true, then
182 // src1->data already contains a transposed version, so sgemm mustn't
183 // transpose it further.
184
185 int n = src0->ne[0];
186 int k = src0->ne[1];
187 int m = src1->ne[0];
188
189 CBLAS_TRANSPOSE transposeA;
190 int lda;
191
192 if (!ggml_is_transposed(src1)) {
193 transposeA = CblasTrans;
194 lda = m;
195 } else {
196 transposeA = CblasNoTrans;
197 lda = k;
198 }
199
200 float * a = (float *) ((char *) src1->data);
201 float * b = (float *) ((char *) src0->data);
202 float * c = (float *) ((char *) dst->data);
203
204 cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
205
206 GGML_UNUSED(ctx);
207}
208
209// backend interface
210
211static const char * ggml_backend_blas_get_name(ggml_backend_t backend) {
212 return "BLAS";
213
214 GGML_UNUSED(backend);
215}
216
217static void ggml_backend_blas_free(ggml_backend_t backend) {
218 ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;
219 delete ctx;
220 delete backend;
221}
222
223static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
224 ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;
225
226 for (int i = 0; i < cgraph->n_nodes; i++) {
227 struct ggml_tensor * node = cgraph->nodes[i];
228
229 if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
230 continue;
231 }
232
233 switch (node->op) {
234 case GGML_OP_MUL_MAT:
235 ggml_backend_blas_mul_mat(ctx, node);
236 break;
237
238 case GGML_OP_OUT_PROD:
239 ggml_backend_blas_out_prod(ctx, node);
240 break;
241
242 case GGML_OP_NONE:
243 case GGML_OP_RESHAPE:
244 case GGML_OP_VIEW:
245 case GGML_OP_PERMUTE:
246 case GGML_OP_TRANSPOSE:
247 break;
248
249 default:
250 GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
251 }
252 }
253
254 return GGML_STATUS_SUCCESS;
255
256 GGML_UNUSED(backend);
257}
258
259static struct ggml_backend_i blas_backend_i = {
260 /* .get_name = */ ggml_backend_blas_get_name,
261 /* .free = */ ggml_backend_blas_free,
262 /* .set_tensor_async = */ NULL,
263 /* .get_tensor_async = */ NULL,
264 /* .cpy_tensor_async = */ NULL,
265 /* .synchronize = */ NULL,
266 /* .graph_plan_create = */ NULL,
267 /* .graph_plan_free = */ NULL,
268 /* .graph_plan_update = */ NULL,
269 /* .graph_plan_compute = */ NULL,
270 /* .graph_compute = */ ggml_backend_blas_graph_compute,
271 /* .event_record = */ NULL,
272 /* .event_wait = */ NULL,
273 /* .graph_optimize = */ NULL,
274};
275
276static ggml_guid_t ggml_backend_blas_guid(void) {
277 static ggml_guid guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d };
278 return &guid;
279}
280
281ggml_backend_t ggml_backend_blas_init(void) {
282 ggml_backend_blas_context * ctx = new ggml_backend_blas_context;
283
284 ggml_backend_t backend = new ggml_backend {
285 /* .guid = */ ggml_backend_blas_guid(),
286 /* .iface = */ blas_backend_i,
287 /* .device = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
288 /* .context = */ ctx,
289 };
290
291#if defined(GGML_BLAS_USE_OPENBLAS) && defined(GGML_USE_OPENMP)
292 if (openblas_get_parallel() != OPENBLAS_OPENMP) {
293 GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
294 }
295#endif
296
297#if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
298 GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
299#endif
300
301 return backend;
302}
303
304bool ggml_backend_is_blas(ggml_backend_t backend) {
305 return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_blas_guid());
306}
307
308void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) {
309 GGML_ASSERT(ggml_backend_is_blas(backend_blas));
310
311 ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context;
312 ctx->n_threads = n_threads;
313}
314
315// device interface
316
317static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) {
318 return "BLAS";
319
320 GGML_UNUSED(dev);
321}
322
323static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
324 #if defined(GGML_BLAS_USE_ACCELERATE)
325 return "Accelerate";
326 #elif defined(GGML_BLAS_USE_MKL)
327 return "MKL";
328 #elif defined(GGML_BLAS_USE_BLIS)
329 return "BLIS";
330 #elif defined(GGML_BLAS_USE_NVPL)
331 return "NVPL";
332 #elif defined(GGML_BLAS_USE_OPENBLAS)
333 return "OpenBLAS";
334 #else
335 return "BLAS";
336 #endif
337
338 GGML_UNUSED(dev);
339}
340
341static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
342 // TODO
343 *free = 0;
344 *total = 0;
345
346 GGML_UNUSED(dev);
347}
348
349static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
350 return GGML_BACKEND_DEVICE_TYPE_ACCEL;
351
352 GGML_UNUSED(dev);
353}
354
355static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
356 props->name = ggml_backend_blas_device_get_name(dev);
357 props->description = ggml_backend_blas_device_get_description(dev);
358 props->type = ggml_backend_blas_device_get_type(dev);
359 ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
360 props->caps = {
361 /* .async = */ false,
362 /* .host_buffer = */ false,
363 /* .buffer_from_host_ptr = */ true,
364 /* .events = */ false,
365 };
366}
367
368static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) {
369 return ggml_backend_blas_init();
370
371 GGML_UNUSED(dev);
372 GGML_UNUSED(params);
373}
374
375static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
376 return ggml_backend_cpu_buffer_type();
377
378 GGML_UNUSED(dev);
379}
380
381static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
382 return ggml_backend_cpu_buffer_from_ptr(ptr, size);
383
384 GGML_UNUSED(dev);
385 GGML_UNUSED(max_tensor_size);
386}
387
388static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
389 const struct ggml_tensor * src0 = op->src[0];
390 const struct ggml_tensor * src1 = op->src[1];
391
392 switch (op->op) {
393 case GGML_OP_NONE:
394 case GGML_OP_RESHAPE:
395 case GGML_OP_VIEW:
396 case GGML_OP_PERMUTE:
397 case GGML_OP_TRANSPOSE:
398 return true;
399
400 case GGML_OP_MUL_MAT:
401 {
402 // BLAS usually is only faster for large matrices
403 const struct ggml_tensor * src0 = op->src[0];
404 const struct ggml_tensor * src1 = op->src[1];
405
406 const int64_t ne10 = src1->ne[0];
407
408 const int64_t ne0 = op->ne[0];
409 const int64_t ne1 = op->ne[1];
410
411 // TODO: find the optimal value
412 const int64_t min_batch = 32;
413
414 return ggml_is_contiguous(src0) &&
415 ggml_is_contiguous(src1) &&
416 src1->type == GGML_TYPE_F32 &&
417 (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
418 (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
419 }
420
421 case GGML_OP_OUT_PROD:
422 return op->src[0]->type == GGML_TYPE_F32 &&
423 op->src[1]->type == GGML_TYPE_F32 &&
424 ggml_is_matrix(src0) &&
425 ggml_is_matrix(src1) &&
426 ggml_is_contiguous(src0) &&
427 (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
428 (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
429
430 default:
431 return false;
432
433 }
434
435 GGML_UNUSED(dev);
436}
437
438static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
439 return ggml_backend_buft_is_host(buft);
440
441 GGML_UNUSED(dev);
442}
443
444static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
445 /* .get_name = */ ggml_backend_blas_device_get_name,
446 /* .get_description = */ ggml_backend_blas_device_get_description,
447 /* .get_memory = */ ggml_backend_blas_device_get_memory,
448 /* .get_type = */ ggml_backend_blas_device_get_type,
449 /* .get_props = */ ggml_backend_blas_device_get_props,
450 /* .init_backend = */ ggml_backend_blas_device_init_backend,
451 /* .get_buffer_type = */ ggml_backend_blas_device_get_buffer_type,
452 /* .get_host_buffer_type = */ NULL,
453 /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr,
454 /* .supports_op = */ ggml_backend_blas_device_supports_op,
455 /* .supports_buft = */ ggml_backend_blas_device_supports_buft,
456 /* .offload_op = */ NULL,
457 /* .event_new = */ NULL,
458 /* .event_free = */ NULL,
459 /* .event_synchronize = */ NULL,
460};
461
462// backend reg interface
463
464static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) {
465 return "BLAS";
466
467 GGML_UNUSED(reg);
468}
469
470static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) {
471 return 1;
472
473 GGML_UNUSED(reg);
474}
475
476static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
477 GGML_ASSERT(index == 0);
478
479 static ggml_backend_device ggml_backend_blas_device = {
480 /* .iface = */ ggml_backend_blas_device_i,
481 /* .reg = */ reg,
482 /* .context = */ nullptr,
483 };
484
485 return &ggml_backend_blas_device;
486
487 GGML_UNUSED(reg);
488 GGML_UNUSED(index);
489}
490
491static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) {
492 if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) {
493 return (void *)ggml_backend_blas_set_n_threads;
494 }
495 return NULL;
496
497 GGML_UNUSED(reg);
498 GGML_UNUSED(name);
499}
500
501static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
502 /* .get_name = */ ggml_backend_blas_reg_get_name,
503 /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
504 /* .get_device = */ ggml_backend_blas_reg_get_device,
505 /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
506};
507
508ggml_backend_reg_t ggml_backend_blas_reg(void) {
509 static struct ggml_backend_reg ggml_backend_blas_reg = {
510 /* .api_version = */ GGML_BACKEND_API_VERSION,
511 /* .iface = */ ggml_backend_blas_reg_i,
512 /* .context = */ NULL,
513 };
514
515 return &ggml_backend_blas_reg;
516}
517
518GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)