diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-blas | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-blas')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-blas/CMakeLists.txt | 101 | ||||
| -rw-r--r-- | llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp | 518 |
2 files changed, 619 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt b/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt new file mode 100644 index 0000000..c27dc17 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt | |||
| @@ -0,0 +1,101 @@ | |||
# Build the ggml BLAS backend (ggml-blas) against the BLAS implementation
# selected by GGML_BLAS_VENDOR (any FindBLAS BLA_VENDOR value).
# Configuration fails hard if no usable BLAS is found.

if (GGML_STATIC)
    set(BLA_STATIC ON)
endif()
# NOTE: switching to 64-bit integer BLAS interfaces would require:
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
#    set(BLA_SIZEOF_INTEGER 8)
#endif()

set(BLA_VENDOR ${GGML_BLAS_VENDOR})
find_package(BLAS)

if (BLAS_FOUND)
    message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

    ggml_add_backend_library(ggml-blas
                             ggml-blas.cpp
                            )

    # FIX: use target-scoped compile definitions instead of directory-scoped
    # add_compile_definitions so the flags cannot leak to other targets.
    if ("${GGML_BLAS_VENDOR}" MATCHES "Apple")
        target_compile_definitions(ggml-blas PRIVATE
            ACCELERATE_NEW_LAPACK
            ACCELERATE_LAPACK_ILP64
            GGML_BLAS_USE_ACCELERATE)
    elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
        # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
        # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
        find_package(PkgConfig REQUIRED)
        if ("${GGML_BLAS_VENDOR}" MATCHES "Generic")
            pkg_check_modules(DepBLAS blas)
        elseif ("${GGML_BLAS_VENDOR}" MATCHES "OpenBLAS")
            # As of openblas v0.3.22, the 64-bit is named openblas64.pc
            pkg_check_modules(DepBLAS openblas64)
            if (NOT DepBLAS_FOUND)
                pkg_check_modules(DepBLAS openblas)
            endif()
        elseif ("${GGML_BLAS_VENDOR}" MATCHES "FLAME")
            pkg_check_modules(DepBLAS blis)
        elseif ("${GGML_BLAS_VENDOR}" MATCHES "ATLAS")
            pkg_check_modules(DepBLAS blas-atlas)
        elseif ("${GGML_BLAS_VENDOR}" MATCHES "FlexiBLAS")
            pkg_check_modules(DepBLAS flexiblas_api)
        elseif ("${GGML_BLAS_VENDOR}" MATCHES "Intel")
            # all Intel* libraries share the same include path
            pkg_check_modules(DepBLAS mkl-sdl)
        elseif ("${GGML_BLAS_VENDOR}" MATCHES "NVHPC")
            # this doesn't provide pkg-config
            # suggest to assign BLAS_INCLUDE_DIRS on your own
            if ("${NVHPC_VERSION}" STREQUAL "")
                message(WARNING "Better to set NVHPC_VERSION")
            else()
                set(DepBLAS_FOUND ON)
                set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
            endif()
        endif()
        if (DepBLAS_FOUND)
            set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
        else()
            message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
                            " detected by pkgconfig, trying to find cblas.h from possible paths...")
            find_path(BLAS_INCLUDE_DIRS
                NAMES cblas.h
                HINTS
                    /usr/include
                    /usr/local/include
                    /usr/include/openblas
                    /opt/homebrew/opt/openblas/include
                    /usr/local/opt/openblas/include
                    /usr/include/x86_64-linux-gnu/openblas/include
            )
        endif()
    endif()

    message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

    # FIX: BLAS_LINKER_FLAGS are linker flags (per the FindBLAS module docs);
    # they were previously passed through target_compile_options.
    target_link_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})

    if ("${GGML_BLAS_VENDOR}" STREQUAL "")
        message(WARNING "GGML_BLAS_VENDOR is not set; some methods may not link properly.")
    endif()

    if ("${GGML_BLAS_VENDOR}" MATCHES "Intel" OR ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND "${GGML_BLAS_VENDOR}" MATCHES "Generic"))
        target_compile_definitions(ggml-blas PRIVATE GGML_BLAS_USE_MKL)
    endif()

    if ("${GGML_BLAS_VENDOR}" MATCHES "OpenBLAS")
        target_compile_definitions(ggml-blas PRIVATE GGML_BLAS_USE_OPENBLAS)
    endif()

    # "AOCL" also matches "AOCL_mt", so a single alternation suffices
    if ("${GGML_BLAS_VENDOR}" MATCHES "FLAME|AOCL")
        target_compile_definitions(ggml-blas PRIVATE GGML_BLAS_USE_BLIS)
    endif()

    if ("${GGML_BLAS_VENDOR}" MATCHES "NVPL")
        target_compile_definitions(ggml-blas PRIVATE GGML_BLAS_USE_NVPL)
    endif()

    target_link_libraries     (ggml-blas PRIVATE ${BLAS_LIBRARIES})
    target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS})
else()
    message(FATAL_ERROR "BLAS not found, please refer to "
                        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
                        " to set correct GGML_BLAS_VENDOR")
endif()
diff --git a/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp b/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp new file mode 100644 index 0000000..2e9ddf2 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp | |||
| @@ -0,0 +1,518 @@ | |||
| 1 | #include "ggml-impl.h" | ||
| 2 | #include "ggml-blas.h" | ||
| 3 | #include "ggml-backend-impl.h" | ||
| 4 | |||
| 5 | #include <future> | ||
| 6 | #include <vector> | ||
| 7 | #include <cstring> | ||
| 8 | |||
| 9 | #if defined(GGML_BLAS_USE_ACCELERATE) | ||
| 10 | # include <Accelerate/Accelerate.h> | ||
| 11 | #elif defined(GGML_BLAS_USE_MKL) | ||
| 12 | # include <mkl.h> | ||
| 13 | #elif defined(GGML_BLAS_USE_BLIS) | ||
| 14 | # include <blis.h> | ||
| 15 | #elif defined(GGML_BLAS_USE_NVPL) | ||
| 16 | # include <nvpl_blas.h> | ||
| 17 | #else | ||
| 18 | # include <cblas.h> | ||
| 19 | #endif | ||
| 20 | |||
// Per-backend state for the BLAS backend: thread count plus a lazily grown
// scratch buffer reused across graph computations.
struct ggml_backend_blas_context {
    int n_threads = GGML_DEFAULT_N_THREADS; // threads used for type conversion and the BLAS library
    std::unique_ptr<char[]> work_data;      // scratch buffer holding src0 converted to f32 (see mul_mat)
    size_t work_size = 0;                   // current capacity of work_data in bytes (grows, never shrinks)
#ifndef GGML_USE_OPENMP
    std::vector<std::future<void>> tasks;   // in-flight conversion tasks when std::async is used instead of OpenMP
#endif
};
| 29 | |||
// Computes dst = src1 @ src0^T via cblas_sgemm, per 2D plane of the 4D tensors.
// Non-f32 src0 is first converted to f32 into ctx->work_data; the conversion is
// parallelized with OpenMP or std::async depending on GGML_USE_OPENMP.
// src0 planes are broadcast across src1's outer dimensions (factors r2, r3).
static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
    const struct ggml_tensor * src0 = dst->src[0];
    const struct ggml_tensor * src1 = dst->src[1];

    GGML_TENSOR_BINARY_OP_LOCALS

    const enum ggml_type type = src0->type;

    GGML_ASSERT(ne0 == ne01);
    GGML_ASSERT(ne1 == ne11);
    GGML_ASSERT(ne2 == ne12);
    GGML_ASSERT(ne3 == ne13);

    // we don't support permuted src0 or src1
    GGML_ASSERT(nb00 == ggml_type_size(type));
    GGML_ASSERT(nb10 == ggml_type_size(src1->type));

    // dst cannot be transposed or permuted
    GGML_ASSERT(nb0 == sizeof(float));
    GGML_ASSERT(nb0 <= nb1);
    GGML_ASSERT(nb1 <= nb2);
    GGML_ASSERT(nb2 <= nb3);

    // broadcast factors
    const int64_t r2 = ne12/ne02;
    const int64_t r3 = ne13/ne03;

    const int64_t ne_plane = ne01*ne00;
    // no scratch needed when src0 is already f32
    const size_t desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float);

    // grow the scratch buffer if needed (capacity is kept across calls)
    if (ctx->work_size < desired_wsize) {
        ctx->work_data.reset(new char[desired_wsize]);
        ctx->work_size = desired_wsize;
    }
    void * wdata = ctx->work_data.get();

    // convert src0 to float
    if (type != GGML_TYPE_F32) {
        const auto * type_traits = ggml_get_type_traits(type);
        ggml_to_float_t const to_float = type_traits->to_float;

        for (int64_t i03 = 0; i03 < ne03; i03++) {
            for (int64_t i02 = 0; i02 < ne02; i02++) {
                const void  *       x = (char *)  src0->data + i02*nb02          + i03*nb03;
                float * const  wplane = (float *) wdata      + i02*ne_plane      + i03*ne02*ne_plane;

                // cap thread count so each thread gets a worthwhile chunk of rows
                const int min_cols_per_thread = 4096;
                const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1);
                const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1);

#ifdef GGML_USE_OPENMP
                #pragma omp parallel for num_threads(n_threads)
                for (int64_t i01 = 0; i01 < ne01; i01++) {
                    to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                }
#else
                // dispatch rows [i*ne01/n, (i+1)*ne01/n) for i >= 1 to worker threads
                for (int i = 1; i < n_threads; i++) {
                    const int64_t start =       i*ne01/n_threads;
                    const int64_t end   = (i + 1)*ne01/n_threads;
                    if (start < end) {
                        ctx->tasks.push_back(std::async(std::launch::async, [=]() {
                            for (int64_t i01 = start; i01 < end; i01++) {
                                to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                            }
                        }));
                    }
                }
                {
                    // reuse the current thread for the first task
                    const int64_t start = 0;
                    const int64_t end   = ne01/n_threads;
                    for (int64_t i01 = start; i01 < end; i01++) {
                        to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                    }
                }
#endif
            }
        }

#ifndef GGML_USE_OPENMP
        // wait for all tasks to finish before sgemm reads wdata
        for (auto & task : ctx->tasks) {
            task.get();
        }
        ctx->tasks.clear();
#endif
    }

    // propagate the requested thread count to the BLAS library, when it supports that
#if defined(GGML_BLAS_USE_OPENBLAS)
    openblas_set_num_threads(ctx->n_threads);
#elif defined(GGML_BLAS_USE_BLIS)
    bli_thread_set_num_threads(ctx->n_threads);
#elif defined(GGML_BLAS_USE_NVPL)
    nvpl_blas_set_num_threads(ctx->n_threads);
#endif

    for (int64_t i13 = 0; i13 < ne13; i13++) {
        for (int64_t i12 = 0; i12 < ne12; i12++) {
            // map dst/src1 batch indices back to the (possibly broadcast) src0 plane
            const int64_t i03 = i13/r3;
            const int64_t i02 = i12/r2;

            const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
            const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
                  float * d = (float *) ((char *)  dst->data + i12*nb2  + i13*nb3);

            if (type != GGML_TYPE_F32) {
                // use the converted f32 copy instead of the raw quantized data
                x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
            }

            // d (ne1 x ne01) = y (ne1 x ne10) * x^T (ne10 x ne01)
            cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                        ne1, ne01, ne10,
                        1.0f,   y, ne10,
                                x, ne00,
                        0.0f,   d, ne01);
        }
    }
}
| 147 | |||
// Computes the outer product dst = src0 @ src1^T with a single cblas_sgemm
// call. Only 2D (matrix) operands are supported; see supports_op.
// ctx is unused — sgemm threading is whatever the BLAS library defaults to here.
static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
    const struct ggml_tensor * src0 = dst->src[0];
    const struct ggml_tensor * src1 = dst->src[1];

    GGML_TENSOR_BINARY_OP_LOCALS

    GGML_ASSERT(ne0  == ne00);
    GGML_ASSERT(ne1  == ne10);
    GGML_ASSERT(ne2  == ne02);
    GGML_ASSERT(ne02 == ne12);
    GGML_ASSERT(ne3  == ne13);
    GGML_ASSERT(ne03 == ne13);

    // we don't support permuted src0 or src1
    GGML_ASSERT(nb00 == sizeof(float));

    // dst cannot be transposed or permuted
    GGML_ASSERT(nb0 == sizeof(float));
    // GGML_ASSERT(nb0 <= nb1);
    // GGML_ASSERT(nb1 <= nb2);
    // GGML_ASSERT(nb2 <= nb3);

    // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
    // src0: (k,n)
    // src1: (k,m)
    // dst:  (m,n)
    //
    // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
    // Also expressed as (major,minor)
    // a: (m,k): so src1 transposed
    // b: (k,n): so src0
    // c: (m,n)
    //
    // However, if ggml_is_transposed(src1) is true, then
    // src1->data already contains a transposed version, so sgemm mustn't
    // transpose it further.

    int n = src0->ne[0];
    int k = src0->ne[1];
    int m = src1->ne[0];

    CBLAS_TRANSPOSE transposeA;
    int lda; // leading dimension of a, depends on its in-memory layout

    if (!ggml_is_transposed(src1)) {
        transposeA = CblasTrans;
        lda = m;
    } else {
        transposeA = CblasNoTrans;
        lda = k;
    }

    float * a = (float *) ((char *) src1->data);
    float * b = (float *) ((char *) src0->data);
    float * c = (float *) ((char *) dst->data);

    cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);

    GGML_UNUSED(ctx);
}
| 208 | |||
| 209 | // backend interface | ||
| 210 | |||
| 211 | static const char * ggml_backend_blas_get_name(ggml_backend_t backend) { | ||
| 212 | return "BLAS"; | ||
| 213 | |||
| 214 | GGML_UNUSED(backend); | ||
| 215 | } | ||
| 216 | |||
| 217 | static void ggml_backend_blas_free(ggml_backend_t backend) { | ||
| 218 | ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context; | ||
| 219 | delete ctx; | ||
| 220 | delete backend; | ||
| 221 | } | ||
| 222 | |||
| 223 | static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { | ||
| 224 | ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context; | ||
| 225 | |||
| 226 | for (int i = 0; i < cgraph->n_nodes; i++) { | ||
| 227 | struct ggml_tensor * node = cgraph->nodes[i]; | ||
| 228 | |||
| 229 | if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) { | ||
| 230 | continue; | ||
| 231 | } | ||
| 232 | |||
| 233 | switch (node->op) { | ||
| 234 | case GGML_OP_MUL_MAT: | ||
| 235 | ggml_backend_blas_mul_mat(ctx, node); | ||
| 236 | break; | ||
| 237 | |||
| 238 | case GGML_OP_OUT_PROD: | ||
| 239 | ggml_backend_blas_out_prod(ctx, node); | ||
| 240 | break; | ||
| 241 | |||
| 242 | case GGML_OP_NONE: | ||
| 243 | case GGML_OP_RESHAPE: | ||
| 244 | case GGML_OP_VIEW: | ||
| 245 | case GGML_OP_PERMUTE: | ||
| 246 | case GGML_OP_TRANSPOSE: | ||
| 247 | break; | ||
| 248 | |||
| 249 | default: | ||
| 250 | GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node)); | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | return GGML_STATUS_SUCCESS; | ||
| 255 | |||
| 256 | GGML_UNUSED(backend); | ||
| 257 | } | ||
| 258 | |||
// Backend vtable: only name/free/graph_compute are implemented; all async,
// graph-plan and event entry points are intentionally NULL (synchronous backend).
static struct ggml_backend_i blas_backend_i = {
    /* .get_name                = */ ggml_backend_blas_get_name,
    /* .free                    = */ ggml_backend_blas_free,
    /* .set_tensor_async        = */ NULL,
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL,
    /* .graph_plan_create       = */ NULL,
    /* .graph_plan_free         = */ NULL,
    /* .graph_plan_update       = */ NULL,
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_blas_graph_compute,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
    /* .graph_optimize          = */ NULL,
};
| 275 | |||
// Stable unique identifier for BLAS backend instances; compared against by
// ggml_backend_is_blas via ggml_guid_matches.
static ggml_guid_t ggml_backend_blas_guid(void) {
    static ggml_guid guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d };
    return &guid;
}
| 280 | |||
// Creates a new BLAS backend instance bound to the single registered device.
// The returned backend owns a fresh context; both are heap-allocated and
// released together by the backend's free function.
ggml_backend_t ggml_backend_blas_init(void) {
    ggml_backend_blas_context * ctx = new ggml_backend_blas_context;

    ggml_backend_t backend = new ggml_backend {
        /* .guid      = */ ggml_backend_blas_guid(),
        /* .iface     = */ blas_backend_i,
        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
        /* .context   = */ ctx,
    };

#if defined(GGML_BLAS_USE_OPENBLAS) && defined(GGML_USE_OPENMP)
    // log a debug warning when OpenBLAS reports it was not built for OpenMP
    if (openblas_get_parallel() != OPENBLAS_OPENMP) {
        GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
    }
#endif

#if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
    GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
#endif

    return backend;
}
| 303 | |||
| 304 | bool ggml_backend_is_blas(ggml_backend_t backend) { | ||
| 305 | return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_blas_guid()); | ||
| 306 | } | ||
| 307 | |||
// Sets the thread count used for type conversion and (where supported) the
// BLAS library itself; takes effect on the next graph computation.
// Asserts that backend_blas really is a BLAS backend.
void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) {
    GGML_ASSERT(ggml_backend_is_blas(backend_blas));

    ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context;
    ctx->n_threads = n_threads;
}
| 314 | |||
| 315 | // device interface | ||
| 316 | |||
| 317 | static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) { | ||
| 318 | return "BLAS"; | ||
| 319 | |||
| 320 | GGML_UNUSED(dev); | ||
| 321 | } | ||
| 322 | |||
// Returns the name of the BLAS implementation selected at compile time via
// the GGML_BLAS_USE_* definitions (set by the build system); "BLAS" otherwise.
static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
#if defined(GGML_BLAS_USE_ACCELERATE)
    return "Accelerate";
#elif defined(GGML_BLAS_USE_MKL)
    return "MKL";
#elif defined(GGML_BLAS_USE_BLIS)
    return "BLIS";
#elif defined(GGML_BLAS_USE_NVPL)
    return "NVPL";
#elif defined(GGML_BLAS_USE_OPENBLAS)
    return "OpenBLAS";
#else
    return "BLAS";
#endif

    GGML_UNUSED(dev);
}
| 340 | |||
| 341 | static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { | ||
| 342 | // TODO | ||
| 343 | *free = 0; | ||
| 344 | *total = 0; | ||
| 345 | |||
| 346 | GGML_UNUSED(dev); | ||
| 347 | } | ||
| 348 | |||
| 349 | static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) { | ||
| 350 | return GGML_BACKEND_DEVICE_TYPE_ACCEL; | ||
| 351 | |||
| 352 | GGML_UNUSED(dev); | ||
| 353 | } | ||
| 354 | |||
// Fills the device property struct from the other device callbacks.
// The only advertised capability is wrapping existing host pointers in buffers.
static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_blas_device_get_name(dev);
    props->description = ggml_backend_blas_device_get_description(dev);
    props->type        = ggml_backend_blas_device_get_type(dev);
    ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = {
        /* .async                 = */ false,
        /* .host_buffer           = */ false,
        /* .buffer_from_host_ptr  = */ true,
        /* .events                = */ false,
    };
}
| 367 | |||
// Device hook for creating a backend; params are ignored — there is nothing
// to configure at init time for this backend.
static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_blas_init();

    GGML_UNUSED(dev);
    GGML_UNUSED(params);
}
| 374 | |||
// BLAS operates directly on host memory, so the CPU buffer type is reused.
static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
    return ggml_backend_cpu_buffer_type();

    GGML_UNUSED(dev);
}
| 380 | |||
// Wraps an existing host allocation in a (non-owning) CPU buffer;
// max_tensor_size is irrelevant for host memory and ignored.
static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
    return ggml_backend_cpu_buffer_from_ptr(ptr, size);

    GGML_UNUSED(dev);
    GGML_UNUSED(max_tensor_size);
}
| 387 | |||
| 388 | static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { | ||
| 389 | const struct ggml_tensor * src0 = op->src[0]; | ||
| 390 | const struct ggml_tensor * src1 = op->src[1]; | ||
| 391 | |||
| 392 | switch (op->op) { | ||
| 393 | case GGML_OP_NONE: | ||
| 394 | case GGML_OP_RESHAPE: | ||
| 395 | case GGML_OP_VIEW: | ||
| 396 | case GGML_OP_PERMUTE: | ||
| 397 | case GGML_OP_TRANSPOSE: | ||
| 398 | return true; | ||
| 399 | |||
| 400 | case GGML_OP_MUL_MAT: | ||
| 401 | { | ||
| 402 | // BLAS usually is only faster for large matrices | ||
| 403 | const struct ggml_tensor * src0 = op->src[0]; | ||
| 404 | const struct ggml_tensor * src1 = op->src[1]; | ||
| 405 | |||
| 406 | const int64_t ne10 = src1->ne[0]; | ||
| 407 | |||
| 408 | const int64_t ne0 = op->ne[0]; | ||
| 409 | const int64_t ne1 = op->ne[1]; | ||
| 410 | |||
| 411 | // TODO: find the optimal value | ||
| 412 | const int64_t min_batch = 32; | ||
| 413 | |||
| 414 | return ggml_is_contiguous(src0) && | ||
| 415 | ggml_is_contiguous(src1) && | ||
| 416 | src1->type == GGML_TYPE_F32 && | ||
| 417 | (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) && | ||
| 418 | (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); | ||
| 419 | } | ||
| 420 | |||
| 421 | case GGML_OP_OUT_PROD: | ||
| 422 | return op->src[0]->type == GGML_TYPE_F32 && | ||
| 423 | op->src[1]->type == GGML_TYPE_F32 && | ||
| 424 | ggml_is_matrix(src0) && | ||
| 425 | ggml_is_matrix(src1) && | ||
| 426 | ggml_is_contiguous(src0) && | ||
| 427 | (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) && | ||
| 428 | (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); | ||
| 429 | |||
| 430 | default: | ||
| 431 | return false; | ||
| 432 | |||
| 433 | } | ||
| 434 | |||
| 435 | GGML_UNUSED(dev); | ||
| 436 | } | ||
| 437 | |||
// Any host-resident buffer type works, since BLAS reads/writes host memory.
static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    return ggml_backend_buft_is_host(buft);

    GGML_UNUSED(dev);
}
| 443 | |||
// Device vtable; host-buffer and event entry points are intentionally NULL
// (see the caps reported by get_props).
static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
    /* .get_name             = */ ggml_backend_blas_device_get_name,
    /* .get_description      = */ ggml_backend_blas_device_get_description,
    /* .get_memory           = */ ggml_backend_blas_device_get_memory,
    /* .get_type             = */ ggml_backend_blas_device_get_type,
    /* .get_props            = */ ggml_backend_blas_device_get_props,
    /* .init_backend         = */ ggml_backend_blas_device_init_backend,
    /* .get_buffer_type      = */ ggml_backend_blas_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr,
    /* .supports_op          = */ ggml_backend_blas_device_supports_op,
    /* .supports_buft        = */ ggml_backend_blas_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
| 461 | |||
| 462 | // backend reg interface | ||
| 463 | |||
// Registry name under which this backend is listed.
static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) {
    return "BLAS";

    GGML_UNUSED(reg);
}
| 469 | |||
// This backend always exposes exactly one device.
static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) {
    return 1;

    GGML_UNUSED(reg);
}
| 475 | |||
// Returns the singleton BLAS device; only index 0 is valid.
// The device object is a function-local static, so the returned pointer is
// stable for the lifetime of the process.
static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);

    static ggml_backend_device ggml_backend_blas_device = {
        /* .iface   = */ ggml_backend_blas_device_i,
        /* .reg     = */ reg,
        /* .context = */ nullptr,
    };

    return &ggml_backend_blas_device;

    GGML_UNUSED(reg);
    GGML_UNUSED(index);
}
| 490 | |||
// Exposes backend-specific functions by name; only the generic
// "ggml_backend_set_n_threads" hook is provided, mapped to
// ggml_backend_blas_set_n_threads. Returns NULL for unknown names.
static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) {
    if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) {
        return (void *)ggml_backend_blas_set_n_threads;
    }
    return NULL;

    GGML_UNUSED(reg);
    GGML_UNUSED(name);
}
| 500 | |||
// Registry vtable for the BLAS backend.
static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
    /* .get_name         = */ ggml_backend_blas_reg_get_name,
    /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
    /* .get_device       = */ ggml_backend_blas_reg_get_device,
    /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
};
| 507 | |||
// Entry point returning the (process-wide, function-local static) registry
// object for this backend; also exported for dynamic loading via
// GGML_BACKEND_DL_IMPL below.
ggml_backend_reg_t ggml_backend_blas_reg(void) {
    static struct ggml_backend_reg ggml_backend_blas_reg = {
        /* .api_version = */ GGML_BACKEND_API_VERSION,
        /* .iface       = */ ggml_backend_blas_reg_i,
        /* .context     = */ NULL,
    };

    return &ggml_backend_blas_reg;
}

GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
