#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-backend-dl.h"
#include "ggml-impl.h"
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
13
14#ifdef _WIN32
15# define WIN32_LEAN_AND_MEAN
16# ifndef NOMINMAX
17# define NOMINMAX
18# endif
19# include <windows.h>
20#elif defined(__APPLE__)
21# include <mach-o/dyld.h>
22# include <dlfcn.h>
23#else
24# include <dlfcn.h>
25# include <unistd.h>
26#endif
27
28// Backend registry
29#ifdef GGML_USE_CPU
30#include "ggml-cpu.h"
31#endif
32
33#ifdef GGML_USE_CUDA
34#include "ggml-cuda.h"
35#endif
36
37#ifdef GGML_USE_METAL
38#include "ggml-metal.h"
39#endif
40
41#ifdef GGML_USE_SYCL
42#include "ggml-sycl.h"
43#endif
44
45#ifdef GGML_USE_VULKAN
46#include "ggml-vulkan.h"
47#endif
48
49#ifdef GGML_USE_WEBGPU
50#include "ggml-webgpu.h"
51#endif
52
53#ifdef GGML_USE_ZDNN
54#include "ggml-zdnn.h"
55#endif
56
57#ifdef GGML_USE_OPENCL
58#include "ggml-opencl.h"
59#endif
60
61#ifdef GGML_USE_HEXAGON
62#include "ggml-hexagon.h"
63#endif
64
65#ifdef GGML_USE_BLAS
66#include "ggml-blas.h"
67#endif
68
69#ifdef GGML_USE_RPC
70#include "ggml-rpc.h"
71#endif
72
73#ifdef GGML_USE_VIRTGPU_FRONTEND
74#include "ggml-virtgpu.h"
75#endif
76
77#ifdef GGML_USE_CANN
78#include "ggml-cann.h"
79#endif
80
81#ifdef GGML_USE_ZENDNN
82#include "ggml-zendnn.h"
83#endif
84
85namespace fs = std::filesystem;
86
// Converts a filesystem path to a std::string holding its u8string() bytes.
// Any exception raised during the conversion is swallowed and an empty string is returned.
static std::string path_str(const fs::path & path) {
    std::string result;
    try {
#if defined(__cpp_lib_char8_t)
        // C++20 and later: u8string() yields std::u8string, so copy its bytes over
        const std::u8string utf8 = path.u8string();
        result.assign(reinterpret_cast<const char *>(utf8.data()), utf8.size());
#else
        // C++17: u8string() already yields std::string
        result = path.u8string();
#endif
    } catch (...) {
        result.clear();
    }
    return result;
}
101
102struct ggml_backend_reg_entry {
103 ggml_backend_reg_t reg;
104 dl_handle_ptr handle;
105};
106
107struct ggml_backend_registry {
108 std::vector<ggml_backend_reg_entry> backends;
109 std::vector<ggml_backend_dev_t> devices;
110
111 ggml_backend_registry() {
112#ifdef GGML_USE_CUDA
113 register_backend(ggml_backend_cuda_reg());
114#endif
115#ifdef GGML_USE_METAL
116 register_backend(ggml_backend_metal_reg());
117#endif
118#ifdef GGML_USE_SYCL
119 register_backend(ggml_backend_sycl_reg());
120#endif
121#ifdef GGML_USE_VULKAN
122 // Add runtime disable check
123 if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
124 register_backend(ggml_backend_vk_reg());
125 } else {
126 GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
127 }
128#endif
129#ifdef GGML_USE_WEBGPU
130 register_backend(ggml_backend_webgpu_reg());
131#endif
132#ifdef GGML_USE_ZDNN
133 register_backend(ggml_backend_zdnn_reg());
134#endif
135#ifdef GGML_USE_VIRTGPU_FRONTEND
136 register_backend(ggml_backend_virtgpu_reg());
137#endif
138
139#ifdef GGML_USE_OPENCL
140 register_backend(ggml_backend_opencl_reg());
141#endif
142#ifdef GGML_USE_ZENDNN
143 register_backend(ggml_backend_zendnn_reg());
144#endif
145#ifdef GGML_USE_HEXAGON
146 register_backend(ggml_backend_hexagon_reg());
147#endif
148#ifdef GGML_USE_CANN
149 register_backend(ggml_backend_cann_reg());
150#endif
151#ifdef GGML_USE_BLAS
152 register_backend(ggml_backend_blas_reg());
153#endif
154#ifdef GGML_USE_RPC
155 register_backend(ggml_backend_rpc_reg());
156#endif
157#ifdef GGML_USE_CPU
158 register_backend(ggml_backend_cpu_reg());
159#endif
160 }
161
162 ~ggml_backend_registry() {
163 // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
164 // since backend threads may still be running and accessing resources from the dynamic library
165 for (auto & entry : backends) {
166 if (entry.handle) {
167 entry.handle.release(); // NOLINT
168 }
169 }
170 }
171
172 void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
173 if (!reg) {
174 return;
175 }
176
177#ifndef NDEBUG
178 GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
179 __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
180#endif
181 backends.push_back({ reg, std::move(handle) });
182 for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
183 register_device(ggml_backend_reg_dev_get(reg, i));
184 }
185 }
186
187 void register_device(ggml_backend_dev_t device) {
188#ifndef NDEBUG
189 GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
190#endif
191 devices.push_back(device);
192 }
193
194 ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
195 dl_handle_ptr handle { dl_load_library(path) };
196 if (!handle) {
197 if (!silent) {
198 GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(), dl_error());
199 }
200 return nullptr;
201 }
202
203 auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
204 if (score_fn && score_fn() == 0) {
205 if (!silent) {
206 GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
207 }
208 return nullptr;
209 }
210
211 auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
212 if (!backend_init_fn) {
213 if (!silent) {
214 GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
215 }
216 return nullptr;
217 }
218
219 ggml_backend_reg_t reg = backend_init_fn();
220 if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
221 if (!silent) {
222 if (!reg) {
223 GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
224 __func__, path_str(path).c_str());
225 } else {
226 GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
227 __func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
228 }
229 }
230 return nullptr;
231 }
232
233 GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());
234
235 register_backend(reg, std::move(handle));
236
237 return reg;
238 }
239
240 void unload_backend(ggml_backend_reg_t reg, bool silent) {
241 auto it = std::find_if(backends.begin(), backends.end(),
242 [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
243
244 if (it == backends.end()) {
245 if (!silent) {
246 GGML_LOG_ERROR("%s: backend not found\n", __func__);
247 }
248 return;
249 }
250
251 if (!silent) {
252 GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
253 }
254
255 // remove devices
256 devices.erase(
257 std::remove_if(devices.begin(), devices.end(),
258 [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
259 devices.end());
260
261 // remove backend
262 backends.erase(it);
263 }
264};
265
266static ggml_backend_registry & get_reg() {
267 static ggml_backend_registry reg;
268 return reg;
269}
270
271// Internal API
272void ggml_backend_register(ggml_backend_reg_t reg) {
273 get_reg().register_backend(reg);
274}
275
276void ggml_backend_device_register(ggml_backend_dev_t device) {
277 get_reg().register_device(device);
278}
279
280// Backend (reg) enumeration
// Case-insensitive comparison of two NUL-terminated strings.
//
// Returns true when both strings have the same length and every pair of characters is equal
// after std::tolower. If either pointer is null, the strings are equal only when both are null.
static bool striequals(const char * a, const char * b) {
    if (a == nullptr || b == nullptr) {
        return a == b;
    }
    for (; *a && *b; a++, b++) {
        // std::tolower requires a value representable as unsigned char (or EOF); passing a
        // negative plain char (any non-ASCII byte where char is signed) is undefined behavior
        const int lower_a = std::tolower(static_cast<unsigned char>(*a));
        const int lower_b = std::tolower(static_cast<unsigned char>(*b));
        if (lower_a != lower_b) {
            return false;
        }
    }
    // equal only if both strings ended at the same position
    return *a == *b;
}
289
290size_t ggml_backend_reg_count() {
291 return get_reg().backends.size();
292}
293
294ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
295 GGML_ASSERT(index < ggml_backend_reg_count());
296 return get_reg().backends[index].reg;
297}
298
299ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
300 for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
301 ggml_backend_reg_t reg = ggml_backend_reg_get(i);
302 if (striequals(ggml_backend_reg_name(reg), name)) {
303 return reg;
304 }
305 }
306 return nullptr;
307}
308
309// Device enumeration
310size_t ggml_backend_dev_count() {
311 return get_reg().devices.size();
312}
313
314ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
315 GGML_ASSERT(index < ggml_backend_dev_count());
316 return get_reg().devices[index];
317}
318
319ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
320 for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
321 ggml_backend_dev_t dev = ggml_backend_dev_get(i);
322 if (striequals(ggml_backend_dev_name(dev), name)) {
323 return dev;
324 }
325 }
326 return nullptr;
327}
328
329ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
330 for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
331 ggml_backend_dev_t dev = ggml_backend_dev_get(i);
332 if (ggml_backend_dev_type(dev) == type) {
333 return dev;
334 }
335 }
336 return nullptr;
337}
338
339// Convenience functions
340ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
341 ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
342 if (!dev) {
343 return nullptr;
344 }
345 return ggml_backend_dev_init(dev, params);
346}
347
348ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
349 ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
350 if (!dev) {
351 return nullptr;
352 }
353 return ggml_backend_dev_init(dev, params);
354}
355
356ggml_backend_t ggml_backend_init_best(void) {
357 ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
358 dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU);
359 dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
360 if (!dev) {
361 return nullptr;
362 }
363 return ggml_backend_dev_init(dev, nullptr);
364}
365
366// Dynamic loading
367ggml_backend_reg_t ggml_backend_load(const char * path) {
368 return get_reg().load_backend(path, false);
369}
370
371void ggml_backend_unload(ggml_backend_reg_t reg) {
372 get_reg().unload_backend(reg, true);
373}
374
// Returns the directory containing the running executable, including a trailing separator.
//
// - Linux/FreeBSD: falls back to "./" when the executable link cannot be read.
// - Windows: returns an empty path when the module file name cannot be queried.
// - Other platforms without an implementation: returns an empty path.
static fs::path get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        // the call stored the required buffer size in `size`
        path.resize(size);
    }
    // the buffer holds a NUL-terminated string that may be shorter than the buffer itself,
    // so stop at the terminator instead of copying the whole buffer
    std::string base_path(path.begin(), std::find(path.begin(), path.end(), '\0'));
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__) || defined(__FreeBSD__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
#    if defined(__linux__)
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
#    elif defined(__FreeBSD__)
        ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
#    endif
        if (len == -1) {
            break;
        }
        // readlink does not report truncation: only a result shorter than the buffer is complete
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    std::vector<wchar_t> path(MAX_PATH);
    DWORD len = 0;
    while (true) {
        len = GetModuleFileNameW(NULL, path.data(), static_cast<DWORD>(path.size()));
        if (len == 0) {
            return {};
        }
        // a return value equal to the buffer size means the name was truncated: grow and retry
        if (len < path.size()) {
            break;
        }
        path.resize(path.size() * 2);
    }
    std::wstring base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::wstring::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + L"\\";
#else
    return {};
#endif
}
437
// File name prefix of backend libraries: "ggml-" on Windows, "libggml-" everywhere else.
static fs::path backend_filename_prefix() {
    const char * const prefix =
#ifdef _WIN32
        "ggml-";
#else
        "libggml-";
#endif
    return fs::u8path(prefix);
}
445
// File name extension of backend libraries: ".dll" on Windows, ".so" everywhere else.
static fs::path backend_filename_extension() {
    const char * const extension =
#ifdef _WIN32
        ".dll";
#else
        ".so";
#endif
    return fs::u8path(extension);
}
453
454static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
455 // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
456 const fs::path name_path = fs::u8path(name);
457 const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
458 const fs::path file_extension = backend_filename_extension();
459
460 std::vector<fs::path> search_paths;
461 if (user_search_path == nullptr) {
462#ifdef GGML_BACKEND_DIR
463 search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
464#endif
465 // default search paths: executable directory, current directory
466 search_paths.push_back(get_executable_path());
467 search_paths.push_back(fs::current_path());
468 } else {
469 search_paths.push_back(fs::u8path(user_search_path));
470 }
471
472 int best_score = 0;
473 fs::path best_path;
474 std::error_code ec;
475
476 for (const auto & search_path : search_paths) {
477 if (!fs::exists(search_path, ec)) {
478 if (ec) {
479 GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(search_path).c_str(), ec.message().c_str());
480 } else {
481 GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
482 }
483 continue;
484 }
485 fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
486 for (const auto & entry : dir_it) {
487 if (entry.is_regular_file(ec)) {
488 auto filename = entry.path().filename();
489 auto ext = entry.path().extension();
490 if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
491 dl_handle_ptr handle { dl_load_library(entry) };
492 if (!handle && !silent) {
493 GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error());
494 }
495 if (handle) {
496 auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
497 if (score_fn) {
498 int s = score_fn();
499#ifndef NDEBUG
500 GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
501#endif
502 if (s > best_score) {
503 best_score = s;
504 best_path = entry.path();
505 }
506 } else {
507 if (!silent) {
508 GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
509 }
510 }
511 }
512 }
513 }
514 }
515 }
516
517 if (best_score == 0) {
518 // try to load the base backend
519 for (const auto & search_path : search_paths) {
520 fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
521 fs::path path = search_path / filename;
522 if (std::error_code ec; fs::exists(path, ec)) {
523 return get_reg().load_backend(path, silent);
524 } else {
525 if (ec) {
526 GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(path).c_str(), ec.message().c_str());
527 }
528 }
529 }
530 return nullptr;
531 }
532
533 return get_reg().load_backend(best_path, silent);
534}
535
536void ggml_backend_load_all() {
537 ggml_backend_load_all_from_path(nullptr);
538}
539
540void ggml_backend_load_all_from_path(const char * dir_path) {
541#ifdef NDEBUG
542 bool silent = true;
543#else
544 bool silent = false;
545#endif
546
547 ggml_backend_load_best("blas", silent, dir_path);
548 ggml_backend_load_best("zendnn", silent, dir_path);
549 ggml_backend_load_best("cann", silent, dir_path);
550 ggml_backend_load_best("cuda", silent, dir_path);
551 ggml_backend_load_best("hip", silent, dir_path);
552 ggml_backend_load_best("metal", silent, dir_path);
553 ggml_backend_load_best("rpc", silent, dir_path);
554 ggml_backend_load_best("sycl", silent, dir_path);
555 ggml_backend_load_best("vulkan", silent, dir_path);
556 ggml_backend_load_best("virtgpu", silent, dir_path);
557 ggml_backend_load_best("opencl", silent, dir_path);
558 ggml_backend_load_best("hexagon", silent, dir_path);
559 ggml_backend_load_best("musa", silent, dir_path);
560 ggml_backend_load_best("cpu", silent, dir_path);
561 // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
562 const char * backend_path = std::getenv("GGML_BACKEND_PATH");
563 if (backend_path) {
564 ggml_backend_load(backend_path);
565 }
566}