1#include "ggml-backend-impl.h"
  2#include "ggml-backend.h"
  3#include "ggml-backend-dl.h"
  4#include "ggml-impl.h"
  5#include <algorithm>
  6#include <cstring>
  7#include <filesystem>
  8#include <memory>
  9#include <string>
 10#include <type_traits>
 11#include <vector>
 12#include <cctype>
 13
 14#ifdef _WIN32
 15#    define WIN32_LEAN_AND_MEAN
 16#    ifndef NOMINMAX
 17#        define NOMINMAX
 18#    endif
 19#    include <windows.h>
 20#elif defined(__APPLE__)
 21#    include <mach-o/dyld.h>
 22#    include <dlfcn.h>
 23#else
 24#    include <dlfcn.h>
 25#    include <unistd.h>
 26#endif
 27
 28// Backend registry
 29#ifdef GGML_USE_CPU
 30#include "ggml-cpu.h"
 31#endif
 32
 33#ifdef GGML_USE_CUDA
 34#include "ggml-cuda.h"
 35#endif
 36
 37#ifdef GGML_USE_METAL
 38#include "ggml-metal.h"
 39#endif
 40
 41#ifdef GGML_USE_SYCL
 42#include "ggml-sycl.h"
 43#endif
 44
 45#ifdef GGML_USE_VULKAN
 46#include "ggml-vulkan.h"
 47#endif
 48
 49#ifdef GGML_USE_WEBGPU
 50#include "ggml-webgpu.h"
 51#endif
 52
 53#ifdef GGML_USE_ZDNN
 54#include "ggml-zdnn.h"
 55#endif
 56
 57#ifdef GGML_USE_OPENCL
 58#include "ggml-opencl.h"
 59#endif
 60
 61#ifdef GGML_USE_HEXAGON
 62#include "ggml-hexagon.h"
 63#endif
 64
 65#ifdef GGML_USE_BLAS
 66#include "ggml-blas.h"
 67#endif
 68
 69#ifdef GGML_USE_RPC
 70#include "ggml-rpc.h"
 71#endif
 72
 73#ifdef GGML_USE_VIRTGPU_FRONTEND
 74#include "ggml-virtgpu.h"
 75#endif
 76
 77#ifdef GGML_USE_CANN
 78#include "ggml-cann.h"
 79#endif
 80
 81#ifdef GGML_USE_ZENDNN
 82#include "ggml-zendnn.h"
 83#endif
 84
 85namespace fs = std::filesystem;
 86
 87static std::string path_str(const fs::path & path) {
 88    try {
 89#if defined(__cpp_lib_char8_t)
 90        // C++20 and later: u8string() returns std::u8string
 91        const std::u8string u8str = path.u8string();
 92        return std::string(reinterpret_cast<const char *>(u8str.data()), u8str.size());
 93#else
 94        // C++17: u8string() returns std::string
 95        return path.u8string();
 96#endif
 97    } catch (...) {
 98        return std::string();
 99    }
100}
101
// One record of the backend registry: a backend registration together with the handle of the
// dynamic library it was loaded from. The handle is null for backends registered without a
// library handle (the default of ggml_backend_registry::register_backend).
struct ggml_backend_reg_entry {
    // the registered backend
    ggml_backend_reg_t reg;
    // dynamic library handle kept alongside the backend (may be null)
    dl_handle_ptr handle;
};
106
107struct ggml_backend_registry {
108    std::vector<ggml_backend_reg_entry> backends;
109    std::vector<ggml_backend_dev_t> devices;
110
111    ggml_backend_registry() {
112#ifdef GGML_USE_CUDA
113        register_backend(ggml_backend_cuda_reg());
114#endif
115#ifdef GGML_USE_METAL
116        register_backend(ggml_backend_metal_reg());
117#endif
118#ifdef GGML_USE_SYCL
119        register_backend(ggml_backend_sycl_reg());
120#endif
121#ifdef GGML_USE_VULKAN
122    // Add runtime disable check
123    if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
124        register_backend(ggml_backend_vk_reg());
125    } else {
126        GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
127    }
128#endif
129#ifdef GGML_USE_WEBGPU
130        register_backend(ggml_backend_webgpu_reg());
131#endif
132#ifdef GGML_USE_ZDNN
133        register_backend(ggml_backend_zdnn_reg());
134#endif
135#ifdef GGML_USE_VIRTGPU_FRONTEND
136        register_backend(ggml_backend_virtgpu_reg());
137#endif
138
139#ifdef GGML_USE_OPENCL
140        register_backend(ggml_backend_opencl_reg());
141#endif
142#ifdef GGML_USE_ZENDNN
143        register_backend(ggml_backend_zendnn_reg());
144#endif
145#ifdef GGML_USE_HEXAGON
146        register_backend(ggml_backend_hexagon_reg());
147#endif
148#ifdef GGML_USE_CANN
149        register_backend(ggml_backend_cann_reg());
150#endif
151#ifdef GGML_USE_BLAS
152        register_backend(ggml_backend_blas_reg());
153#endif
154#ifdef GGML_USE_RPC
155        register_backend(ggml_backend_rpc_reg());
156#endif
157#ifdef GGML_USE_CPU
158        register_backend(ggml_backend_cpu_reg());
159#endif
160    }
161
162    ~ggml_backend_registry() {
163        // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
164        // since backend threads may still be running and accessing resources from the dynamic library
165        for (auto & entry : backends) {
166            if (entry.handle) {
167                entry.handle.release(); // NOLINT
168            }
169        }
170    }
171
172    void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
173        if (!reg) {
174            return;
175        }
176
177#ifndef NDEBUG
178        GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
179            __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
180#endif
181        backends.push_back({ reg, std::move(handle) });
182        for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
183            register_device(ggml_backend_reg_dev_get(reg, i));
184        }
185    }
186
187    void register_device(ggml_backend_dev_t device) {
188#ifndef NDEBUG
189        GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
190#endif
191        devices.push_back(device);
192    }
193
194    ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
195        dl_handle_ptr handle { dl_load_library(path) };
196        if (!handle) {
197            if (!silent) {
198                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(), dl_error());
199            }
200            return nullptr;
201        }
202
203        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
204        if (score_fn && score_fn() == 0) {
205            if (!silent) {
206                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
207            }
208            return nullptr;
209        }
210
211        auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
212        if (!backend_init_fn) {
213            if (!silent) {
214                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
215            }
216            return nullptr;
217        }
218
219        ggml_backend_reg_t reg = backend_init_fn();
220        if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
221            if (!silent) {
222                if (!reg) {
223                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
224                        __func__, path_str(path).c_str());
225                } else {
226                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
227                        __func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
228                }
229            }
230            return nullptr;
231        }
232
233        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());
234
235        register_backend(reg, std::move(handle));
236
237        return reg;
238    }
239
240    void unload_backend(ggml_backend_reg_t reg, bool silent) {
241        auto it = std::find_if(backends.begin(), backends.end(),
242                               [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
243
244        if (it == backends.end()) {
245            if (!silent) {
246                GGML_LOG_ERROR("%s: backend not found\n", __func__);
247            }
248            return;
249        }
250
251        if (!silent) {
252            GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
253        }
254
255        // remove devices
256        devices.erase(
257            std::remove_if(devices.begin(), devices.end(),
258                            [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
259            devices.end());
260
261        // remove backend
262        backends.erase(it);
263    }
264};
265
266static ggml_backend_registry & get_reg() {
267    static ggml_backend_registry reg;
268    return reg;
269}
270
271// Internal API
272void ggml_backend_register(ggml_backend_reg_t reg) {
273    get_reg().register_backend(reg);
274}
275
276void ggml_backend_device_register(ggml_backend_dev_t device) {
277    get_reg().register_device(device);
278}
279
280// Backend (reg) enumeration
// Case-insensitive equality of two NUL-terminated strings.
// Returns true when both strings have the same length and every character pair matches after
// lower-casing with std::tolower. A null pointer only compares equal to another null pointer.
static bool striequals(const char * a, const char * b) {
    if (a == nullptr || b == nullptr) {
        return a == b;
    }
    for (; *a && *b; a++, b++) {
        // std::tolower requires a value representable as unsigned char (or EOF): passing a negative
        // plain char, i.e. any byte >= 0x80 where char is signed, is undefined behavior
        const int lower_a = std::tolower(static_cast<unsigned char>(*a));
        const int lower_b = std::tolower(static_cast<unsigned char>(*b));
        if (lower_a != lower_b) {
            return false;
        }
    }
    // at least one string has ended: they are equal only when both have
    return *a == *b;
}
289
290size_t ggml_backend_reg_count() {
291    return get_reg().backends.size();
292}
293
294ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
295    GGML_ASSERT(index < ggml_backend_reg_count());
296    return get_reg().backends[index].reg;
297}
298
299ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
300    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
301        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
302        if (striequals(ggml_backend_reg_name(reg), name)) {
303            return reg;
304        }
305    }
306    return nullptr;
307}
308
309// Device enumeration
310size_t ggml_backend_dev_count() {
311    return get_reg().devices.size();
312}
313
314ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
315    GGML_ASSERT(index < ggml_backend_dev_count());
316    return get_reg().devices[index];
317}
318
319ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
320    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
321        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
322        if (striequals(ggml_backend_dev_name(dev), name)) {
323            return dev;
324        }
325    }
326    return nullptr;
327}
328
329ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
330    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
331        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
332        if (ggml_backend_dev_type(dev) == type) {
333            return dev;
334        }
335    }
336    return nullptr;
337}
338
339// Convenience functions
340ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
341    ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
342    if (!dev) {
343        return nullptr;
344    }
345    return ggml_backend_dev_init(dev, params);
346}
347
348ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
349    ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
350    if (!dev) {
351        return nullptr;
352    }
353    return ggml_backend_dev_init(dev, params);
354}
355
356ggml_backend_t ggml_backend_init_best(void) {
357    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
358    dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU);
359    dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
360    if (!dev) {
361        return nullptr;
362    }
363    return ggml_backend_dev_init(dev, nullptr);
364}
365
366// Dynamic loading
367ggml_backend_reg_t ggml_backend_load(const char * path) {
368    return get_reg().load_backend(path, false);
369}
370
371void ggml_backend_unload(ggml_backend_reg_t reg) {
372    get_reg().unload_backend(reg, true);
373}
374
// Returns the directory that contains the running executable, including a trailing path separator.
// On Linux/FreeBSD "./" is returned when the executable path cannot be read; on Windows and on
// platforms without an implementation an empty path is returned on failure.
static fs::path get_executable_path() {
#if defined(__APPLE__)
    // get executable path, growing the buffer to the size reported back until the call succeeds
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__) || defined(__FreeBSD__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
#    if defined(__linux__)
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
#    elif defined(__FreeBSD__)
        ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
#    endif
        if (len == -1) {
            // keep the "." fallback
            break;
        }
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        // the result filled the whole buffer and may be truncated: retry with a larger buffer
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    std::vector<wchar_t> path(MAX_PATH);
    DWORD len = 0;
    while (true) {
        len = GetModuleFileNameW(NULL, path.data(), static_cast<DWORD>(path.size()));
        if (len == 0) {
            return {};
        }
        if (len < path.size()) {
            break;
        }
        // a return value equal to the buffer size means the path was truncated (paths longer than
        // MAX_PATH are possible): retry with a larger buffer instead of using a cut-off path
        path.resize(path.size() * 2);
    }
    std::wstring base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of(L'\\');
    if (last_slash != std::wstring::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + L"\\";
#else
    return {};
#endif
}
437
// File name prefix shared by all backend libraries: "ggml-" on Windows, "libggml-" elsewhere.
static fs::path backend_filename_prefix() {
#ifdef _WIN32
    const char * const prefix = "ggml-";
#else
    const char * const prefix = "libggml-";
#endif
    return fs::u8path(prefix);
}
445
// File name extension of backend libraries: ".dll" on Windows, ".so" elsewhere.
static fs::path backend_filename_extension() {
#ifdef _WIN32
    const char * const extension = ".dll";
#else
    const char * const extension = ".so";
#endif
    return fs::u8path(extension);
}
453
454static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
455    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
456    const fs::path name_path = fs::u8path(name);
457    const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
458    const fs::path file_extension = backend_filename_extension();
459
460    std::vector<fs::path> search_paths;
461    if (user_search_path == nullptr) {
462#ifdef GGML_BACKEND_DIR
463        search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
464#endif
465        // default search paths: executable directory, current directory
466        search_paths.push_back(get_executable_path());
467        search_paths.push_back(fs::current_path());
468    } else {
469        search_paths.push_back(fs::u8path(user_search_path));
470    }
471
472    int best_score = 0;
473    fs::path best_path;
474    std::error_code ec;
475
476    for (const auto & search_path : search_paths) {
477        if (!fs::exists(search_path, ec)) {
478            if (ec) {
479                GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(search_path).c_str(), ec.message().c_str());
480            } else {
481                GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
482            }
483            continue;
484        }
485        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
486        for (const auto & entry : dir_it) {
487            if (entry.is_regular_file(ec)) {
488                auto filename = entry.path().filename();
489                auto ext = entry.path().extension();
490                if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
491                    dl_handle_ptr handle { dl_load_library(entry) };
492                    if (!handle && !silent) {
493                        GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error());
494                    }
495                    if (handle) {
496                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
497                        if (score_fn) {
498                            int s = score_fn();
499#ifndef NDEBUG
500                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
501#endif
502                            if (s > best_score) {
503                                best_score = s;
504                                best_path = entry.path();
505                            }
506                        } else {
507                            if (!silent) {
508                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
509                            }
510                        }
511                    }
512                }
513            }
514        }
515    }
516
517    if (best_score == 0) {
518        // try to load the base backend
519        for (const auto & search_path : search_paths) {
520            fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
521            fs::path path = search_path / filename;
522            if (std::error_code ec; fs::exists(path, ec)) {
523                return get_reg().load_backend(path, silent);
524            } else {
525                if (ec) {
526                    GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(path).c_str(), ec.message().c_str());
527                }
528            }
529        }
530        return nullptr;
531    }
532
533    return get_reg().load_backend(best_path, silent);
534}
535
536void ggml_backend_load_all() {
537    ggml_backend_load_all_from_path(nullptr);
538}
539
540void ggml_backend_load_all_from_path(const char * dir_path) {
541#ifdef NDEBUG
542    bool silent = true;
543#else
544    bool silent = false;
545#endif
546
547    ggml_backend_load_best("blas", silent, dir_path);
548    ggml_backend_load_best("zendnn", silent, dir_path);
549    ggml_backend_load_best("cann", silent, dir_path);
550    ggml_backend_load_best("cuda", silent, dir_path);
551    ggml_backend_load_best("hip", silent, dir_path);
552    ggml_backend_load_best("metal", silent, dir_path);
553    ggml_backend_load_best("rpc", silent, dir_path);
554    ggml_backend_load_best("sycl", silent, dir_path);
555    ggml_backend_load_best("vulkan", silent, dir_path);
556    ggml_backend_load_best("virtgpu", silent, dir_path);
557    ggml_backend_load_best("opencl", silent, dir_path);
558    ggml_backend_load_best("hexagon", silent, dir_path);
559    ggml_backend_load_best("musa", silent, dir_path);
560    ggml_backend_load_best("cpu", silent, dir_path);
561    // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
562    const char * backend_path = std::getenv("GGML_BACKEND_PATH");
563    if (backend_path) {
564        ggml_backend_load(backend_path);
565    }
566}