diff options
Diffstat (limited to 'llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp b/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp new file mode 100644 index 0000000..c7d2881 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp @@ -0,0 +1,157 @@ +#include "ggml-remoting.h" + +static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { + virtgpu * gpu = DEV_TO_GPU(dev); + + return gpu->cached_device_info.name; +} + +static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { + virtgpu * gpu = DEV_TO_GPU(dev); + + // Return the pre-cached description from the virtgpu structure + return gpu->cached_device_info.description; +} + +static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { + virtgpu * gpu = DEV_TO_GPU(dev); + + return (enum ggml_backend_dev_type) gpu->cached_device_info.type; +} + +static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { + virtgpu * gpu = DEV_TO_GPU(dev); + + *free = gpu->cached_device_info.memory_free; + *total = gpu->cached_device_info.memory_total; +} + +static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { +#if USE_ALWAYS_TRUE_SUPPORTS_OP == 1 + /* ggml-rpc cheats it like this */ + /* with the current implementation of serialize_tensor, the src/view aren't properly passed */ + UNUSED(dev); + UNUSED(op); + + return true; +#else + virtgpu * gpu = DEV_TO_GPU(dev); + + return apir_device_supports_op(gpu, op); +#endif +} + +static bool ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { + bool supported = buft->device == dev; + + return supported; +} + +static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { + UNUSED(dev); + UNUSED(op); + + return false; +} + +static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { + props->name = ggml_backend_remoting_device_get_name(dev); + props->description = ggml_backend_remoting_device_get_description(dev); + props->type = ggml_backend_remoting_device_get_type(dev); + ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total); + + virtgpu * gpu = DEV_TO_GPU(dev); + apir_device_get_props(gpu, &props->caps.async, &props->caps.host_buffer, &props->caps.buffer_from_host_ptr, + &props->caps.events); + + props->caps.buffer_from_host_ptr = false; + props->caps.async = false; + props->caps.events = false; +} + +ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { + virtgpu * gpu = DEV_TO_GPU(dev); + + static std::atomic<bool> initialized = false; + static ggml_backend_buffer_type buft; + + if (!initialized) { + static std::mutex mutex; + std::lock_guard<std::mutex> lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } + + return &buft; +} + +static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { + virtgpu * gpu = DEV_TO_GPU(dev); + + static std::atomic<bool> initialized = false; + static ggml_backend_buffer_type buft; + + if (!initialized) { + static std::mutex mutex; + std::lock_guard<std::mutex> lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } + + return &buft; +} + +static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev, + void * ptr, + size_t size, + size_t max_tensor_size) { + virtgpu * gpu = DEV_TO_GPU(dev); + + ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + if (!context) { + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__); + } + + context->gpu = gpu; + context->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size); + context->base = ptr; + context->is_from_ptr = true; + + ggml_backend_buffer_t buffer = + ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev), + ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size); + + return buffer; +} + +const ggml_backend_device_i ggml_backend_remoting_device_interface = { + /* .get_name = */ ggml_backend_remoting_device_get_name, + /* .get_description = */ ggml_backend_remoting_device_get_description, + /* .get_memory = */ ggml_backend_remoting_device_get_memory, + /* .get_type = */ ggml_backend_remoting_device_get_type, + /* .get_props = */ ggml_backend_remoting_device_get_props, + /* .init_backend = */ ggml_backend_remoting_device_init, + /* .get_buffer_type = */ ggml_backend_remoting_device_get_buffer_type, + /* .get_host_buffer_type = */ NULL, + /* .buffer_from_host_ptr = */ ggml_backend_remoting_device_buffer_from_ptr, + /* .supports_op = */ ggml_backend_remoting_device_supports_op, + /* .supports_buft = */ ggml_backend_remoting_device_supports_buft, + /* .offload_op = */ ggml_backend_remoting_device_offload_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_synchronize = */ NULL, +}; |
