1#pragma once
 2
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "virtgpu.h"

#include <memory>
#include <string>
#include <tuple>
#include <vector>
10
// Backend name, and the prefix prepended to this backend's log/abort messages.
// The prefix is built from the name through string-literal concatenation so
// that the two can never drift apart.
#define GGML_VIRTGPU_NAME "ggml-virtgpu"
#define GGML_VIRTGPU      GGML_VIRTGPU_NAME ": "
13
// Compile-time switches for the supports_op implementation.
//
// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast; 0 avoids micro-benchmark crashes.
// NOTE(review): the name suggests that 1 reports every op as supported
// without querying anything -- confirm against the supports_op code.

#define USE_ALWAYS_TRUE_SUPPORTS_OP 1
// NOTE(review): undocumented in this header; presumably selects a
// Metal-specific supports_op check on the guest side -- confirm.
#define USE_METAL_GUEST_SUPPORTS_OP 0
18
// Accessor macros that recover the backend-specific state stored behind the
// opaque `context` pointers. Every expansion is wrapped in parentheses so the
// result can be safely followed by `->` / `.` or embedded in a larger
// expression (a bare `&x->y` or `(T *) f()->c` would otherwise re-associate
// with whatever operator comes next).

// `gpu` member of the ggml_backend_remoting_device_context behind (name)->context.
#define DEV_TO_GPU(name) (((ggml_backend_remoting_device_context *) (name)->context)->gpu)

// (name)->context viewed as a ggml_backend_remoting_buffer_context pointer.
#define BUFFER_TO_GGML_CONTEXT(name) ((ggml_backend_remoting_buffer_context *) (name)->context)

// Address of the `apir_context` member of the buffer context behind (name)->context.
#define BUFFER_TO_APIR_CONTEXT(name) (&((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context)

// `apir_context.host_handle` of the buffer context behind (name)->context.
#define BUFFER_TO_HOST_HANDLE(name) \
    (((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle)

// Device context of device index 0, obtained via ggml_backend_remoting_get_device(0).
#define GET_DEVICE_CONTEXT() ((ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context)

// `gpu` member of the device context behind (name)->device->context.
#define BUFT_TO_GPU(name) (((ggml_backend_remoting_device_context *) (name)->device->context)->gpu)
30
31struct ggml_backend_remoting_device_context {
32    size_t      device;
33    std::string name;
34    std::string description;
35
36    std::vector<std::tuple<void *, size_t, virtgpu_shmem *>> shared_memory;
37
38    virtgpu * gpu;
39};
40
// Per-buffer state of the remoting backend, reached through a buffer's opaque
// `context` pointer (see the BUFFER_TO_* macros).
struct ggml_backend_remoting_buffer_context {
    // APIR-side buffer context; its `host_handle` member is what
    // BUFFER_TO_HOST_HANDLE / ggml_buffer_to_apir_handle() return.
    apir_buffer_context_t apir_context;

    // NOTE(review): presumably the virtgpu the buffer was allocated on -- confirm.
    virtgpu * gpu;

    // NOTE(review): presumably the guest-visible base address of the buffer
    // data -- confirm against the get_base implementation.
    void * base;

    // NOTE(review): presumably true for buffers created through the
    // "from_ptr" buffer type/interface declared in this header -- confirm.
    bool is_from_ptr;
};
50
// Interface tables of the remoting backend; declared here, defined in other
// translation units. The "from_ptr" pair is a second buffer-type / buffer
// interface (NOTE(review): presumably for buffers wrapping an existing
// pointer -- confirm against their definitions).
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface;
extern const ggml_backend_device_i      ggml_backend_remoting_device_interface;
extern const ggml_backend_buffer_i      ggml_backend_remoting_buffer_interface;
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface;
extern const ggml_backend_buffer_i      ggml_backend_remoting_buffer_from_ptr_interface;
56
// Entry points implemented in other translation units.

// Returns the device object for index `device` (GET_DEVICE_CONTEXT() calls it with 0).
ggml_backend_dev_t         ggml_backend_remoting_get_device(size_t device);
// NOTE(review): presumably creates a backend instance for `dev`; the meaning
// of `params` is not visible from this header -- confirm.
ggml_backend_t             ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
// Returns a buffer type associated with `dev`.
// NOTE(review): presumably the device's default buffer type -- confirm.
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev);
60
61static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
62    // in the backend, the buffer handle is the buffer pointer
63    return (apir_buffer_type_host_handle_t) buft->context;
64}
65
66static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
67    if (!buffer->context) {
68        GGML_ABORT(GGML_VIRTGPU "%s: no context available :/", __func__);
69    }
70    return BUFFER_TO_HOST_HANDLE(buffer);
71}