1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
#include "ggml-remoting.h"
// Extract the virtgpu handle from a ggml buffer whose ->context is a
// ggml_backend_remoting_buffer_context.
#define BUFFER_TO_GPU(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->gpu
// Return the buffer's base address, querying the remote device on first use
// and caching the result in the context for subsequent calls.
static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) {
    // Consistency fix: use BUFFER_TO_GGML_CONTEXT like the sibling functions
    // instead of a hand-rolled cast of buffer->context.
    ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer);
    if (context->base == NULL) {
        context->base = apir_buffer_get_base(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer));
    }
    return context->base;
}
// Write `size` bytes of `data` into `tensor` at byte `offset`.
// Host-mapped buffers (is_from_ptr) are written directly; otherwise the
// write is forwarded to the remote device through the APIR layer.
static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                                    ggml_tensor * tensor,
                                                    const void * data,
                                                    size_t offset,
                                                    size_t size) {
    ggml_backend_remoting_buffer_context * ctx = BUFFER_TO_GGML_CONTEXT(buffer);

    if (ctx->is_from_ptr) {
        // Host-visible memory: plain copy into the tensor's storage.
        memcpy((char *) tensor->data + offset, data, size);
        return;
    }

    apir_buffer_set_tensor(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer),
                           tensor, data, offset, size);
}
// Read `size` bytes from `tensor` at byte `offset` into `data`.
// Host-mapped buffers (is_from_ptr) are read directly; otherwise the read
// is forwarded to the remote device through the APIR layer.
static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                                    const ggml_tensor * tensor,
                                                    void * data,
                                                    size_t offset,
                                                    size_t size) {
    ggml_backend_remoting_buffer_context * ctx = BUFFER_TO_GGML_CONTEXT(buffer);

    if (ctx->is_from_ptr) {
        // Host-visible memory: plain copy out of the tensor's storage.
        memcpy(data, (const char *) tensor->data + offset, size);
        return;
    }

    apir_buffer_get_tensor(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer),
                           tensor, data, offset, size);
}
// set_tensor for host-pointer-backed buffers: the tensor data is directly
// addressable, so this is a plain memcpy into it.
static void ggml_backend_remoting_buffer_set_tensor_from_ptr(ggml_backend_buffer_t buffer,
                                                             ggml_tensor * tensor,
                                                             const void * data,
                                                             size_t offset,
                                                             size_t size) {
    UNUSED(buffer);

    char * dst = (char *) tensor->data + offset;
    memcpy(dst, data, size);
}
// get_tensor for host-pointer-backed buffers: the tensor data is directly
// addressable, so this is a plain memcpy out of it.
static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer_t buffer,
                                                             const ggml_tensor * tensor,
                                                             void * data,
                                                             size_t offset,
                                                             size_t size) {
    UNUSED(buffer);

    const char * src = (const char *) tensor->data + offset;
    memcpy(data, src, size);
}
// Ask the remote device to copy `src` into `dst`; returns whether the
// APIR layer performed the copy.
static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
                                                    const ggml_tensor * src,
                                                    ggml_tensor * dst) {
    return apir_buffer_cpy_tensor(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer), src, dst);
}
// Fill the buffer with `value` on the remote device.
static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    apir_buffer_clear(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer), value);
}
// Tear down the buffer: release the device-side allocation, then the
// host-side context.
static void ggml_backend_remoting_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    // Release on the device first — the APIR context presumably lives inside
    // (or is reached through) the ggml context, so it must still be alive here.
    apir_buffer_free_buffer(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer));

    ggml_backend_remoting_buffer_context * ctx = BUFFER_TO_GGML_CONTEXT(buffer);
    free(ctx);
    buffer->context = NULL; // defend against dangling use of the freed context
}
// Buffer vtable for device-allocated buffers: tensor reads/writes go through
// the APIR remoting layer (with a direct-memcpy fast path when the context is
// flagged is_from_ptr). Unused hooks are left NULL.
const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface = {
/* .free_buffer = */ ggml_backend_remoting_buffer_free_buffer,
/* .get_base = */ ggml_backend_remoting_buffer_get_base,
/* .init_tensor = */ NULL,
/* .memset_tensor = */ NULL,
/* .set_tensor = */ ggml_backend_remoting_buffer_set_tensor,
/* .get_tensor = */ ggml_backend_remoting_buffer_get_tensor,
/* .cpy_tensor = */ ggml_backend_remoting_buffer_cpy_tensor,
/* .clear = */ ggml_backend_remoting_buffer_clear,
/* .reset = */ NULL,
};
// Buffer vtable for host-pointer-backed buffers: set/get_tensor use the
// direct-memcpy variants; the remaining hooks are shared with the
// device-buffer interface above. Unused hooks are left NULL.
const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface = {
/* .free_buffer = */ ggml_backend_remoting_buffer_free_buffer,
/* .get_base = */ ggml_backend_remoting_buffer_get_base,
/* .init_tensor = */ NULL,
/* .memset_tensor = */ NULL,
/* .set_tensor = */ ggml_backend_remoting_buffer_set_tensor_from_ptr,
/* .get_tensor = */ ggml_backend_remoting_buffer_get_tensor_from_ptr,
/* .cpy_tensor = */ ggml_backend_remoting_buffer_cpy_tensor,
/* .clear = */ ggml_backend_remoting_buffer_clear,
/* .reset = */ NULL,
};
|