#include "backend/shared/apir_cs_rpc.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "ggml-remoting.h"

#include <cinttypes>
#include <cstring>
#include <unordered_map>
#include <unordered_set>
#include <vector>
10
11apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) {
12 apir_rpc_tensor result;
13 result.id = reinterpret_cast<uint64_t>(tensor);
14 result.type = tensor->type;
15 if (tensor->buffer) {
16 ggml_backend_buffer_t buffer = tensor->buffer;
17
18 result.buffer = BUFFER_TO_HOST_HANDLE(buffer);
19 } else {
20 result.buffer = 0;
21 }
22 for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
23 result.ne[i] = tensor->ne[i];
24 result.nb[i] = tensor->nb[i];
25 }
26 result.op = tensor->op;
27 for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
28 result.op_params[i] = tensor->op_params[i];
29 }
30 result.flags = tensor->flags;
31 for (uint32_t i = 0; i < GGML_MAX_SRC; i++) {
32 result.src[i] = reinterpret_cast<uint64_t>(tensor->src[i]);
33 }
34 result.view_src = reinterpret_cast<uint64_t>(tensor->view_src);
35 result.view_offs = tensor->view_offs;
36 result.data = reinterpret_cast<uint64_t>(tensor->data);
37 if (tensor->data) {
38 if (!tensor->buffer) {
39 GGML_ABORT("%s: tensor has data but not buffer", __func__);
40 }
41 // tensor->data is serialized as an offset to the buffer base address
42 result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
43 }
44 snprintf(result.name, GGML_MAX_NAME, "%s", tensor->name);
45 return result;
46}
47
48void apir_add_tensor(ggml_tensor * tensor,
49 std::vector<apir_rpc_tensor> & tensors,
50 std::unordered_set<ggml_tensor *> & visited) {
51 if (tensor == nullptr) {
52 return;
53 }
54 if (visited.find(tensor) != visited.end()) {
55 return;
56 }
57 visited.insert(tensor);
58 for (int i = 0; i < GGML_MAX_SRC; i++) {
59 apir_add_tensor(tensor->src[i], tensors, visited);
60 }
61 apir_add_tensor(tensor->view_src, tensors, visited);
62 tensors.push_back(apir_serialize_tensor(tensor));
63}
64
65void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output) {
66 uint32_t n_nodes = cgraph->n_nodes;
67 std::vector<apir_rpc_tensor> tensors;
68 std::unordered_set<ggml_tensor *> visited;
69 for (uint32_t i = 0; i < n_nodes; i++) {
70 apir_add_tensor(cgraph->nodes[i], tensors, visited);
71 }
72 // serialization format:
73 // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(apir_rpc_tensor)) |
74 uint32_t n_tensors = tensors.size();
75 int output_size =
76 sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(apir_rpc_tensor);
77 output.resize(output_size, 0);
78 memcpy(output.data(), &n_nodes, sizeof(n_nodes));
79 for (uint32_t i = 0; i < n_nodes; i++) {
80 memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t));
81 }
82 uint32_t * out_ntensors = (uint32_t *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t));
83 *out_ntensors = n_tensors;
84 apir_rpc_tensor * out_tensors =
85 (apir_rpc_tensor *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t));
86 memcpy(out_tensors, tensors.data(), n_tensors * sizeof(apir_rpc_tensor));
87}