diff options
Diffstat (limited to 'llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp b/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp new file mode 100644 index 0000000..cc879e5 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp @@ -0,0 +1,65 @@ +#include "backend-dispatched.h" +#include "backend-virgl-apir.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" +#include "ggml-impl.h" +#include "shared/apir_backend.h" + +#include <cstdint> + +uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { + GGML_UNUSED(ctx); + GGML_UNUSED(enc); + + static bool async_backend_initialized = false; + static bool async_backend; + + if (!async_backend_initialized) { + ggml_backend_dev_props props; + + dev->iface.get_props(dev, &props); + async_backend = props.caps.async; + async_backend_initialized = true; + } + + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + + const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); + if (!shmem_data) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__); + apir_decoder_set_fatal(dec); + return 1; + } + size_t cgraph_size; + apir_decode_size_t(dec, &cgraph_size); + + apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size); + + ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size); + + ggml_status status; +#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1 + for (int idx = 0; idx < cgraph->n_nodes; idx++) { + ggml_tensor * op = ggml_graph_node(cgraph, idx); + if (dev->iface.supports_op(dev, op)) { + continue; + } + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op)); + + status = GGML_STATUS_ABORTED; + apir_encode_ggml_status(enc, &status); + + return 0; + } +#endif + status = bck->iface.graph_compute(bck, cgraph); + + if (async_backend) { + bck->iface.synchronize(bck); + } + + apir_encode_ggml_status(enc, &status); + + return 0; +} |
