summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp')
-rw-r--r--llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp58
1 files changed, 58 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp b/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp
new file mode 100644
index 0000000..07d9a66
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp
@@ -0,0 +1,58 @@
+#include "virtgpu-forward-impl.h"
+
// Return the current wall-clock time in milliseconds since the Unix epoch.
//
// Bug fix: the previous version multiplied tv_sec by 1e9 and added tv_nsec,
// returning *nanoseconds* despite the "_ms" name.
//
// NOTE(review): CLOCK_REALTIME can jump (NTP, settimeofday); switch to
// CLOCK_MONOTONIC if callers only need elapsed-time deltas. Also, this
// static helper is unused in this translation unit — confirm it is needed.
static long long current_time_ms() {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time
    return (long long) ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;
}
+
// Execute a ggml compute graph on the remote (host-side) backend.
//
// The graph is serialized into a guest-host shared-memory buffer; the
// buffer's res_id and the payload size are forwarded through the command
// encoder, and the host replies with a ggml_status.
//
// NOTE(review): the encode order below (shmem res_id, then cgraph size)
// presumably must mirror the host-side decoder — verify before reordering.
ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
    apir_encoder * encoder;
    apir_decoder * decoder;
    ApirForwardReturnCode ret; // filled by REMOTE_CALL; not inspected here

    // Sets up the command encoder for this command type (macro from
    // virtgpu-forward-impl.h; exact side effects not visible here).
    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);

    // Serialize the graph to bytes; cgraph_size is the number of bytes
    // that must fit in the shared buffer.
    std::vector<uint8_t> cgraph_data;
    size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);

    virtgpu_shmem temp_shmem; // Local storage for large buffers
    virtgpu_shmem * shmem = &temp_shmem;
    bool using_shared_shmem = false;

    if (cgraph_size <= gpu->data_shmem.mmap_size) {
        // Small graph: reuse the pre-allocated shared buffer. The mutex is
        // held across the remote call — presumably because the host reads
        // the buffer until the call completes.
        if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
            GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
        }
        using_shared_shmem = true;
        shmem = &gpu->data_shmem;
    } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
        // Large graph: allocate a dedicated shared buffer (destroyed below);
        // a non-zero return from virtgpu_shmem_create means failure.
        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
    }

    // Tell the host which shared-memory region holds the graph ...
    apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);

    // ... and how many bytes of it are valid.
    apir_encode_size_t(encoder, &cgraph_size);

    // Copy the serialized graph into the shared buffer through a secondary
    // encoder bounded to cgraph_size bytes.
    char * shmem_data = (char *) shmem->mmap_ptr;
    apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size);

    apir_encode_cgraph_data(&secondary_enc, cgraph_data);

    // Round-trip to the host (macro; presumably blocks until the host has
    // finished executing the graph).
    REMOTE_CALL(gpu, encoder, decoder, ret);

    // Default to ABORTED so that a reply that fails to decode is reported
    // as a failure rather than success.
    ggml_status status = GGML_STATUS_ABORTED;
    apir_decode_ggml_status(decoder, &status);

    remote_call_finish(gpu, encoder, decoder);

    // Release whichever buffer was used: unlock the shared one, or destroy
    // the per-call allocation.
    if (using_shared_shmem) {
        mtx_unlock(&gpu->data_shmem_mutex);
    } else {
        virtgpu_shmem_destroy(gpu, shmem);
    }

    return status;
}