diff options
Diffstat (limited to 'llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp b/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp new file mode 100644 index 0000000..07d9a66 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp @@ -0,0 +1,58 @@ +#include "virtgpu-forward-impl.h" + +static long long current_time_ms() { + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time + return (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; +} + +ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { + apir_encoder * encoder; + apir_decoder * decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE); + + std::vector<uint8_t> cgraph_data; + size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data); + + virtgpu_shmem temp_shmem; // Local storage for large buffers + virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; + + if (cgraph_size <= gpu->data_shmem.mmap_size) { + // Lock mutex before using shared data_shmem buffer + if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) { + GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); + } + using_shared_shmem = true; + shmem = &gpu->data_shmem; + } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); + } + + apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); + + apir_encode_size_t(encoder, &cgraph_size); + + char * shmem_data = (char *) shmem->mmap_ptr; + apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size); + + apir_encode_cgraph_data(&secondary_enc, cgraph_data); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + ggml_status status = GGML_STATUS_ABORTED; + apir_decode_ggml_status(decoder, &status); + + remote_call_finish(gpu, encoder, decoder); + + // Unlock mutex before cleanup + if (using_shared_shmem) { + mtx_unlock(&gpu->data_shmem_mutex); + } else { + virtgpu_shmem_destroy(gpu, shmem); + } + + return status; +} |
