1#include "virtgpu-forward-impl.h"
2
/*
 * Returns the current wall-clock time (CLOCK_REALTIME) in milliseconds.
 *
 * The value is built from whole seconds scaled to milliseconds plus the
 * nanosecond remainder truncated to milliseconds. Returns 0 if the clock
 * could not be read.
 *
 * Note: CLOCK_REALTIME can jump when the system time is adjusted; prefer
 * CLOCK_MONOTONIC when measuring elapsed time between two points.
 */
static long long current_time_ms() {
    struct timespec ts = { 0, 0 };
    if (clock_gettime(CLOCK_REALTIME, &ts) != 0) {
        return 0;
    }
    // seconds -> ms, nanoseconds -> ms (the previous code produced nanoseconds)
    return (long long) ts.tv_sec * 1000LL + (long long) ts.tv_nsec / 1000000LL;
}
8
9ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
10 apir_encoder * encoder;
11 apir_decoder * decoder;
12 ApirForwardReturnCode ret;
13
14 REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);
15
16 std::vector<uint8_t> cgraph_data;
17 size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);
18
19 virtgpu_shmem temp_shmem; // Local storage for large buffers
20 virtgpu_shmem * shmem = &temp_shmem;
21 bool using_shared_shmem = false;
22
23 if (cgraph_size <= gpu->data_shmem.mmap_size) {
24 // Lock mutex before using shared data_shmem buffer
25 if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
26 GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
27 }
28 using_shared_shmem = true;
29 shmem = &gpu->data_shmem;
30 } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
31 GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
32 }
33
34 apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
35
36 apir_encode_size_t(encoder, &cgraph_size);
37
38 char * shmem_data = (char *) shmem->mmap_ptr;
39 apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size);
40
41 apir_encode_cgraph_data(&secondary_enc, cgraph_data);
42
43 REMOTE_CALL(gpu, encoder, decoder, ret);
44
45 ggml_status status = GGML_STATUS_ABORTED;
46 apir_decode_ggml_status(decoder, &status);
47
48 remote_call_finish(gpu, encoder, decoder);
49
50 // Unlock mutex before cleanup
51 if (using_shared_shmem) {
52 mtx_unlock(&gpu->data_shmem_mutex);
53 } else {
54 virtgpu_shmem_destroy(gpu, shmem);
55 }
56
57 return status;
58}