llmnpc - llama.cpp/ggml/src/ggml-virtgpu/virtgpu.cpp

Path: llmnpc / llama.cpp / ggml / src / ggml-virtgpu / virtgpu.cpp (raw)
  1#include "virtgpu.h"
  2
  3#include <stdio.h>
  4#include <unistd.h>
  5
  6#include <cassert>
  7#include <cerrno>
  8#include <cstdlib>
  9
 10static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev);  // try one DRM device; stores its fd in gpu->fd on success
 11static virt_gpu_result_t virtgpu_open(virtgpu * gpu);  // enumerate DRM devices and stop at the first one that opens successfully
 12
 13static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu);  // select the APIR or Venus capset and fetch its data
 14static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu);  // initialize the DRM context with the selected capset id
 15
 16static int      virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id);  // DRM_IOCTL_VIRTGPU_CONTEXT_INIT wrapper
 17static int      virtgpu_ioctl_get_caps(virtgpu *             gpu,
 18                                       virgl_renderer_capset id,
 19                                       uint32_t              version,
 20                                       void *                capset,
 21                                       size_t                capset_size);  // DRM_IOCTL_VIRTGPU_GET_CAPS wrapper
 22static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param);  // DRM_IOCTL_VIRTGPU_GETPARAM wrapper; yields 0 when the ioctl fails
 23static void     virtgpu_init_renderer_info(virtgpu * gpu);  // NOTE(review): no definition is visible in this view of the file - confirm it is still needed
 24
 25static void log_call_duration(long long call_duration_ns, const char * name);  // logs how long a remote call waited for the host reply
 26
 27const uint64_t APIR_HANDSHAKE_MAX_WAIT_MS   = 2 * 1000;   // 2s: reply timeout passed to remote_call() for the handshake
 28const uint64_t APIR_LOADLIBRARY_MAX_WAIT_MS = 60 * 1000;  // 60s: reply timeout passed to remote_call() for the LoadLibrary command
 29
 30static int virtgpu_handshake(virtgpu * gpu) {
 31    apir_encoder * encoder;
 32    apir_decoder * decoder;
 33
 34    encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HANDSHAKE, 0);
 35    if (!encoder) {
 36        GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__);
 37        return 1;
 38    }
 39
 40    /* write handshake props */
 41
 42    uint32_t guest_major = APIR_PROTOCOL_MAJOR;
 43    uint32_t guest_minor = APIR_PROTOCOL_MINOR;
 44    apir_encode_uint32_t(encoder, &guest_major);
 45    apir_encode_uint32_t(encoder, &guest_minor);
 46
 47    /* *** */
 48
 49    uint32_t  ret_magic;
 50    long long call_duration_ns;
 51    ret_magic = remote_call(gpu, encoder, &decoder, APIR_HANDSHAKE_MAX_WAIT_MS, &call_duration_ns);
 52    log_call_duration(call_duration_ns, "API Remoting handshake");
 53
 54    if (!decoder) {
 55        GGML_ABORT(GGML_VIRTGPU
 56            "%s: failed to initiate the communication with the virglrenderer library. "
 57            "Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
 58            __func__);
 59        return 1;
 60    }
 61
 62    /* read handshake return values */
 63
 64    uint32_t host_major;
 65    uint32_t host_minor;
 66
 67    if (ret_magic != APIR_HANDSHAKE_MAGIC) {
 68        GGML_ABORT(GGML_VIRTGPU
 69                   "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
 70                   apir_backend_initialize_error(ret_magic));
 71    } else {
 72        apir_decode_uint32_t(decoder, &host_major);
 73        apir_decode_uint32_t(decoder, &host_minor);
 74    }
 75
 76    remote_call_finish(gpu, encoder, decoder);
 77
 78    if (ret_magic != APIR_HANDSHAKE_MAGIC) {
 79        return 1;
 80    }
 81
 82    GGML_LOG_INFO(GGML_VIRTGPU "%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor);
 83    GGML_LOG_INFO(GGML_VIRTGPU "%s: Host is running with %u.%u\n", __func__, host_major, host_minor);
 84
 85    if (guest_major != host_major) {
 86        GGML_LOG_ERROR(GGML_VIRTGPU "Host major (%d) and guest major (%d) version differ\n", host_major, guest_major);
 87    } else if (guest_minor != host_minor) {
 88        GGML_LOG_WARN(GGML_VIRTGPU "Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor);
 89    }
 90
 91    return 0;
 92}
 93
 94static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
 95    apir_encoder *            encoder;
 96    apir_decoder *            decoder;
 97    ApirLoadLibraryReturnCode ret;
 98
 99    encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LOADLIBRARY, 0);
100    if (!encoder) {
101        GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to prepare the API Remoting command encoder", __func__);
102        return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
103    }
104
105    long long call_duration_ns;
106
107    ret = (ApirLoadLibraryReturnCode) remote_call(gpu, encoder, &decoder, APIR_LOADLIBRARY_MAX_WAIT_MS,
108                                                  &call_duration_ns);
109    log_call_duration(call_duration_ns, "API Remoting LoadLibrary");
110
111    if (!decoder) {
112        GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to trigger the API Remoting hypercall.\n", __func__);
113        return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
114    }
115
116    remote_call_finish(gpu, encoder, decoder);
117
118    if (ret == APIR_LOAD_LIBRARY_SUCCESS) {
119        GGML_LOG_INFO(GGML_VIRTGPU "The API Remoting backend was successfully loaded and initialized\n");
120
121        return ret;
122    }
123
124    // something wrong happened, find out what.
125    if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
126        if (ret == APIR_LOAD_LIBRARY_ENV_VAR_MISSING) {
127            GGML_ABORT(GGML_VIRTGPU
128                       "%s: virglrenderer could not open the API Remoting backend library, "
129                       "some environment variables are missing. "
130                       "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
131                       __func__, apir_load_library_error(ret));
132        } else if (ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) {
133            GGML_ABORT(GGML_VIRTGPU
134                       "%s: virglrenderer could not open the API Remoting backend library. "
135                       "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
136                       __func__, apir_load_library_error(ret));
137        } else if (ret == APIR_LOAD_LIBRARY_ENV_VAR_MISSING) {
138            GGML_ABORT(GGML_VIRTGPU
139                       "%s: could not load the backend library, some symbols are missing. "
140                       "Make sure virglrenderer is correctly configured by the hypervisor. (%s) ",
141                       __func__, apir_load_library_error(ret));
142        } else {
143            GGML_ABORT(GGML_VIRTGPU
144                       "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", __func__,
145                       apir_load_library_error(ret), ret);
146        }
147        return ret;
148    }
149
150    GGML_LOG_INFO(GGML_VIRTGPU
151                  "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__);
152
153    ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
154
155    if (apir_ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) {
156        GGML_ABORT(GGML_VIRTGPU
157                   "%s: the API Remoting backend library couldn't load the GGML backend library. "
158                   "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
159                   __func__, apir_load_library_error(apir_ret));
160    } else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) {
161        GGML_ABORT(GGML_VIRTGPU
162                   "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. "
163                   "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
164                   __func__, apir_load_library_error(apir_ret));
165    } else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
166        GGML_ABORT(GGML_VIRTGPU
167                   "%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)",
168                   __func__, apir_ret, apir_load_library_error(apir_ret));
169    } else {
170        uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX;
171        GGML_ABORT(GGML_VIRTGPU
172                   "%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
173                   lib_ret);
174    }
175    return ret;
176}
177
178virtgpu * create_virtgpu() {
179    virtgpu * gpu = new virtgpu();
180
181    gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr;
182    util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024);
183
184    // Initialize mutex to protect shared data_shmem buffer
185    if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) {
186        delete gpu;
187        GGML_ABORT(GGML_VIRTGPU
188                   "%s: failed to initialize data_shmem mutex", __func__);
189        return NULL;
190    }
191
192    if (virtgpu_open(gpu) != APIR_SUCCESS) {
193        GGML_LOG_ERROR(GGML_VIRTGPU
194                       "%s: failed to open the virtgpu device\n", __func__);
195        return NULL;
196    }
197
198    if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
199        if (gpu->use_apir_capset) {
200            GGML_ABORT(GGML_VIRTGPU
201                       "%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library supports it.", __func__);
202        } else {
203            GGML_ABORT(GGML_VIRTGPU
204                       "%s: failed to initialize the virtgpu Venus capset", __func__);
205        }
206        return NULL;
207    }
208
209    if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
210        GGML_ABORT(GGML_VIRTGPU
211                   "%s: failed to initialize the GPU context", __func__);
212        return NULL;
213    }
214
215    if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
216        GGML_ABORT(GGML_VIRTGPU
217                   "%s: failed to create the shared reply memory pages", __func__);
218        return NULL;
219    }
220
221    if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
222        GGML_ABORT(GGML_VIRTGPU
223                   "%s: failed to create the shared data memory pages", __func__);
224        return NULL;
225    }
226
227    if (virtgpu_handshake(gpu)) {
228        GGML_ABORT(GGML_VIRTGPU
229                   "%s: failed to handshake with the virglrenderer library", __func__);
230        return NULL;
231    }
232
233    if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
234        GGML_ABORT(GGML_VIRTGPU
235                   "%s: failed to load the backend library", __func__);
236        return NULL;
237    }
238
239    return gpu;
240}
241
242static virt_gpu_result_t virtgpu_open(virtgpu * gpu) {
243    drmDevicePtr devs[8];
244    int          count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
245    if (count < 0) {
246        GGML_LOG_ERROR(GGML_VIRTGPU
247                       "%s: failed to enumerate DRM devices\n", __func__);
248        return APIR_ERROR_INITIALIZATION_FAILED;
249    }
250
251    virt_gpu_result_t result = APIR_ERROR_INITIALIZATION_FAILED;
252    for (int i = 0; i < count; i++) {
253        result = virtgpu_open_device(gpu, devs[i]);
254        if (result == APIR_SUCCESS) {
255            break;
256        }
257    }
258
259    drmFreeDevices(devs, count);
260
261    return result;
262}
263
264static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev) {
265    const char * node_path = dev->nodes[DRM_NODE_RENDER];
266
267    int fd = open(node_path, O_RDWR | O_CLOEXEC);
268    if (fd < 0) {
269        GGML_ABORT(GGML_VIRTGPU
270                   "%s: failed to open %s", __func__, node_path);
271        return APIR_ERROR_INITIALIZATION_FAILED;
272    }
273
274    drmVersionPtr version = drmGetVersion(fd);
275    if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
276        if (version) {
277            GGML_LOG_ERROR(GGML_VIRTGPU
278                           "%s: unknown DRM driver %s version %d\n", __func__, version->name, version->version_major);
279        } else {
280            GGML_LOG_ERROR(GGML_VIRTGPU
281                           "%s: failed to get DRM driver version\n", __func__);
282        }
283
284        if (version) {
285            drmFreeVersion(version);
286        }
287        close(fd);
288        return APIR_ERROR_INITIALIZATION_FAILED;
289    }
290
291    gpu->fd = fd;
292
293    drmFreeVersion(version);
294
295    GGML_LOG_INFO(GGML_VIRTGPU "using DRM device %s\n", node_path);
296
297    return APIR_SUCCESS;
298}
299
300static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) {
301    assert(!gpu->capset.version);
302    const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
303    if (ret) {
304        GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to initialize context: %s\n", __func__, strerror(errno));
305        return APIR_ERROR_INITIALIZATION_FAILED;
306    }
307
308    return APIR_SUCCESS;
309}
310
311static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
312    if (gpu->use_apir_capset) {
313        GGML_LOG_INFO(GGML_VIRTGPU "Using the APIR capset\n");
314        gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR;
315    } else {
316        GGML_LOG_INFO(GGML_VIRTGPU "Using the Venus capset\n");
317        gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS;
318    }
319    gpu->capset.version = 0;
320
321    int ret =
322        virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
323
324    if (ret) {
325        GGML_LOG_ERROR(GGML_VIRTGPU
326                       "%s: failed to get APIR v%d capset: %s\n",
327                       __func__, gpu->capset.version, strerror(errno));
328        return APIR_ERROR_INITIALIZATION_FAILED;
329    }
330
331    assert(gpu->capset.data.supports_blob_resources);
332
333    return APIR_SUCCESS;
334}
335
336static int virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id) {
337    drm_virtgpu_context_set_param ctx_set_params[3] = {
338        {
339         .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
340         .value = capset_id,
341         },
342        {
343         .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
344         .value = 1,
345         },
346        {
347         .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK,
348         .value = 0, /* don't generate drm_events on fence signaling */
349        },
350    };
351
352    drm_virtgpu_context_init args = {
353        .num_params     = ARRAY_SIZE(ctx_set_params),
354        .pad            = 0,
355        .ctx_set_params = (uintptr_t) &ctx_set_params,
356    };
357
358    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
359}
360
361static int virtgpu_ioctl_get_caps(virtgpu *             gpu,
362                                  virgl_renderer_capset id,
363                                  uint32_t              version,
364                                  void *                capset,
365                                  size_t                capset_size) {
366    drm_virtgpu_get_caps args = {
367        .cap_set_id  = id,
368        .cap_set_ver = version,
369        .addr        = (uintptr_t) capset,
370        .size        = (__u32) capset_size,
371        .pad         = 0,
372    };
373
374    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
375}
376
377static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param) {
378    /* val must be zeroed because kernel only writes the lower 32 bits */
379    uint64_t             val  = 0;
380    drm_virtgpu_getparam args = {
381        .param = param,
382        .value = (uintptr_t) &val,
383    };
384
385    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
386    return ret ? 0 : val;
387}
388
389apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags) {
390    /*
391     * Prepare the command encoder and its buffer
392     */
393
394    thread_local char encoder_buffer[4096];
395
396    thread_local apir_encoder enc;
397    enc = {
398        .cur   = encoder_buffer,
399        .start = encoder_buffer,
400        .end   = encoder_buffer + sizeof(encoder_buffer),
401        .fatal = false,
402    };
403
404    /*
405     * Fill the command encoder with the common args:
406     * - cmd_type (int32_t)
407     * - cmd_flags (int32_t)
408     * - reply res id (uint32_t)
409   */
410
411    int32_t cmd_type = apir_cmd_type;
412
413    // for testing during the hypervisor transition
414    if (!gpu->use_apir_capset) {
415        cmd_type += VENUS_COMMAND_TYPE_LENGTH;
416    }
417    apir_encode_int32_t(&enc, &cmd_type);
418    apir_encode_int32_t(&enc, &cmd_flags);
419
420    uint32_t reply_res_id = gpu->reply_shmem.res_id;
421    apir_encode_uint32_t(&enc, &reply_res_id);
422
423    return &enc;
424}
425
426void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec) {
427    UNUSED(gpu);
428
429    if (!enc) {
430        GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) encoder", __func__);
431    }
432
433    if (!dec) {
434        GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) decoder", __func__);
435    }
436
437    if (apir_encoder_get_fatal(enc)) {
438        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to encode the output parameters.", __func__);
439    }
440
441    if (apir_decoder_get_fatal(dec)) {
442        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to decode the input parameters.", __func__);
443    }
444}
445
446uint32_t remote_call(virtgpu *       gpu,
447                     apir_encoder *  encoder,
448                     apir_decoder ** decoder,
449                     float           max_wait_ms,
450                     long long *     call_duration_ns) {
451    /*
452     * Prepare the reply notification pointer
453     */
454
455    volatile std::atomic_uint * atomic_reply_notif = (volatile std::atomic_uint *) gpu->reply_shmem.mmap_ptr;
456    *atomic_reply_notif                            = 0;
457
458    /*
459     * Trigger the execbuf ioctl
460     */
461
462    drm_virtgpu_execbuffer args = {
463        .flags   = VIRTGPU_EXECBUF_RING_IDX,
464        .size    = (uint32_t) (encoder->cur - encoder->start),
465        .command = (uintptr_t) encoder->start,
466
467        .bo_handles     = 0,
468        .num_bo_handles = 0,
469
470        .fence_fd         = 0,
471        .ring_idx         = 0,
472        .syncobj_stride   = 0,
473        .num_in_syncobjs  = 0,
474        .num_out_syncobjs = 0,
475        .in_syncobjs      = 0,
476        .out_syncobjs     = 0,
477    };
478
479    *decoder = NULL;
480
481    int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
482
483    if (ret != 0) {
484        GGML_ABORT(GGML_VIRTGPU "%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
485    }
486
487    /*
488     * Wait for the response notification
489     */
490    timer_data wait_host_reply_timer = { 0, 0, 0 };
491
492    start_timer(&wait_host_reply_timer);
493
494    timespec ts_start, ts_end;
495    clock_gettime(CLOCK_MONOTONIC, &ts_start);
496    long long start_time = (long long) ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec;
497
498    bool     timedout    = false;
499    uint32_t notif_value = 0;
500    while (true) {
501        notif_value = std::atomic_load_explicit(atomic_reply_notif, std::memory_order_acquire);
502
503        if (notif_value != 0) {
504            break;
505        }
506
507        int64_t base_sleep_us = 15;
508
509        os_time_sleep(base_sleep_us);
510
511        if (max_wait_ms) {
512            clock_gettime(CLOCK_MONOTONIC, &ts_end);
513            long long end_time    = (long long) ts_end.tv_sec * 1000000000LL + ts_end.tv_nsec;
514            float     duration_ms = (end_time - start_time) / 1000000;
515
516            if (duration_ms > max_wait_ms) {
517                timedout = true;
518                break;
519            }
520        }
521    }
522
523    if (call_duration_ns) {
524        *call_duration_ns = stop_timer(&wait_host_reply_timer);
525    }
526
527    if (max_wait_ms && timedout) {
528        GGML_LOG_ERROR(GGML_VIRTGPU "%s: timed out waiting for the host answer...\n", __func__);
529        return APIR_FORWARD_TIMEOUT;
530    }
531
532    /*
533     * Prepare the decoder
534     */
535    static apir_decoder response_dec;
536    response_dec.cur = (char *) gpu->reply_shmem.mmap_ptr + sizeof(*atomic_reply_notif);
537    response_dec.end = (char *) gpu->reply_shmem.mmap_ptr + gpu->reply_shmem.mmap_size;
538    *decoder         = &response_dec;
539
540    // extract the actual return value from the notif flag
541    uint32_t returned_value = notif_value - 1;
542    return returned_value;
543}
544
545static void log_call_duration(long long call_duration_ns, const char * name) {
546    double call_duration_ms = (double) call_duration_ns / 1e6;  // 1 millisecond = 1e6 nanoseconds
547    double call_duration_s  = (double) call_duration_ns / 1e9;  // 1 second = 1e9 nanoseconds
548
549    if (call_duration_s > 1) {
550        GGML_LOG_INFO(GGML_VIRTGPU
551                      "waited %.2fs for the %s host reply...\n", call_duration_s, name);
552    } else if (call_duration_ms > 1) {
553        GGML_LOG_INFO(GGML_VIRTGPU
554                      "waited %.2fms for the %s host reply...\n", call_duration_ms, name);
555    } else {
556        GGML_LOG_INFO(GGML_VIRTGPU
557                      "waited %lldns for the %s host reply...\n", call_duration_ns, name);
558    }
559}