From ffaafde16ffebd2853467f0dd833625a726ce08e Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Thu, 26 Feb 2026 13:00:57 +0100 Subject: [PATCH] ggml-virtgpu: improve the reliability of the code (#19846) * ggml-virtgpu-backend: validate the consistency of the received objects This patch adds consistency checks in the ggml-virtgpu-backend (running on the host side) to ensure that the data received from the guest is consistent (valid pointers, valid sizes and offsets). * ggml-virtgpu-backend: add fallback/skips for optional ggml backend methods ``` 1. bck->iface.synchronize(bck) 2. buft->iface.get_alloc_size(buft, op) 3. buft->iface.get_max_size(buft) ``` these three methods are optional in the GGML interface. `get_max_size` was already properly defaulted, but `backend synchronize` and `buft get_alloc_size` would have segfaulted the backend if not implemented. * ggml-virtgpu-backend: fix log format missing argument * ggml-virtgpu-backend: improve the abort message * ggml-virtgpu-backend: more safety checks * ggml-virtgpu-backend: new error code * ggml-virtgpu-backend: initialize all the error codes * ggml-virtgpu: add a missing comment generated by the code generator * ggml-virtgpu: add the '[virtgpu]' prefix to the device/buffer names * ggml-virtgpu: apir_device_buffer_from_ptr: improve the error message * ggml-virtgpu: shared: make it match the latest api_remoting.h of Virglrenderer APIR (still unmerged) * ggml-virtgpu: update the code generator to have dispatch_command_name in a host/guest shared file * ggml-virtgpu: REMOTE_CALL: fail if the backend returns an error * docs/backend/VirtGPU.md: indicate that the RAM+VRAM size is limited to 64 GB with libkrun * ggml-virtgpu: turn off clang-format header ordering for some of the files Compilation breaks when ordered alphabetically. 
* ggml-virtgpu: clang-format * ggml-virtgpu/backend/shared/api_remoting: better comments for the APIR return codes --- docs/backend/VirtGPU.md | 4 +- .../backend/backend-dispatched-backend.cpp | 43 +++++++++- .../backend-dispatched-buffer-type.cpp | 14 ++- .../backend/backend-dispatched-buffer.cpp | 48 +++++++++++ .../backend/backend-dispatched.cpp | 13 ++- .../backend/backend-dispatched.gen.h | 58 ------------- .../ggml-virtgpu/backend/backend-dispatched.h | 2 + .../ggml-virtgpu/backend/backend-virgl-apir.h | 2 +- ggml/src/ggml-virtgpu/backend/backend.cpp | 38 ++++----- .../backend/shared/api_remoting.h | 21 +++-- .../backend/shared/apir_backend.gen.h | 58 +++++++++++++ .../backend/shared/apir_backend.h | 6 +- .../src/ggml-virtgpu/backend/shared/apir_cs.h | 20 ++--- .../backend/shared/apir_cs_ggml.h | 27 ++++-- .../ggml-virtgpu/backend/shared/apir_cs_rpc.h | 4 + .../ggml-virtgpu/ggml-backend-buffer-type.cpp | 6 +- ggml/src/ggml-virtgpu/ggml-backend-device.cpp | 7 +- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 56 ++++++++---- ggml/src/ggml-virtgpu/ggml-backend.cpp | 2 +- ggml/src/ggml-virtgpu/ggml-remoting.h | 2 +- ggml/src/ggml-virtgpu/include/apir_hw.h | 6 +- ggml/src/ggml-virtgpu/regenerate_remoting.py | 47 +++++----- .../ggml-virtgpu/virtgpu-forward-backend.cpp | 6 +- .../virtgpu-forward-buffer-type.cpp | 8 +- .../ggml-virtgpu/virtgpu-forward-buffer.cpp | 12 +-- .../ggml-virtgpu/virtgpu-forward-device.cpp | 4 +- ggml/src/ggml-virtgpu/virtgpu-forward-impl.h | 47 +++++----- ggml/src/ggml-virtgpu/virtgpu-forward.gen.h | 1 + ggml/src/ggml-virtgpu/virtgpu.cpp | 85 ++++++++----------- ggml/src/ggml-virtgpu/virtgpu.h | 8 +- 30 files changed, 398 insertions(+), 257 deletions(-) diff --git a/docs/backend/VirtGPU.md b/docs/backend/VirtGPU.md index c81468da13..72f376dea7 100644 --- a/docs/backend/VirtGPU.md +++ b/docs/backend/VirtGPU.md @@ -152,7 +152,9 @@ Commands and data are serialized using a custom binary protocol with: - **VM-specific**: Only works in virtual 
machines with virtio-gpu support - **Host dependency**: Requires properly configured host-side backend - **Latency**: Small overhead from VM escaping for each operation - +- **Shared-memory size**: with the `libkrun` hypervisor, the RAM + VRAM + addressable memory is limited to 64 GB. So the maximum GPU memory + will be `64GB - RAM`, regardless of the hardware VRAM size. * This work is pending upstream changes in the VirglRenderer project. diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp index cc879e51d0..03a037f1cb 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp @@ -7,9 +7,21 @@ #include +static uint32_t validate_graph_operation(size_t cgraph_size, uint32_t shmem_res_id, const char * operation) { + if (cgraph_size == 0) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Zero-size computation graph\n", operation); + return 1; + } + + // place-holder: validate that the size of shmem_res_id is <= cgraph_size + // need to add another method in the Virgl->APIR callback interface + GGML_UNUSED(shmem_res_id); + + return 0; // Valid +} + uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { GGML_UNUSED(ctx); - GGML_UNUSED(enc); static bool async_backend_initialized = false; static bool async_backend; @@ -34,10 +46,26 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v size_t cgraph_size; apir_decode_size_t(dec, &cgraph_size); + if (validate_graph_operation(cgraph_size, shmem_res_id, __func__) != 0) { + apir_decoder_set_fatal(dec); + return 1; + } + apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size); ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size); + if (!cgraph || apir_decoder_get_fatal(&secondary_dec)) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Failed to 
deserialize computation graph\n", __func__); + return 1; + } + + if (cgraph->n_nodes < 0 || cgraph->n_leafs < 0) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid negative node/leaf count: nodes=%d leafs=%d\n", __func__, + cgraph->n_nodes, cgraph->n_leafs); + return 1; + } + ggml_status status; #if APIR_BACKEND_CHECK_SUPPORTS_OP == 1 for (int idx = 0; idx < cgraph->n_nodes; idx++) { @@ -45,7 +73,8 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v if (dev->iface.supports_op(dev, op)) { continue; } - GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op)); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", __func__, idx, + ggml_op_desc(op)); status = GGML_STATUS_ABORTED; apir_encode_ggml_status(enc, &status); @@ -53,9 +82,17 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v return 0; } #endif + + // Check if backend is properly initialized + if (!bck) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Backend not initialized (bck is null)\n", __func__); + + return 1; + } + status = bck->iface.graph_compute(bck, cgraph); - if (async_backend) { + if (async_backend && bck->iface.synchronize) { bck->iface.synchronize(bck); } diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp index d55eec2761..c66dbaa9e8 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp @@ -85,7 +85,19 @@ uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * d const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec); - size_t value = buft->iface.get_alloc_size(buft, op); + // Check for decode error + if (op == nullptr) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Failed to decode tensor\n", __func__); + apir_decoder_set_fatal(dec); + return 
1; + } + + size_t value; + if (buft->iface.get_alloc_size) { + value = buft->iface.get_alloc_size(buft, op); + } else { + value = ggml_nbytes(op); // Default fallback + } apir_encode_size_t(enc, &value); diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp index 8cc063ff0a..3ade8d99b4 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp @@ -6,11 +6,26 @@ #include +static uint32_t validate_buffer_operation(size_t offset, size_t size, const char * operation) { + // Only check for critical integer overflow - no arbitrary size limits + if (offset > SIZE_MAX - size) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Integer overflow in offset+size: %zu + %zu\n", operation, offset, size); + return 1; + } + + return 0; // Valid +} + uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { GGML_UNUSED(ctx); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); + if (!buffer || apir_decoder_get_fatal(dec)) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__); + return 1; + } + uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer); apir_encode_uintptr_t(enc, &base); @@ -24,6 +39,11 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); + if (!buffer || apir_decoder_get_fatal(dec)) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__); + return 1; + } + ggml_tensor * tensor; // safe to remove the const qualifier here tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec); @@ -37,6 +57,10 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl size_t size; apir_decode_size_t(dec, &size); + if (validate_buffer_operation(offset, size, __func__) != 0) { + 
return 1; + } + void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { @@ -56,6 +80,11 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); + if (!buffer || apir_decoder_get_fatal(dec)) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__); + return 1; + } + const ggml_tensor * tensor; // safe to remove the const qualifier here tensor = apir_decode_ggml_tensor(dec); @@ -69,6 +98,10 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl size_t size; apir_decode_size_t(dec, &size); + if (validate_buffer_operation(offset, size, __func__) != 0) { + return 1; + } + void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__); @@ -86,6 +119,11 @@ uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); + if (!buffer || apir_decoder_get_fatal(dec)) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__); + return 1; + } + const ggml_tensor * src; // safe to remove the const qualifier here src = apir_decode_ggml_tensor(dec); @@ -105,6 +143,11 @@ uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); + if (!buffer || apir_decoder_get_fatal(dec)) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__); + return 1; + } + uint8_t value; apir_decode_uint8_t(dec, &value); @@ -120,6 +163,11 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); + if (!buffer || apir_decoder_get_fatal(dec)) { + 
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__); + return 1; + } + if (!apir_untrack_backend_buffer(buffer)) { GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer); return 1; diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp index 64152eef0d..c80e4aabe1 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp @@ -1,6 +1,6 @@ #include "backend-dispatched.h" -#include "backend-virgl-apir.h" +#include "backend-virgl-apir.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -28,19 +28,24 @@ uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) { return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED; } - if (!reg->iface.get_device_count(reg)) { - GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device found\n", __func__); + size_t device_count = reg->iface.get_device_count(reg); + if (!device_count) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: no device found\n", __func__); return APIR_BACKEND_INITIALIZE_NO_DEVICE; } dev = reg->iface.get_device(reg, 0); if (!dev) { - GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device received\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: failed to get device\n", __func__); return APIR_BACKEND_INITIALIZE_NO_DEVICE; } bck = dev->iface.init_backend(dev, NULL); + if (!bck) { + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed\n", __func__); + return APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED; + } return APIR_BACKEND_INITIALIZE_SUCCESS; } diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h b/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h index 481d7f3150..3dc334e4ce 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h @@ -32,64 
+32,6 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg /* backend */ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); -static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) { - switch (type) { - /* device */ - case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT: - return "backend_device_get_device_count"; - case APIR_COMMAND_TYPE_DEVICE_GET_COUNT: - return "backend_device_get_count"; - case APIR_COMMAND_TYPE_DEVICE_GET_NAME: - return "backend_device_get_name"; - case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION: - return "backend_device_get_description"; - case APIR_COMMAND_TYPE_DEVICE_GET_TYPE: - return "backend_device_get_type"; - case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY: - return "backend_device_get_memory"; - case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP: - return "backend_device_supports_op"; - case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE: - return "backend_device_get_buffer_type"; - case APIR_COMMAND_TYPE_DEVICE_GET_PROPS: - return "backend_device_get_props"; - case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR: - return "backend_device_buffer_from_ptr"; - /* buffer-type */ - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME: - return "backend_buffer_type_get_name"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT: - return "backend_buffer_type_get_alignment"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE: - return "backend_buffer_type_get_max_size"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST: - return "backend_buffer_type_is_host (DEPRECATED)"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER: - return "backend_buffer_type_alloc_buffer"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE: - return "backend_buffer_type_get_alloc_size"; - /* buffer */ - case APIR_COMMAND_TYPE_BUFFER_GET_BASE: - return "backend_buffer_get_base"; - case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR: - return "backend_buffer_set_tensor"; - case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR: 
- return "backend_buffer_get_tensor"; - case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR: - return "backend_buffer_cpy_tensor"; - case APIR_COMMAND_TYPE_BUFFER_CLEAR: - return "backend_buffer_clear"; - case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER: - return "backend_buffer_free_buffer"; - /* backend */ - case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE: - return "backend_backend_graph_compute"; - - default: - return "unknown"; - } -} - extern "C" { static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = { diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.h b/ggml/src/ggml-virtgpu/backend/backend-dispatched.h index 10311631d4..740ee9e3ff 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.h @@ -1,5 +1,6 @@ #pragma once +// clang-format off #include #include @@ -10,6 +11,7 @@ #include "shared/apir_backend.h" #include "shared/apir_cs.h" #include "shared/apir_cs_ggml.h" +// clang-format on #define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: " diff --git a/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h b/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h index 44b347f853..c65a01cdf9 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +++ b/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h @@ -19,7 +19,7 @@ struct virgl_apir_callbacks { }; extern "C" { -ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs); +ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks * virgl_cbs); void apir_backend_deinit(uint32_t virgl_ctx_id); uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, virgl_apir_callbacks * virgl_cbs, diff --git a/ggml/src/ggml-virtgpu/backend/backend.cpp b/ggml/src/ggml-virtgpu/backend/backend.cpp index d93414a078..535a05f3e6 100644 --- a/ggml/src/ggml-virtgpu/backend/backend.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend.cpp @@ -1,6 +1,5 
@@ #include "backend-dispatched.h" #include "backend-virgl-apir.h" - #include "shared/api_remoting.h" #include "shared/apir_backend.h" #include "shared/apir_cs.h" @@ -17,10 +16,10 @@ #define GGML_DEFAULT_BACKEND_REG "ggml_backend_init" static void * backend_library_handle = NULL; -static FILE * apir_logfile = NULL; +static FILE * apir_logfile = NULL; static void log_to_file_callback(enum ggml_log_level level, const char * text, void * user_data) { - FILE * logfile = (FILE *)user_data; + FILE * logfile = (FILE *) user_data; fprintf(logfile, "[%d] %s", level, text); fflush(logfile); } @@ -48,9 +47,9 @@ void apir_backend_deinit(uint32_t virgl_ctx_id) { } #define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path" -#define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg" +#define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg" -ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs) { +ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks * virgl_cbs) { const char * dlsym_error; const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); @@ -63,15 +62,13 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct } } - const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY); + const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY); const char * virgl_library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY); - const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG; + const char * library_reg = virgl_library_reg ? 
virgl_library_reg : GGML_DEFAULT_BACKEND_REG; if (!library_name) { - GGML_LOG_ERROR(GGML_VIRTGPU_BCK - "%s: cannot open the GGML library: env var '%s' not defined\n", - __func__, APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); - + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot open the GGML library: env var '%s' not defined\n", __func__, + APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -79,16 +76,14 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct backend_library_handle = dlopen(library_name, RTLD_LAZY); if (!backend_library_handle) { - GGML_LOG_ERROR(GGML_VIRTGPU_BCK - "%s: cannot open the GGML library: %s\n", __func__, dlerror()); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot open the GGML library: %s\n", __func__, dlerror()); return APIR_LOAD_LIBRARY_CANNOT_OPEN; } if (!library_reg) { - GGML_LOG_ERROR(GGML_VIRTGPU_BCK - "%s: cannot register the GGML library: env var '%s' not defined\n", - __func__, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot register the GGML library: env var '%s' not defined\n", __func__, + APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -96,11 +91,9 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); dlsym_error = dlerror(); if (dlsym_error) { - GGML_LOG_ERROR(GGML_VIRTGPU_BCK - "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n", + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n", __func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error); - return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } @@ -132,13 +125,12 @@ uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, virgl_apir_context ctx = { .ctx_id = virgl_ctx_id, - .iface = virgl_cbs, + .iface = virgl_cbs, }; if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { 
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK - "%s: Received an invalid dispatch index (%d >= %d)\n", - __func__, cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Received an invalid dispatch index (%d >= %d)\n", __func__, cmd_type, + APIR_BACKEND_DISPATCH_TABLE_COUNT); return APIR_BACKEND_FORWARD_INDEX_INVALID; } diff --git a/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h b/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h index f19a5d12d1..6bf97e8a3a 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +++ b/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h @@ -16,28 +16,32 @@ enum ApirCommandType { APIR_COMMAND_TYPE_LOADLIBRARY = 1, APIR_COMMAND_TYPE_FORWARD = 2, - APIR_COMMAND_TYPE_LENGTH = 3, + APIR_COMMAND_TYPE_LENGTH = 3, }; typedef uint64_t ApirCommandFlags; enum ApirLoadLibraryReturnCode { APIR_LOAD_LIBRARY_SUCCESS = 0, + // these error codes are returned by the Virglrenderer APIR component APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1, APIR_LOAD_LIBRARY_ALREADY_LOADED = 2, APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3, APIR_LOAD_LIBRARY_CANNOT_OPEN = 4, APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5, - APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code + // any value greater than this is an APIR *backend library* initialization return code + APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, }; enum ApirForwardReturnCode { - APIR_FORWARD_SUCCESS = 0, - APIR_FORWARD_NO_DISPATCH_FCT = 1, - APIR_FORWARD_TIMEOUT = 2, - - APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code -} ; + APIR_FORWARD_SUCCESS = 0, + // these error codes are returned by the Virglrenderer APIR component + APIR_FORWARD_NO_DISPATCH_FCT = 1, + APIR_FORWARD_TIMEOUT = 2, + APIR_FORWARD_FAILED_TO_SYNC_STREAMS = 3, + // any value greater than this index an APIR *backend library* forward return code + APIR_FORWARD_BASE_INDEX = 4, +}; __attribute__((unused)) static 
inline const char * apir_command_name(ApirCommandType type) { switch (type) { @@ -82,6 +86,7 @@ __attribute__((unused)) static const char * apir_forward_error(ApirForwardReturn APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS); APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT); APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT); + APIR_FORWARD_ERROR(APIR_FORWARD_FAILED_TO_SYNC_STREAMS); APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX); return "Unknown APIR_COMMAND_TYPE_FORWARD error"; diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h b/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h index d214b6f2a9..520ac9c729 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h @@ -34,3 +34,61 @@ typedef enum ApirBackendCommandType { // last command_type index + 1 APIR_BACKEND_DISPATCH_TABLE_COUNT = 23, } ApirBackendCommandType; + +static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) { + switch (type) { + /* device */ + case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT: + return "device_get_device_count"; + case APIR_COMMAND_TYPE_DEVICE_GET_COUNT: + return "device_get_count"; + case APIR_COMMAND_TYPE_DEVICE_GET_NAME: + return "device_get_name"; + case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION: + return "device_get_description"; + case APIR_COMMAND_TYPE_DEVICE_GET_TYPE: + return "device_get_type"; + case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY: + return "device_get_memory"; + case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP: + return "device_supports_op"; + case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE: + return "device_get_buffer_type"; + case APIR_COMMAND_TYPE_DEVICE_GET_PROPS: + return "device_get_props"; + case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR: + return "device_buffer_from_ptr"; + /* buffer-type */ + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME: + return "buffer_type_get_name"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT: + return "buffer_type_get_alignment"; + case 
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE: + return "buffer_type_get_max_size"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST: + return "buffer_type_is_host"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER: + return "buffer_type_alloc_buffer"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE: + return "buffer_type_get_alloc_size"; + /* buffer */ + case APIR_COMMAND_TYPE_BUFFER_GET_BASE: + return "buffer_get_base"; + case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR: + return "buffer_set_tensor"; + case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR: + return "buffer_get_tensor"; + case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR: + return "buffer_cpy_tensor"; + case APIR_COMMAND_TYPE_BUFFER_CLEAR: + return "buffer_clear"; + case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER: + return "buffer_free_buffer"; + /* backend */ + case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE: + return "backend_graph_compute"; + + default: + return "unknown"; + } +} diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h b/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h index f3efa52c72..da1e21b5b2 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h @@ -14,7 +14,7 @@ #define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6 #define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7 #define APIR_BACKEND_INITIALIZE_NO_DEVICE 8 - +#define APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED 9 // new entries here need to be added to the apir_backend_initialize_error function below @@ -39,6 +39,10 @@ static const char * apir_backend_initialize_error(int code) { APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS); APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS); APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_ALREADY_INITED); + 
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_NO_DEVICE); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED); return "Unknown APIR_BACKEND_INITIALIZE error:/"; diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h index 1bc3a5f685..64bf2ec960 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h @@ -13,7 +13,6 @@ struct apir_encoder { const char * start; const char * end; bool fatal; - }; struct apir_decoder { @@ -28,8 +27,8 @@ struct apir_decoder { static apir_decoder apir_new_decoder(const char * ptr, size_t size) { apir_decoder dec = { - .cur = ptr, - .end = ptr + size, + .cur = ptr, + .end = ptr + size, .fatal = false, }; @@ -79,10 +78,7 @@ static inline bool apir_decoder_get_fatal(const apir_decoder * dec) { * encode peek */ -static inline bool apir_decoder_peek_internal(apir_decoder * dec, - size_t size, - void * val, - size_t val_size) { +static inline bool apir_decoder_peek_internal(apir_decoder * dec, size_t size, void * val, size_t val_size) { assert(val_size <= size); if (unlikely(size > (size_t) (dec->end - dec->cur))) { @@ -332,8 +328,7 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t static inline void * apir_decoder_alloc_array(size_t size, size_t count) { size_t alloc_size; if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { - GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n", - __func__, size, count); + GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n", __func__, size, count); return NULL; } @@ -352,20 +347,19 @@ static inline void apir_decode_bool_t(apir_decoder * dec, bool * val) { /* apir_buffer_type_host_handle_t */ -static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc, +static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc, const 
apir_buffer_type_host_handle_t * val) { apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); } -static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec, +static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec, apir_buffer_type_host_handle_t * val) { apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); } /* apir_buffer_host_handle_t */ -static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc, - const apir_buffer_host_handle_t * val) { +static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc, const apir_buffer_host_handle_t * val) { apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); } diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h index 289f4b77d7..fabe3e401c 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h @@ -1,11 +1,10 @@ -#include "ggml-impl.h" #include "apir_cs.h" #include "apir_cs_rpc.h" +#include "ggml-impl.h" // ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer); -static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, - const apir_buffer_host_handle_t * handle); +static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle); static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec); @@ -22,8 +21,7 @@ static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(apir_decoder return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); } -static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, - uint32_t n_tensors) { +static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, 
uint32_t n_tensors) { size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors; return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); @@ -45,9 +43,9 @@ static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) { } ggml_init_params params{ - /*.mem_size =*/ ggml_tensor_overhead(), - /*.mem_buffer =*/ NULL, - /*.no_alloc =*/ true, + /*.mem_size =*/ggml_tensor_overhead(), + /*.mem_buffer =*/NULL, + /*.no_alloc =*/true, }; ggml_context * ctx = ggml_init(params); @@ -105,6 +103,19 @@ static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec) apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size); + // SECURITY: Validate buffer handle against tracked buffers to prevent + // guest VM from providing arbitrary host memory addresses + if (buffer) { + extern std::unordered_set backend_buffers; + if (backend_buffers.find(buffer) == backend_buffers.end()) { + GGML_LOG_WARN("ggml-virtgpu-backend: %s: Invalid buffer handle from guest: %p\n", __func__, + (void *) buffer); + // Set fatal flag to prevent further processing with invalid handle + apir_decoder_set_fatal(dec); + return NULL; + } + } + return buffer; } diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h index f681798952..4cb2f047d1 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h @@ -1,3 +1,6 @@ +#pragma once + +// clang-format off #include "ggml.h" #include "ggml-backend-impl.h" @@ -5,6 +8,7 @@ #include #include #include +// clang-format on // ggml_tensor is serialized into apir_rpc_tensor struct apir_rpc_tensor { diff --git a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp index c493a8e2ae..8fa20ff43b 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp @@ -34,6 
+34,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); + // Return the prefixed name that was built once during initialization return gpu->cached_buffer_type.name; } @@ -53,9 +54,8 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff const ggml_tensor * tensor) { virtgpu * gpu = BUFT_TO_GPU(buft); - if (tensor->buffer == NULL - || !tensor->buffer->context - || !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) { + if (tensor->buffer == NULL || !tensor->buffer->context || + !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) { return ggml_nbytes(tensor); } diff --git a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp index c7d2881058..ec8156bb86 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp @@ -3,6 +3,7 @@ static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); + // Return the prefixed name that was built once during initialization return gpu->cached_device_info.name; } @@ -22,7 +23,7 @@ static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_bac static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { virtgpu * gpu = DEV_TO_GPU(dev); - *free = gpu->cached_device_info.memory_free; + *free = gpu->cached_device_info.memory_free; *total = gpu->cached_device_info.memory_total; } @@ -72,7 +73,7 @@ static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - static std::atomic initialized = false; + static std::atomic initialized = false; static 
ggml_backend_buffer_type buft; if (!initialized) { @@ -95,7 +96,7 @@ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_bac static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - static std::atomic initialized = false; + static std::atomic initialized = false; static ggml_backend_buffer_type buft; if (!initialized) { diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index 2d02cfec1d..a4df5956aa 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -7,8 +7,8 @@ void ggml_virtgpu_cleanup(virtgpu * gpu); static virtgpu * apir_initialize() { - static virtgpu * gpu = NULL; - static std::atomic initialized = false; + static virtgpu * gpu = NULL; + static std::atomic initialized = false; if (initialized) { // fast track @@ -31,29 +31,53 @@ static virtgpu * apir_initialize() { } // Pre-fetch and cache all device information, it will not change - gpu->cached_device_info.description = apir_device_get_description(gpu); + gpu->cached_device_info.description = apir_device_get_description(gpu); if (!gpu->cached_device_info.description) { GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device description", __func__); } - gpu->cached_device_info.name = apir_device_get_name(gpu); - if (!gpu->cached_device_info.name) { - GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device name", __func__); - } gpu->cached_device_info.device_count = apir_device_get_count(gpu); gpu->cached_device_info.type = apir_device_get_type(gpu); - apir_device_get_memory(gpu, - &gpu->cached_device_info.memory_free, - &gpu->cached_device_info.memory_total); + { + // Get the remote name and create prefixed version + char * rmt_device_name = apir_device_get_name(gpu); + if (!rmt_device_name) { + GGML_ABORT(GGML_VIRTGPU "%s: failed to get the virtgpu device name", 
__func__); + } + + size_t device_name_len = strlen(rmt_device_name) + 11; // "[virtgpu] " + null terminator + gpu->cached_device_info.name = (char *) malloc(device_name_len); + if (!gpu->cached_device_info.name) { + free(rmt_device_name); + GGML_ABORT(GGML_VIRTGPU "%s: failed to allocate memory for prefixed device name", __func__); + } + snprintf(gpu->cached_device_info.name, device_name_len, "[virtgpu] %s", rmt_device_name); + free(rmt_device_name); + } + + apir_device_get_memory(gpu, &gpu->cached_device_info.memory_free, &gpu->cached_device_info.memory_total); apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu); gpu->cached_buffer_type.host_handle = buft_host_handle; - gpu->cached_buffer_type.name = apir_buffer_type_get_name(gpu, buft_host_handle); - if (!gpu->cached_buffer_type.name) { - GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu buffer type name", __func__); + { + // Get the remote name and create prefixed version + char * rmt_name = apir_buffer_type_get_name(gpu, buft_host_handle); + if (!rmt_name) { + GGML_ABORT(GGML_VIRTGPU "%s: failed to get the virtgpu buffer type name", __func__); + } + + size_t prefixed_len = strlen(rmt_name) + 11; // "[virtgpu] " + null terminator + gpu->cached_buffer_type.name = (char *) malloc(prefixed_len); + if (!gpu->cached_buffer_type.name) { + free(rmt_name); + GGML_ABORT(GGML_VIRTGPU "%s: failed to allocate memory for prefixed buffer type name", __func__); + } + snprintf(gpu->cached_buffer_type.name, prefixed_len, "[virtgpu] %s", rmt_name); + free(rmt_name); } - gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle); - gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle); + + gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle); + gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle); initialized = true; } @@ -98,7 +122,7 @@ static void 
ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { static std::atomic initialized = false; if (initialized) { - return; // fast track + return; // fast track } { diff --git a/ggml/src/ggml-virtgpu/ggml-backend.cpp b/ggml/src/ggml-virtgpu/ggml-backend.cpp index 5cd6c0c060..a63ee2b9d2 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend.cpp @@ -1,5 +1,5 @@ -#include "ggml-remoting.h" #include "../../include/ggml-virtgpu.h" +#include "ggml-remoting.h" static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) { UNUSED(backend); diff --git a/ggml/src/ggml-virtgpu/ggml-remoting.h b/ggml/src/ggml-virtgpu/ggml-remoting.h index 0876640867..4f70326bee 100644 --- a/ggml/src/ggml-virtgpu/ggml-remoting.h +++ b/ggml/src/ggml-virtgpu/ggml-remoting.h @@ -9,7 +9,7 @@ #include #define GGML_VIRTGPU_NAME "ggml-virtgpu" -#define GGML_VIRTGPU "ggml-virtgpu: " +#define GGML_VIRTGPU "ggml-virtgpu: " // USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes diff --git a/ggml/src/ggml-virtgpu/include/apir_hw.h b/ggml/src/ggml-virtgpu/include/apir_hw.h index 33af045ca2..7d6ea2265d 100644 --- a/ggml/src/ggml-virtgpu/include/apir_hw.h +++ b/ggml/src/ggml-virtgpu/include/apir_hw.h @@ -3,7 +3,7 @@ #include struct virgl_renderer_capset_apir { - uint32_t apir_version; - uint32_t supports_blob_resources; - uint32_t reserved[4]; // For future expansion + uint32_t apir_version; + uint32_t supports_blob_resources; + uint32_t reserved[4]; // For future expansion }; diff --git a/ggml/src/ggml-virtgpu/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py index aeb48a4087..dae75fd1c8 100755 --- a/ggml/src/ggml-virtgpu/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -145,8 +145,31 @@ class RemotingCodebaseGenerator: enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},") enum_lines.append("} ApirBackendCommandType;") + # Generate function name mapping + 
func_lines = [] + func_lines.append("static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) {") + func_lines.append(" switch (type) {") + + current_group = None + for func in functions: + # Add comment for new group + if func['group_name'] != current_group: + func_lines.append(f" /* {func['group_description']} */") + current_group = func['group_name'] + + # Generate clean function name without backend_ prefix + clean_name = f"{func['group_name']}_{func['function_name']}" + func_lines.append(f" case {func['enum_name']}:") + func_lines.append(f" return \"{clean_name}\";") + + func_lines.append("") + func_lines.append(" default:") + func_lines.append(" return \"unknown\";") + func_lines.append(" }") + func_lines.append("}") + # Full header template - header_content = NL.join(enum_lines) + "\n" + header_content = NL.join(enum_lines) + "\n\n" + NL.join(func_lines) + "\n" return header_content @@ -170,19 +193,6 @@ class RemotingCodebaseGenerator: decl_lines.append(f"{signature} {func['backend_function']}({params});") - # Switch cases - switch_lines = [] - current_group = None - - for func in functions: - if func['group_name'] != current_group: - switch_lines.append(f" /* {func['group_description']} */") - current_group = func['group_name'] - - deprecated = " (DEPRECATED)" if func['deprecated'] else "" - - switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}{deprecated}\";") - # Dispatch table table_lines = [] current_group = None @@ -201,15 +211,6 @@ class RemotingCodebaseGenerator: {NL.join(decl_lines)} -static inline const char *backend_dispatch_command_name(ApirBackendCommandType type) -{{ - switch (type) {{ -{NL.join(switch_lines)} - - default: return "unknown"; - }} -}} - extern "C" {{ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{ {NL.join(table_lines)} diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp 
b/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp index 07d9a66849..4593690c63 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp @@ -17,8 +17,8 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data); virtgpu_shmem temp_shmem; // Local storage for large buffers - virtgpu_shmem * shmem = &temp_shmem; - bool using_shared_shmem = false; + virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (cgraph_size <= gpu->data_shmem.mmap_size) { // Lock mutex before using shared data_shmem buffer @@ -26,7 +26,7 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); } using_shared_shmem = true; - shmem = &gpu->data_shmem; + shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); } diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp index cab74fd170..38f8ec945e 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp @@ -62,7 +62,9 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle return max_size; } -apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) { +apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + size_t size) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -84,7 +86,9 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_t return buffer_context; } -size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, 
apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) { +size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + const ggml_tensor * op) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp index 86eee358cf..228284f4a4 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp @@ -35,8 +35,8 @@ void apir_buffer_set_tensor(virtgpu * gpu, apir_encode_ggml_tensor(encoder, tensor); virtgpu_shmem temp_shmem; // Local storage for large buffers - virtgpu_shmem * shmem = &temp_shmem; - bool using_shared_shmem = false; + virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (size <= gpu->data_shmem.mmap_size) { // Lock mutex before using shared data_shmem buffer @@ -44,7 +44,7 @@ void apir_buffer_set_tensor(virtgpu * gpu, GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); } using_shared_shmem = true; - shmem = &gpu->data_shmem; + shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); @@ -86,8 +86,8 @@ void apir_buffer_get_tensor(virtgpu * gpu, apir_encode_ggml_tensor(encoder, tensor); virtgpu_shmem temp_shmem; // Local storage for large buffers - virtgpu_shmem * shmem = &temp_shmem; - bool using_shared_shmem = false; + virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (size <= gpu->data_shmem.mmap_size) { // Lock mutex before using shared data_shmem buffer @@ -95,7 +95,7 @@ void apir_buffer_get_tensor(virtgpu * gpu, GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); } using_shared_shmem = true; - shmem = &gpu->data_shmem; + shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { 
GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp index 4b6b8f527b..9f513c138d 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp @@ -26,7 +26,7 @@ char * apir_device_get_name(virtgpu * gpu) { REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); + char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__); return NULL; @@ -173,7 +173,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, si REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR); if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) { - GGML_ABORT(GGML_VIRTGPU "Couldn't allocate the guest-host shared buffer"); + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate %ldb of guest-host shared buffer", __func__, size); } apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id); diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h b/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h index f23c75bb96..4d0b6e05c7 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h @@ -1,29 +1,36 @@ -#include "virtgpu.h" +#pragma once +// clang-format off +#include "virtgpu.h" #include "ggml-remoting.h" #include "backend/shared/apir_backend.h" #include "backend/shared/apir_cs_ggml.h" - #include "ggml-backend-impl.h" +// clang-format on -#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ - do { \ - int32_t forward_flag = (int32_t) apir_command_type__; \ - encoder_name = remote_call_prepare(gpu_dev_name, 
APIR_COMMAND_TYPE_FORWARD, forward_flag); \ - if (!encoder_name) { \ - GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \ - } \ +#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ + int32_t REMOTE_CALL_PREPARE_forward_flag = (int32_t) apir_command_type__; \ + const char * REMOTE_CALL_PREPARE_command_name = apir_dispatch_command_name(apir_command_type__); \ + do { \ + encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, REMOTE_CALL_PREPARE_forward_flag); \ + if (!encoder_name) { \ + GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \ + } \ } while (0) -#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ - do { \ - ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ - if (!decoder_name) { \ - GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \ - } \ - if (ret_name < APIR_FORWARD_BASE_INDEX) { \ - GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \ - apir_forward_error(ret_name), ret_name); \ - } \ - ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ +#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ + do { \ + ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ + if (!decoder_name) { \ + GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \ + } \ + if (ret_name < APIR_FORWARD_BASE_INDEX) { \ + GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \ + apir_forward_error(ret_name), ret_name); \ + } \ + ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ + if (ret_name != 0) { \ + GGML_ABORT(GGML_VIRTGPU "backend function '%s' failed (return code: %d)", \ + REMOTE_CALL_PREPARE_command_name, ret_name); \ + } \ } while (0) diff --git 
a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h index fe4cae2025..44b0ad1ffa 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h @@ -20,6 +20,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +/* apir_buffer_type_is_host is deprecated. */ apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size); diff --git a/ggml/src/ggml-virtgpu/virtgpu.cpp b/ggml/src/ggml-virtgpu/virtgpu.cpp index 1e650dc65b..a84a77399d 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu.cpp @@ -53,9 +53,9 @@ static int virtgpu_handshake(virtgpu * gpu) { if (!decoder) { GGML_ABORT(GGML_VIRTGPU - "%s: failed to initiate the communication with the virglrenderer library. " - "Most likely, the wrong virglrenderer library was loaded in the hypervisor.", - __func__); + "%s: failed to initiate the communication with the virglrenderer library. 
" + "Most likely, the wrong virglrenderer library was loaded in the hypervisor.", + __func__); return 1; } @@ -65,8 +65,7 @@ static int virtgpu_handshake(virtgpu * gpu) { uint32_t host_minor; if (ret_magic != APIR_HANDSHAKE_MAGIC) { - GGML_ABORT(GGML_VIRTGPU - "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic, + GGML_ABORT(GGML_VIRTGPU "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic, apir_backend_initialize_error(ret_magic)); } else { apir_decode_uint32_t(decoder, &host_major); @@ -140,15 +139,13 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { "Make sure virglrenderer is correctly configured by the hypervisor. (%s) ", __func__, apir_load_library_error(ret)); } else { - GGML_ABORT(GGML_VIRTGPU - "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", __func__, - apir_load_library_error(ret), ret); + GGML_ABORT(GGML_VIRTGPU "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", + __func__, apir_load_library_error(ret), ret); } return ret; } - GGML_LOG_INFO(GGML_VIRTGPU - "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__); + GGML_LOG_INFO(GGML_VIRTGPU "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__); ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); @@ -158,10 +155,11 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { "Make sure virglrenderer is correctly configured by the hypervisor. (%s)", __func__, apir_load_library_error(apir_ret)); } else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) { - GGML_ABORT(GGML_VIRTGPU - "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. " - "Make sure virglrenderer is correctly configured by the hypervisor. 
(%s)", - __func__, apir_load_library_error(apir_ret)); + GGML_ABORT( + GGML_VIRTGPU + "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. " + "Make sure virglrenderer is correctly configured by the hypervisor. (%s)", + __func__, apir_load_library_error(apir_ret)); } else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { GGML_ABORT(GGML_VIRTGPU "%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)", @@ -169,8 +167,8 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { } else { uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; GGML_ABORT(GGML_VIRTGPU - "%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__, - lib_ret); + "%s: the API Remoting backend library failed to initialize its backend library: apir code=%d)", + __func__, lib_ret); } return ret; } @@ -184,55 +182,49 @@ virtgpu * create_virtgpu() { // Initialize mutex to protect shared data_shmem buffer if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) { delete gpu; - GGML_ABORT(GGML_VIRTGPU - "%s: failed to initialize data_shmem mutex", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize data_shmem mutex", __func__); return NULL; } if (virtgpu_open(gpu) != APIR_SUCCESS) { - GGML_LOG_ERROR(GGML_VIRTGPU - "%s: failed to open the virtgpu device\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to open the virtgpu device\n", __func__); return NULL; } if (virtgpu_init_capset(gpu) != APIR_SUCCESS) { if (gpu->use_apir_capset) { GGML_ABORT(GGML_VIRTGPU - "%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library supports it.", __func__); + "%s: failed to initialize the virtgpu APIR capset. 
Make sure that the virglrenderer library " + "supports it.", + __func__); } else { - GGML_ABORT(GGML_VIRTGPU - "%s: failed to initialize the virtgpu Venus capset", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu Venus capset", __func__); } return NULL; } if (virtgpu_init_context(gpu) != APIR_SUCCESS) { - GGML_ABORT(GGML_VIRTGPU - "%s: failed to initialize the GPU context", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the GPU context", __func__); return NULL; } if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) { - GGML_ABORT(GGML_VIRTGPU - "%s: failed to create the shared reply memory pages", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to create the shared reply memory pages", __func__); return NULL; } if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) { - GGML_ABORT(GGML_VIRTGPU - "%s: failed to create the shared data memory pages", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to create the shared data memory pages", __func__); return NULL; } if (virtgpu_handshake(gpu)) { - GGML_ABORT(GGML_VIRTGPU - "%s: failed to handshake with the virglrenderer library", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to handshake with the virglrenderer library", __func__); return NULL; } if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) { - GGML_ABORT(GGML_VIRTGPU - "%s: failed to load the backend library", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to load the backend library", __func__); return NULL; } @@ -243,8 +235,7 @@ static virt_gpu_result_t virtgpu_open(virtgpu * gpu) { drmDevicePtr devs[8]; int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs)); if (count < 0) { - GGML_LOG_ERROR(GGML_VIRTGPU - "%s: failed to enumerate DRM devices\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to enumerate DRM devices\n", __func__); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -266,19 +257,17 @@ static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr d 
int fd = open(node_path, O_RDWR | O_CLOEXEC); if (fd < 0) { - GGML_ABORT(GGML_VIRTGPU - "%s: failed to open %s", __func__, node_path); + GGML_ABORT(GGML_VIRTGPU "%s: failed to open %s", __func__, node_path); return APIR_ERROR_INITIALIZATION_FAILED; } drmVersionPtr version = drmGetVersion(fd); if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) { if (version) { - GGML_LOG_ERROR(GGML_VIRTGPU - "%s: unknown DRM driver %s version %d\n", __func__, version->name, version->version_major); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: unknown DRM driver %s version %d\n", __func__, version->name, + version->version_major); } else { - GGML_LOG_ERROR(GGML_VIRTGPU - "%s: failed to get DRM driver version\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to get DRM driver version\n", __func__); } if (version) { @@ -322,9 +311,8 @@ static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) { virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data)); if (ret) { - GGML_LOG_ERROR(GGML_VIRTGPU - "%s: failed to get APIR v%d capset: %s\n", - __func__, gpu->capset.version, strerror(errno)); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to get APIR v%d capset: %s\n", __func__, gpu->capset.version, + strerror(errno)); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -547,13 +535,10 @@ static void log_call_duration(long long call_duration_ns, const char * name) { double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds if (call_duration_s > 1) { - GGML_LOG_INFO(GGML_VIRTGPU - "waited %.2fs for the %s host reply...\n", call_duration_s, name); + GGML_LOG_INFO(GGML_VIRTGPU "waited %.2fs for the %s host reply...\n", call_duration_s, name); } else if (call_duration_ms > 1) { - GGML_LOG_INFO(GGML_VIRTGPU - "waited %.2fms for the %s host reply...\n", call_duration_ms, name); + GGML_LOG_INFO(GGML_VIRTGPU "waited %.2fms for the %s host reply...\n", call_duration_ms, name); } else { - 
GGML_LOG_INFO(GGML_VIRTGPU - "waited %lldns for the %s host reply...\n", call_duration_ns, name); + GGML_LOG_INFO(GGML_VIRTGPU "waited %lldns for the %s host reply...\n", call_duration_ns, name); } } diff --git a/ggml/src/ggml-virtgpu/virtgpu.h b/ggml/src/ggml-virtgpu/virtgpu.h index 68e0f3a376..f82d8fb50b 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.h +++ b/ggml/src/ggml-virtgpu/virtgpu.h @@ -1,5 +1,6 @@ #pragma once +// clang-format off #include "virtgpu-utils.h" #include "virtgpu-shm.h" #include "virtgpu-apir.h" @@ -23,20 +24,21 @@ #include "apir_hw.h" #include #include "venus_hw.h" +// clang-format on #ifndef VIRTGPU_DRM_CAPSET_APIR // Will be defined include/drm/virtgpu_drm.h when // https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1590/diffs // is merged -#define VIRTGPU_DRM_CAPSET_APIR 10 +# define VIRTGPU_DRM_CAPSET_APIR 10 #endif // Mesa/Virlgrenderer Venus internal. Only necessary during the // Venus->APIR transition in Virglrenderer #define VENUS_COMMAND_TYPE_LENGTH 331 -#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16 -#define VIRTGPU_DRM_CAPSET_VENUS 4 +#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16 +# define VIRTGPU_DRM_CAPSET_VENUS 4 #endif typedef uint32_t virgl_renderer_capset;