From 015deb90485b9ebfff492e10fe7080a0439202b6 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 4 Feb 2026 03:46:18 +0100 Subject: [PATCH] ggml-virtgpu: make the code thread safe (#19204) * ggml-virtgpu: regenerate_remoting.py: add the ability to deprecate a function * ggml-virtgpu: deprecate buffer_type is_host (remoting not necessary) * ggml-virtgpu: stop using static vars as cache: the static init isn't thread safe. * ggml-virtgpu: protect the use of the shared memory to transfer data * ggml-virtgpu: make the remote calls thread-safe * ggml-virtgpu: backend: don't continue if the tensor memory couldn't be allocated * ggml-virtgpu: add a cleanup function for consistency * ggml-virtgpu: backend: don't crash if buft->iface.get_max_size is missing * fix style and ordering * Remove the static variable in apir_device_get_count * ggml-virtgpu: improve the logging * fix review: minor formatting changes --- ggml/include/ggml-virtgpu.h | 2 - .../ggml-virtgpu/apir_cs_ggml-rpc-front.cpp | 2 +- .../backend/backend-dispatched-backend.cpp | 4 +- .../backend-dispatched-buffer-type.cpp | 12 +- .../backend/backend-dispatched-buffer.cpp | 6 +- .../backend/backend-dispatched-device.cpp | 2 +- .../backend/backend-dispatched.cpp | 8 +- .../backend/backend-dispatched.gen.h | 5 +- .../ggml-virtgpu/backend/backend-dispatched.h | 2 + ggml/src/ggml-virtgpu/backend/backend.cpp | 32 ++-- .../src/ggml-virtgpu/backend/shared/apir_cs.h | 11 +- .../backend/shared/apir_cs_ggml.h | 14 +- .../ggml-virtgpu/ggml-backend-buffer-type.cpp | 29 +--- ggml/src/ggml-virtgpu/ggml-backend-device.cpp | 61 ++++--- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 94 ++++++++--- ggml/src/ggml-virtgpu/ggml-remoting.h | 5 +- .../ggml-virtgpu/ggmlremoting_functions.yaml | 20 ++- ggml/src/ggml-virtgpu/regenerate_remoting.py | 18 ++- .../ggml-virtgpu/virtgpu-forward-backend.cpp | 14 +- .../virtgpu-forward-buffer-type.cpp | 41 ++--- .../ggml-virtgpu/virtgpu-forward-buffer.cpp | 28 +++- .../ggml-virtgpu/virtgpu-forward-device.cpp | 22 +-- ggml/src/ggml-virtgpu/virtgpu-forward-impl.h | 6 +- ggml/src/ggml-virtgpu/virtgpu-forward.gen.h | 21 +-- ggml/src/ggml-virtgpu/virtgpu-shm.cpp | 3 +- ggml/src/ggml-virtgpu/virtgpu.cpp | 149 ++++++++++++------ ggml/src/ggml-virtgpu/virtgpu.h | 23 +++ 27 files changed, 397 insertions(+), 237 deletions(-) diff --git a/ggml/include/ggml-virtgpu.h b/ggml/include/ggml-virtgpu.h index 1cb4bd7a03..faaba8f246 100644 --- a/ggml/include/ggml-virtgpu.h +++ b/ggml/include/ggml-virtgpu.h @@ -7,8 +7,6 @@ extern "C" { #endif -#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend" - GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg(); #ifdef __cplusplus diff --git a/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp b/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp index f60ae3556c..d2e87330a6 100644 --- a/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +++ b/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp @@ -36,7 +36,7 @@ apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) { result.data = reinterpret_cast<uint64_t>(tensor->data); if (tensor->data) { if (!tensor->buffer) { - GGML_ABORT("tensor has data but not buffer"); + GGML_ABORT("%s: tensor has data but no buffer", __func__); } // tensor->data is serialized as an offset to the buffer base address result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base); diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp index 77b4ee71e1..cc879e51d0 100644 ---
a/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp @@ -27,7 +27,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__); apir_decoder_set_fatal(dec); return 1; } @@ -45,7 +45,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v if (dev->iface.supports_op(dev, op)) { continue; } - GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op)); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", __func__, idx, ggml_op_desc(op)); status = GGML_STATUS_ABORTED; apir_encode_ggml_status(enc, &status); diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp index 8ea1bb4fb4..d55eec2761 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp @@ -36,18 +36,22 @@ uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); - size_t value = buft->iface.get_max_size(buft); + size_t value = SIZE_MAX; + if (buft->iface.get_max_size) { + value = buft->iface.get_max_size(buft); + } + apir_encode_size_t(enc, &value); return 0; } +/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */ uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { GGML_UNUSED(ctx); - ggml_backend_buffer_type_t buft; - buft = apir_decode_ggml_buffer_type(dec); + GGML_UNUSED(dec); + const bool is_host = false; - bool is_host = buft->iface.is_host(buft); apir_encode_bool_t(enc, &is_host); return 0; diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp index cf81888e98..8cc063ff0a 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp @@ -40,7 +40,7 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__); return 1; } @@ -71,7 +71,7 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__); return 1; } @@ -121,7 +121,7 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg buffer = apir_decode_ggml_buffer(dec); if (!apir_untrack_backend_buffer(buffer)) { - GGML_LOG_WARN("%s: unknown buffer %p\n", __func__, (void *) buffer); + GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer); return 1; } diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp
b/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp index 497f737a88..c7acb8b51c 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp @@ -124,7 +124,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, void * shmem_ptr = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_ptr) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__); apir_decoder_set_fatal(dec); return 1; } diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp index 51d445725f..64152eef0d 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp @@ -17,26 +17,26 @@ uint64_t timer_count = 0; uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) { if (reg != NULL) { - GGML_LOG_WARN("%s: already initialized\n", __func__); + GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: already initialized\n", __func__); return APIR_BACKEND_INITIALIZE_ALREADY_INITED; } ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p; reg = ggml_backend_reg_fct(); if (reg == NULL) { - GGML_LOG_ERROR("%s: backend registration failed\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend registration failed\n", __func__); return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED; } if (!reg->iface.get_device_count(reg)) { - GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device found\n", __func__); return APIR_BACKEND_INITIALIZE_NO_DEVICE; } dev = reg->iface.get_device(reg, 0); if (!dev) { - GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device received\n", __func__); return APIR_BACKEND_INITIALIZE_NO_DEVICE; } diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h b/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h index b81fd5039b..481d7f3150 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h @@ -16,6 +16,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. 
*/ uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); @@ -62,7 +63,7 @@ static inline const char * backend_dispatch_command_name(ApirBackendCommandType case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE: return "backend_buffer_type_get_max_size"; case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST: - return "backend_buffer_type_is_host"; + return "backend_buffer_type_is_host (DEPRECATED)"; case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER: return "backend_buffer_type_alloc_buffer"; case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE: @@ -110,7 +111,7 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name, /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment, /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size, - /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host /* DEPRECATED */, /* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer, /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size, diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.h b/ggml/src/ggml-virtgpu/backend/backend-dispatched.h index 6ccbecf078..10311631d4 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.h @@ -11,6 +11,8 @@ #include "shared/apir_cs.h" #include "shared/apir_cs_ggml.h" +#define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: " + struct virgl_apir_context { uint32_t ctx_id; virgl_apir_callbacks * iface; diff --git a/ggml/src/ggml-virtgpu/backend/backend.cpp b/ggml/src/ggml-virtgpu/backend/backend.cpp index 95d602ed60..d93414a078 100644 --- a/ggml/src/ggml-virtgpu/backend/backend.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend.cpp @@ -35,14 +35,8 @@ void apir_backend_deinit(uint32_t virgl_ctx_id) { buffer->iface.free_buffer(buffer); } - if (dev) { - size_t free, total; - dev->iface.get_memory(dev, &free, &total); - GGML_LOG_INFO("%s: free memory: %ld MB\n", __func__, (size_t) free / 1024 / 1024); - } - if (backend_library_handle) { - GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.\n", __func__); + GGML_LOG_INFO(GGML_VIRTGPU_BCK "The GGML backend library was loaded. Unloading it.\n"); dlclose(backend_library_handle); backend_library_handle = NULL; } @@ -65,7 +59,7 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct if (apir_logfile) { ggml_log_set(log_to_file_callback, apir_logfile); } else { - GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file); + GGML_LOG_INFO(GGML_VIRTGPU_BCK "Could not open the log file at '%s'\n", apir_log_to_file); } } @@ -74,7 +68,10 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct const char * library_reg = virgl_library_reg ? 
virgl_library_reg : GGML_DEFAULT_BACKEND_REG; if (!library_name) { - GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK + "%s: cannot open the GGML library: env var '%s' not defined\n", + __func__, APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); + return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -82,13 +79,16 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct backend_library_handle = dlopen(library_name, RTLD_LAZY); if (!backend_library_handle) { - GGML_LOG_ERROR("cannot open the GGML library: %s\n", dlerror()); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK + "%s: cannot open the GGML library: %s\n", __func__, dlerror()); return APIR_LOAD_LIBRARY_CANNOT_OPEN; } if (!library_reg) { - GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK + "%s: cannot register the GGML library: env var '%s' not defined\n", + __func__, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -96,8 +96,10 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); dlsym_error = dlerror(); if (dlsym_error) { - GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s\n", library_reg, - APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK + "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n", + __func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error); + return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } @@ -134,7 +136,9 @@ uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, }; if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { - GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); + GGML_LOG_ERROR(GGML_VIRTGPU_BCK + "%s: Received an invalid dispatch index (%d >= %d)\n", + __func__, cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); return APIR_BACKEND_FORWARD_INDEX_INVALID; } diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h index 27a61091ff..1bc3a5f685 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h @@ -86,7 +86,7 @@ static inline bool apir_decoder_peek_internal(apir_decoder * dec, assert(val_size <= size); if (unlikely(size > (size_t) (dec->end - dec->cur))) { - GGML_LOG_ERROR("reading too much from the decoder ...\n"); + GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__); apir_decoder_set_fatal(dec); memset(val, 0, val_size); return false; @@ -103,7 +103,7 @@ static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) { if (unlikely(size > (size_t) (dec->end - dec->cur))) { - GGML_LOG_ERROR("reading too much from the decoder ...\n"); + GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__); apir_decoder_set_fatal(dec); return NULL; } @@ -221,7 +221,7 @@ static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expec uint64_t size; apir_decode_uint64_t(dec, &size); if (size != expected_size) { - GGML_LOG_ERROR("Couldn't decode array from the decoder\n"); + GGML_LOG_ERROR("%s: Couldn't decode array from the decoder\n", __func__); apir_decoder_set_fatal(dec); size 
= 0; } @@ -322,7 +322,7 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t if (size) { val[size - 1] = '\0'; } else { - GGML_LOG_ERROR("Couldn't decode the blog array\n"); + GGML_LOG_ERROR("%s: Couldn't decode the blob array\n", __func__); apir_decoder_set_fatal(dec); } } @@ -332,7 +332,8 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t static inline void * apir_decoder_alloc_array(size_t size, size_t count) { size_t alloc_size; if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { - GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes\n", size, count); + GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n", + __func__, size, count); return NULL; } diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h index 070c3b25fb..289f4b77d7 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h @@ -39,11 +39,17 @@ static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) { const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec); + + if (!apir_rpc_tensor) { + return NULL; + } + ggml_init_params params{ /*.mem_size =*/ ggml_tensor_overhead(), /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; + ggml_context * ctx = ggml_init(params); const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor); @@ -71,6 +77,10 @@ static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decod return (ggml_backend_buffer_type_t) handle; } +static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) { + apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); +} + static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) { apir_buffer_type_host_handle_t handle; @@ -154,13 +164,13 @@ static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml size_t tensor_size = sizeof(*tensor); if (tensor->extra) { - GGML_ABORT("Cannot pass tensors with extra"); + GGML_ABORT("%s: Cannot pass tensors with extra", __func__); } if (tensor->src[0] && tensor->buffer) { static int first = 1; if (first) { - GGML_LOG_WARN("Cannot pass tensors with src and buffer\n"); + GGML_LOG_WARN("%s: Cannot pass tensors with src and buffer\n", __func__); first = 0; } } diff --git a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp index 7f650659b8..c493a8e2ae 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp @@ -6,7 +6,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { - GGML_ABORT("Couldn't allocate the buffer context ..."); + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__); } context->gpu = gpu; @@ -20,7 +20,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml context->base = context->apir_context.shmem.mmap_ptr; context->is_from_ptr = true; } else { - context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size); + context->apir_context =
apir_buffer_type_alloc_buffer(gpu, gpu->cached_buffer_type.host_handle, size); context->is_from_ptr = false; context->base = NULL; } @@ -34,36 +34,19 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - return apir_buffer_type_get_name(gpu, buft); + return gpu->cached_buffer_type.name; } static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - static size_t align = 0; - - if (align == 0) { - align = apir_buffer_type_get_alignment(gpu, buft); - } - - return align; + return gpu->cached_buffer_type.alignment; } static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - static size_t max_size = 0; - if (max_size == 0) { - max_size = apir_buffer_type_get_max_size(gpu, buft); - } - - return max_size; -} - -static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) { - virtgpu * gpu = BUFT_TO_GPU(buft); - - return apir_buffer_type_is_host(gpu, buft); + return gpu->cached_buffer_type.max_size; } static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, @@ -76,7 +59,7 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff return ggml_nbytes(tensor); } - return apir_buffer_type_get_alloc_size(gpu, buft, tensor); + return apir_buffer_type_get_alloc_size(gpu, gpu->cached_buffer_type.host_handle, tensor); } const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = { diff --git a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp index 579eb99078..c7d2881058 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp @@ -3,32 +3,27 @@ static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_name(gpu); + return gpu->cached_device_info.name; } static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_description(gpu); + // Return the pre-cached description from the virtgpu structure + return gpu->cached_device_info.description; } static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - static enum ggml_backend_dev_type type; - static bool has_type = false; - if (!has_type) { - has_type = true; - type = (enum ggml_backend_dev_type) apir_device_get_type(gpu); - } - - return type; + return (enum ggml_backend_dev_type) gpu->cached_device_info.type; } static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_memory(gpu, free, total); + *free = gpu->cached_device_info.memory_free; + *total = gpu->cached_device_info.memory_total; } static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { @@ -77,13 +72,22 @@ static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + static 
std::atomic<bool> initialized = false; + static ggml_backend_buffer_type buft; - static ggml_backend_buffer_type buft{ - /* .iface = */ ggml_backend_remoting_buffer_type_interface, - /* .device = */ dev, - /* .context = */ (void *) ctx, - }; + if (!initialized) { + static std::mutex mutex; + std::lock_guard<std::mutex> lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } return &buft; } @@ -91,13 +95,22 @@ static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + static std::atomic<bool> initialized = false; + static ggml_backend_buffer_type buft; - static ggml_backend_buffer_type buft{ - /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, - /* .device = */ dev, - /* .context = */ (void *) ctx, - }; + if (!initialized) { + static std::mutex mutex; + std::lock_guard<std::mutex> lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } return &buft; } @@ -110,7 +123,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_b ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { - GGML_ABORT("Couldn't allocate the buffer context ..."); + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__); } context->gpu = gpu; diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index c46cf51c02..2d02cfec1d 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -4,37 +4,70 @@ #include #include +void ggml_virtgpu_cleanup(virtgpu * gpu); + static virtgpu * apir_initialize() { - static virtgpu * apir_gpu_instance = NULL; - static bool apir_initialized = false; + static virtgpu * gpu = NULL; + static std::atomic<bool> initialized = false; + + if (initialized) { + // fast track + return gpu; + } { static std::mutex mutex; std::lock_guard<std::mutex> lock(mutex); - if (apir_initialized) { - return apir_gpu_instance; + if (initialized) { + // thread safe + return gpu; } - apir_gpu_instance = create_virtgpu(); - if (!apir_gpu_instance) { - GGML_ABORT("failed to initialize the virtgpu"); + gpu = create_virtgpu(); + if (!gpu) { + initialized = true; + return NULL; } - apir_initialized = true; + // Pre-fetch and cache all device information, it will not change + gpu->cached_device_info.description = apir_device_get_description(gpu); + if (!gpu->cached_device_info.description) { + GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device description", __func__); + } + gpu->cached_device_info.name = apir_device_get_name(gpu); + if (!gpu->cached_device_info.name) { + GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device name", __func__); + } + gpu->cached_device_info.device_count = apir_device_get_count(gpu); + gpu->cached_device_info.type = apir_device_get_type(gpu); + + apir_device_get_memory(gpu, + &gpu->cached_device_info.memory_free, + &gpu->cached_device_info.memory_total); + +
apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu); + gpu->cached_buffer_type.host_handle = buft_host_handle; + gpu->cached_buffer_type.name = apir_buffer_type_get_name(gpu, buft_host_handle); + if (!gpu->cached_buffer_type.name) { + GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu buffer type name", __func__); + } + gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle); + gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle); + + initialized = true; } - return apir_gpu_instance; + return gpu; } static int ggml_backend_remoting_get_device_count() { virtgpu * gpu = apir_initialize(); if (!gpu) { - GGML_LOG_WARN("apir_initialize failed\n"); return 0; } - return apir_device_get_count(gpu); + return gpu->cached_device_info.device_count; } static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) { @@ -52,17 +85,21 @@ ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device) { static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { if (devices.size() > 0) { - GGML_LOG_INFO("%s: already initialized\n", __func__); + GGML_LOG_INFO(GGML_VIRTGPU "%s: already initialized\n", __func__); return; } virtgpu * gpu = apir_initialize(); if (!gpu) { - GGML_LOG_ERROR("apir_initialize failed\n"); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: apir_initialize failed\n", __func__); return; } - static bool initialized = false; + static std::atomic<bool> initialized = false; + + if (initialized) { + return; // fast track + } { static std::mutex mutex; @@ -70,10 +107,10 @@ static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { if (!initialized) { for (int i = 0; i < ggml_backend_remoting_get_device_count(); i++) { ggml_backend_remoting_device_context * ctx = new ggml_backend_remoting_device_context; - char desc[256] = "API Remoting device"; + char desc[256] = "ggml-virtgpu API Remoting device"; ctx->device = i; - ctx->name = GGML_REMOTING_FRONTEND_NAME + std::to_string(i); + ctx->name = GGML_VIRTGPU_NAME + std::to_string(i); ctx->description = desc; ctx->gpu = gpu; @@ -98,7 +135,7 @@ static ggml_backend_dev_t ggml_backend_remoting_reg_get_device(ggml_backend_reg_ static const char * ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) { UNUSED(reg); - return GGML_REMOTING_FRONTEND_NAME; + return GGML_VIRTGPU_NAME; } static const ggml_backend_reg_i ggml_backend_remoting_reg_i = { @@ -111,8 +148,7 @@ static const ggml_backend_reg_i ggml_backend_remoting_reg_i = { ggml_backend_reg_t ggml_backend_virtgpu_reg() { virtgpu * gpu = apir_initialize(); if (!gpu) { - GGML_LOG_ERROR("virtgpu_apir_initialize failed\n"); - return NULL; + GGML_LOG_ERROR(GGML_VIRTGPU "%s: virtgpu_apir_initialize failed\n", __func__); } static ggml_backend_reg reg = { @@ -129,9 +165,25 @@ ggml_backend_reg_t ggml_backend_virtgpu_reg() { ggml_backend_remoting_reg_init_devices(&reg); - GGML_LOG_INFO("%s: initialized\n", __func__); - return &reg; } +// public function, not exposed in the GGML interface at the moment +void ggml_virtgpu_cleanup(virtgpu * gpu) { + if (gpu->cached_device_info.name) { + free(gpu->cached_device_info.name); + gpu->cached_device_info.name = NULL; + } + if (gpu->cached_device_info.description) { + free(gpu->cached_device_info.description); + gpu->cached_device_info.description = NULL; + } + if (gpu->cached_buffer_type.name) { + free(gpu->cached_buffer_type.name); + gpu->cached_buffer_type.name = NULL; + } + + mtx_destroy(&gpu->data_shmem_mutex); +}
GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg) diff --git a/ggml/src/ggml-virtgpu/ggml-remoting.h b/ggml/src/ggml-virtgpu/ggml-remoting.h index 36fc6b2a7b..0876640867 100644 --- a/ggml/src/ggml-virtgpu/ggml-remoting.h +++ b/ggml/src/ggml-virtgpu/ggml-remoting.h @@ -8,6 +8,9 @@ #include #include +#define GGML_VIRTGPU_NAME "ggml-virtgpu" +#define GGML_VIRTGPU "ggml-virtgpu: " + // USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes #define USE_ALWAYS_TRUE_SUPPORTS_OP 1 @@ -62,7 +65,7 @@ static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggm static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) { if (!buffer->context) { - GGML_ABORT("%s: no context available :/", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: no context available :/", __func__); } return BUFFER_TO_HOST_HANDLE(buffer); } diff --git a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml index 0b7cccfe9c..14ef2433e4 100644 --- a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +++ b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml @@ -24,10 +24,10 @@ functions: frontend_return: "int" get_name: - frontend_return: "const char *" + frontend_return: "char *" get_description: - frontend_return: "const char *" + frontend_return: "char *" get_type: frontend_return: "uint32_t" @@ -64,35 +64,33 @@ functions: group_description: "buffer-type" functions: get_name: - frontend_return: "const char *" + frontend_return: "char *" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" get_alignment: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" get_max_size: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" is_host: - frontend_return: "bool" - frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + deprecated: true alloc_buffer: frontend_return: "apir_buffer_context_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buffer_buft" + - "apir_buffer_type_host_handle_t host_handle" - "size_t size" get_alloc_size: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" - "const ggml_tensor *op" buffer: diff --git a/ggml/src/ggml-virtgpu/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py index 4174a24327..aeb48a4087 100755 --- a/ggml/src/ggml-virtgpu/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -116,7 +116,7 @@ class RemotingCodebaseGenerator: 'frontend_return': func_metadata.get('frontend_return', 'void'), 'frontend_extra_params': func_metadata.get('frontend_extra_params', []), 'group_description': group_description, - 'newly_added': func_metadata.get('newly_added', False) + 'deprecated': func_metadata.get('deprecated', False), }) enum_value += 1 @@ -165,6 +165,9 @@ class RemotingCodebaseGenerator: signature = "uint32_t" params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx" + if func['deprecated']: + decl_lines.append(f"/* {func['enum_name']} is deprecated. Keeping the handler for backward compatibility. 
*/") + decl_lines.append(f"{signature} {func['backend_function']}({params});") # Switch cases @@ -176,7 +179,9 @@ class RemotingCodebaseGenerator: switch_lines.append(f" /* {func['group_description']} */") current_group = func['group_name'] - switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}\";") + deprecated = " (DEPRECATED)" if func['deprecated'] else "" + + switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}{deprecated}\";") # Dispatch table table_lines = [] @@ -188,7 +193,8 @@ class RemotingCodebaseGenerator: table_lines.append("") current_group = func['group_name'] - table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']},") + deprecated = " /* DEPRECATED */" if func['deprecated'] else "" + table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']}{deprecated},") header_content = f'''\ #pragma once @@ -225,6 +231,10 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC decl_lines.append(f"/* {func['group_description']} */") current_group = func['group_name'] + if func['deprecated']: + decl_lines.append(f"/* {func['frontend_function']} is deprecated. */") + continue + # Build parameter list params = [self.naming_patterns['frontend_base_param']] params.extend(func['frontend_extra_params']) @@ -287,7 +297,7 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path] if not self.clang_format_available: - logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted." + logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted.\n" " Install clang-format to enable automatic code formatting.") else: logging.info("\n🎨 Formatting files with clang-format...") diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp index bf3c41011a..07d9a66849 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp @@ -18,12 +18,17 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (cgraph_size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough + // Lock mutex before using shared data_shmem buffer + if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) { + GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); + } + using_shared_shmem = true; shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer"); + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); @@ -42,7 +47,10 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { remote_call_finish(gpu, encoder, decoder); - if (shmem != &gpu->data_shmem) { + // Unlock mutex before cleanup + if (using_shared_shmem) { + mtx_unlock(&gpu->data_shmem_mutex); + } else { virtgpu_shmem_destroy(gpu, shmem); } diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp index 03cb09e064..cab74fd170 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp 
+++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp @@ -1,20 +1,20 @@ #include "virtgpu-forward-impl.h" -const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +char * apir_buffer_type_get_name(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the buffer type name buffer\n", __func__); apir_decoder_set_fatal(decoder); } apir_decode_char_array(decoder, string, string_size); @@ -24,14 +24,14 @@ const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t return string; } -size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +size_t apir_buffer_type_get_alignment(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -43,14 +43,14 @@ size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t return alignment; } -size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -62,26 +62,7 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t b return max_size; } -bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) { - apir_encoder * encoder; - apir_decoder * decoder; - ApirForwardReturnCode ret; - - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST); - - apir_encode_ggml_buffer_type(encoder, buft); - - REMOTE_CALL(gpu, encoder, decoder, ret); - - bool is_host; - apir_decode_bool_t(decoder, &is_host); - - remote_call_finish(gpu, encoder, decoder); - - return is_host; -} - -apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) { +apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -90,7 +71,7 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); apir_encode_size_t(encoder, &size); @@ -103,14 +84,14 @@ apir_buffer_context_t
apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_ return buffer_context; } -size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) { +size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); apir_encode_ggml_tensor_inline(encoder, op); diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp index 3181e39440..86eee358cf 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp @@ -36,13 +36,18 @@ void apir_buffer_set_tensor(virtgpu * gpu, virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough + // Lock mutex before using shared data_shmem buffer + if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) { + GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); + } + using_shared_shmem = true; shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer"); + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); } memcpy(shmem->mmap_ptr, data, size); @@ -55,7 +60,10 @@ void apir_buffer_set_tensor(virtgpu * gpu, remote_call_finish(gpu, encoder, decoder); - if (shmem != &gpu->data_shmem) { + // Unlock mutex before cleanup + if (using_shared_shmem) { + mtx_unlock(&gpu->data_shmem_mutex); + } else { virtgpu_shmem_destroy(gpu, shmem); } @@ -79,13 +87,18 @@ void apir_buffer_get_tensor(virtgpu * gpu, virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough + // Lock mutex before using shared data_shmem buffer + if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) { + GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); + } + using_shared_shmem = true; shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer"); + GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); @@ -98,7 +111,10 @@ void apir_buffer_get_tensor(virtgpu * gpu, remote_call_finish(gpu, encoder, decoder); - if (shmem != &gpu->data_shmem) { + // Unlock mutex before cleanup + if (using_shared_shmem) { + mtx_unlock(&gpu->data_shmem_mutex); + } else { virtgpu_shmem_destroy(gpu, shmem); } } diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp index 3e45e55bdc..4b6b8f527b 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp @@ -2,11 +2,6 @@ #include "virtgpu-shm.h" int apir_device_get_count(virtgpu * gpu) { - static int32_t dev_count = -1; - if (dev_count != -1) { - return dev_count; - } - apir_encoder * encoder; apir_decoder * decoder; 
ApirForwardReturnCode ret; @@ -14,6 +9,7 @@ int apir_device_get_count(virtgpu * gpu) { REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT); REMOTE_CALL(gpu, encoder, decoder, ret); + int32_t dev_count = -1; apir_decode_int32_t(decoder, &dev_count); remote_call_finish(gpu, encoder, decoder); @@ -21,11 +17,7 @@ int apir_device_get_count(virtgpu * gpu) { return dev_count; } -const char * apir_device_get_name(virtgpu * gpu) { - static char * string = nullptr; - if (string) { - return string; - } +char * apir_device_get_name(virtgpu * gpu) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -34,9 +26,9 @@ const char * apir_device_get_name(virtgpu * gpu) { REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); + char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__); return NULL; } apir_decode_char_array(decoder, string, string_size); @@ -46,7 +38,7 @@ const char * apir_device_get_name(virtgpu * gpu) { return string; } -const char * apir_device_get_description(virtgpu * gpu) { +char * apir_device_get_description(virtgpu * gpu) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -58,7 +50,7 @@ const char * apir_device_get_description(virtgpu * gpu) { const size_t string_size = apir_decode_array_size_unchecked(decoder); char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - GGML_LOG_ERROR("%s: Could not allocate the device description buffer\n", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device description buffer\n", __func__); return NULL; } @@ -181,7 +173,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, si REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR); if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer"); + GGML_ABORT(GGML_VIRTGPU "Couldn't allocate the guest-host shared buffer"); } apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id); diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h b/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h index eea3e7e5a9..f23c75bb96 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h @@ -11,7 +11,7 @@ int32_t forward_flag = (int32_t) apir_command_type__; \ encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \ if (!encoder_name) { \ - GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); \ + GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \ } \ } while (0) @@ -19,10 +19,10 @@ do { \ ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ if (!decoder_name) { \ - GGML_ABORT("%s: failed to kick the remote call", __func__); \ + GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \ } \ if (ret_name < APIR_FORWARD_BASE_INDEX) { \ - GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, \ + GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \ apir_forward_error(ret_name), ret_name); \ } 
\ ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h index c27c07f086..fe4cae2025 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h @@ -3,8 +3,8 @@ /* device */ void apir_device_get_device_count(struct virtgpu * gpu); int apir_device_get_count(struct virtgpu * gpu); -const char * apir_device_get_name(struct virtgpu * gpu); -const char * apir_device_get_description(struct virtgpu * gpu); +char * apir_device_get_name(struct virtgpu * gpu); +char * apir_device_get_description(struct virtgpu * gpu); uint32_t apir_device_get_type(struct virtgpu * gpu); void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total); bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op); @@ -17,14 +17,15 @@ void apir_device_get_props(struct virtgpu * gpu, apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size); /* buffer-type */ -const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, - ggml_backend_buffer_type_t buffer_buft, - size_t size); -size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op); +char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + size_t size); +size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + const ggml_tensor * op); /* buffer */ void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); diff --git a/ggml/src/ggml-virtgpu/virtgpu-shm.cpp b/ggml/src/ggml-virtgpu/virtgpu-shm.cpp index 4def405a62..ce6b3b3e60 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-shm.cpp @@ -85,8 +85,7 @@ int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem) { void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size); if (!ptr) { virtgpu_ioctl_gem_close(gpu, gem_handle); - GGML_LOG_ERROR("virtgpu_ioctl_map FAILED\n"); - exit(1); + GGML_LOG_ERROR(GGML_VIRTGPU "%s: virtgpu_ioctl_map failed\n", __func__); return 1; } diff --git a/ggml/src/ggml-virtgpu/virtgpu.cpp b/ggml/src/ggml-virtgpu/virtgpu.cpp index 005c8e21db..1e650dc65b 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu.cpp @@ -33,7 +33,7 @@ static int virtgpu_handshake(virtgpu * gpu) { encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HANDSHAKE, 0); if (!encoder) { - GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); return 1; } @@ -52,7 +52,7 @@ static int virtgpu_handshake(virtgpu * gpu) { 
log_call_duration(call_duration_ns, "API Remoting handshake"); if (!decoder) { - GGML_ABORT( + GGML_ABORT(GGML_VIRTGPU "%s: failed to initiate the communication with the virglrenderer library. " "Most likely, the wrong virglrenderer library was loaded in the hypervisor.", __func__); @@ -65,7 +65,8 @@ static int virtgpu_handshake(virtgpu * gpu) { uint32_t host_minor; if (ret_magic != APIR_HANDSHAKE_MAGIC) { - GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic, + GGML_ABORT(GGML_VIRTGPU + "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic, apir_backend_initialize_error(ret_magic)); } else { apir_decode_uint32_t(decoder, &host_major); @@ -78,13 +79,13 @@ static int virtgpu_handshake(virtgpu * gpu) { return 1; } - GGML_LOG_INFO("%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor); - GGML_LOG_INFO("%s: Host is running with %u.%u\n", __func__, host_major, host_minor); + GGML_LOG_INFO(GGML_VIRTGPU "%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor); + GGML_LOG_INFO(GGML_VIRTGPU "%s: Host is running with %u.%u\n", __func__, host_major, host_minor); if (guest_major != host_major) { - GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ\n", host_major, guest_major); + GGML_LOG_ERROR(GGML_VIRTGPU "Host major (%d) and guest major (%d) version differ\n", host_major, guest_major); } else if (guest_minor != host_minor) { - GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor); + GGML_LOG_WARN(GGML_VIRTGPU "Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor); } return 0; @@ -97,7 +98,7 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LOADLIBRARY, 0); if (!encoder) { - GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to prepare the API Remoting command encoder", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; } @@ -108,36 +109,67 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { log_call_duration(call_duration_ns, "API Remoting LoadLibrary"); if (!decoder) { - GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall.\n", __func__); + GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to trigger the API Remoting hypercall.\n", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; } remote_call_finish(gpu, encoder, decoder); if (ret == APIR_LOAD_LIBRARY_SUCCESS) { - GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized\n", __func__); + GGML_LOG_INFO(GGML_VIRTGPU "The API Remoting backend was successfully loaded and initialized\n"); return ret; } // something wrong happened, find out what. - if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - GGML_ABORT("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__, - apir_load_library_error(ret), ret); + if (ret == APIR_LOAD_LIBRARY_ENV_VAR_MISSING) { + GGML_ABORT(GGML_VIRTGPU + "%s: virglrenderer could not open the API Remoting backend library, " + "some environment variables are missing. " + "Make sure virglrenderer is correctly configured by the hypervisor. (%s)", + __func__, apir_load_library_error(ret)); + } else if (ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) { + GGML_ABORT(GGML_VIRTGPU + "%s: virglrenderer could not open the API Remoting backend library. 
" + "Make sure virglrenderer is correctly configured by the hypervisor. (%s)", + __func__, apir_load_library_error(ret)); + } else if (ret == APIR_LOAD_LIBRARY_ENV_VAR_MISSING) { + GGML_ABORT(GGML_VIRTGPU + "%s: could not load the backend library, some symbols are missing. " + "Make sure virglrenderer is correctly configured by the hypervisor. (%s) ", + __func__, apir_load_library_error(ret)); + } else { + GGML_ABORT(GGML_VIRTGPU + "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", __func__, + apir_load_library_error(ret), ret); + } return ret; } - GGML_LOG_INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__); + GGML_LOG_INFO(GGML_VIRTGPU + "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__); ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); - if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s)", + if (apir_ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) { + GGML_ABORT(GGML_VIRTGPU + "%s: the API Remoting backend library couldn't load the GGML backend library. " + "Make sure virglrenderer is correctly configured by the hypervisor. (%s)", + __func__, apir_load_library_error(apir_ret)); + } else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) { + GGML_ABORT(GGML_VIRTGPU + "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. " + "Make sure virglrenderer is correctly configured by the hypervisor. (%s)", + __func__, apir_load_library_error(apir_ret)); + } else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { + GGML_ABORT(GGML_VIRTGPU + "%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)", __func__, apir_ret, apir_load_library_error(apir_ret)); } else { uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; - GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__, + GGML_ABORT(GGML_VIRTGPU + "%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__, lib_ret); } return ret; @@ -149,38 +181,58 @@ virtgpu * create_virtgpu() { gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr; util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024); + // Initialize mutex to protect shared data_shmem buffer + if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) { + delete gpu; + GGML_ABORT(GGML_VIRTGPU + "%s: failed to initialize data_shmem mutex", __func__); + return NULL; + } + if (virtgpu_open(gpu) != APIR_SUCCESS) { - GGML_ABORT("%s: failed to open the virtgpu device", __func__); + GGML_LOG_ERROR(GGML_VIRTGPU + "%s: failed to open the virtgpu device\n", __func__); return NULL; } if (virtgpu_init_capset(gpu) != APIR_SUCCESS) { - GGML_ABORT("%s: failed to initialize the GPU capset", __func__); + if (gpu->use_apir_capset) { + GGML_ABORT(GGML_VIRTGPU + "%s: failed to initialize the virtgpu APIR capset. 
@@ -191,7 +243,8 @@ static virt_gpu_result_t virtgpu_open(virtgpu * gpu) {
     drmDevicePtr devs[8];
     int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
     if (count < 0) {
-        GGML_LOG_ERROR("%s: failed to enumerate DRM devices\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU
+                       "%s: failed to enumerate DRM devices\n", __func__);
         return APIR_ERROR_INITIALIZATION_FAILED;
     }
 
@@ -213,16 +266,19 @@ static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr d
 
     int fd = open(node_path, O_RDWR | O_CLOEXEC);
     if (fd < 0) {
-        GGML_ABORT("failed to open %s", node_path);
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to open %s", __func__, node_path);
         return APIR_ERROR_INITIALIZATION_FAILED;
     }
 
     drmVersionPtr version = drmGetVersion(fd);
     if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
         if (version) {
-            GGML_ABORT("unknown DRM driver %s version %d", version->name, version->version_major);
+            GGML_LOG_ERROR(GGML_VIRTGPU
+                           "%s: unknown DRM driver %s version %d\n", __func__, version->name, version->version_major);
         } else {
-            GGML_ABORT("failed to get DRM driver version");
+            GGML_LOG_ERROR(GGML_VIRTGPU
+                           "%s: failed to get DRM driver version\n", __func__);
         }
 
         if (version) {
@@ -236,7 +292,7 @@ static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr d
 
     drmFreeVersion(version);
 
-    GGML_LOG_INFO("using DRM device %s\n", node_path);
+    GGML_LOG_INFO(GGML_VIRTGPU "using DRM device %s\n", node_path);
 
     return APIR_SUCCESS;
 }
@@ -245,7 +301,7 @@ static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) {
     assert(!gpu->capset.version);
     const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
     if (ret) {
-        GGML_LOG_INFO("failed to initialize context: %s\n", strerror(errno));
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to initialize context: %s\n", __func__, strerror(errno));
         return APIR_ERROR_INITIALIZATION_FAILED;
     }
 
@@ -254,10 +310,10 @@ static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
     if (gpu->use_apir_capset) {
-        GGML_LOG_INFO("Using the APIR capset\n");
+        GGML_LOG_INFO(GGML_VIRTGPU "Using the APIR capset\n");
         gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR;
     } else {
-        GGML_LOG_INFO("Using the Venus capset\n");
+        GGML_LOG_INFO(GGML_VIRTGPU "Using the Venus capset\n");
         gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS;
     }
     gpu->capset.version = 0;
 
@@ -266,7 +322,9 @@ static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
     virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
     if (ret) {
-        GGML_LOG_INFO("failed to get APIR v%d capset: %s\n", gpu->capset.version, strerror(errno));
+        GGML_LOG_ERROR(GGML_VIRTGPU
+                       "%s: failed to get APIR v%d capset: %s\n",
+                       __func__, gpu->capset.version, strerror(errno));
         return APIR_ERROR_INITIALIZATION_FAILED;
     }
 
@@ -333,9 +391,9 @@ apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type,
      * Prepare the command encoder and its buffer
      */
 
-    static char encoder_buffer[4096];
+    thread_local char encoder_buffer[4096];
 
-    static apir_encoder enc;
+    thread_local apir_encoder enc;
     enc = { .cur   = encoder_buffer,
             .start = encoder_buffer,
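The static to thread_local switch above is the core of the race fix in remote_call_prepare: a function-local static buffer is a single object shared by every calling thread, so two concurrent remote calls would encode into the same 4 KiB scratch space. A self-contained illustration, not part of the patch, of the difference:

// Each thread calling get_scratch() sees its own private buffer.
#include <cstdio>
#include <thread>

static char * get_scratch() {
    thread_local char scratch[4096];  // one private 4 KiB buffer per thread
    return scratch;
}

int main() {
    std::thread t1([] { std::printf("t1 scratch: %p\n", (void *) get_scratch()); });
    std::thread t2([] { std::printf("t2 scratch: %p\n", (void *) get_scratch()); });
    t1.join();
    t2.join();  // prints two different addresses: no data race on the scratch space
    return 0;
}

With thread_local, each guest thread encodes into private storage and the encoder itself needs no lock; only the shared shmem pages still require the mutex shown earlier.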
@@ -369,19 +427,19 @@ void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec) {
     UNUSED(gpu);
 
     if (!enc) {
-        GGML_LOG_ERROR("Invalid (null) encoder\n");
+        GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) encoder", __func__);
     }
 
     if (!dec) {
-        GGML_LOG_ERROR("Invalid (null) decoder\n");
+        GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) decoder", __func__);
     }
 
     if (apir_encoder_get_fatal(enc)) {
-        GGML_LOG_ERROR("Failed to encode the output parameters.\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to encode the output parameters.", __func__);
     }
 
     if (apir_decoder_get_fatal(dec)) {
-        GGML_LOG_ERROR("Failed to decode the input parameters.\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to decode the input parameters.", __func__);
     }
 }
 
@@ -423,7 +481,7 @@ uint32_t remote_call(virtgpu * gpu,
 
     int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
     if (ret != 0) {
-        GGML_ABORT("%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
+        GGML_ABORT(GGML_VIRTGPU "%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
     }
 
     /*
@@ -467,7 +525,7 @@ uint32_t remote_call(virtgpu * gpu,
     }
 
     if (max_wait_ms && timedout) {
-        GGML_LOG_ERROR("timed out waiting for the host answer...\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: timed out waiting for the host answer...\n", __func__);
         return APIR_FORWARD_TIMEOUT;
     }
 
@@ -489,10 +547,13 @@ static void log_call_duration(long long call_duration_ns, const char * name) {
     double call_duration_s = (double) call_duration_ns / 1e9;  // 1 second = 1e9 nanoseconds
 
     if (call_duration_s > 1) {
-        GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...\n", __func__, call_duration_s, name);
+        GGML_LOG_INFO(GGML_VIRTGPU
+                      "waited %.2fs for the %s host reply...\n", call_duration_s, name);
     } else if (call_duration_ms > 1) {
-        GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...\n", __func__, call_duration_ms, name);
+        GGML_LOG_INFO(GGML_VIRTGPU
+                      "waited %.2fms for the %s host reply...\n", call_duration_ms, name);
     } else {
-        GGML_LOG_INFO("%s: waited %lldns for the %s host reply...\n", __func__, call_duration_ns, name);
+        GGML_LOG_INFO(GGML_VIRTGPU
+                      "waited %lldns for the %s host reply...\n", call_duration_ns, name);
     }
 }
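Putting the virtgpu.cpp changes together, a forward call now looks roughly like the sketch below, not part of the patch. encode_args and decode_reply are hypothetical stand-ins, and remote_call()'s real parameter list (reply page, timeout, ...) is abridged here.

// Sketch only: per-thread encoder scratch plus explicit error checks.
static uint32_t forward_one_command(virtgpu * gpu, ApirCommandType cmd) {
    apir_encoder * enc = remote_call_prepare(gpu, cmd, 0);  // thread_local scratch, no lock needed
    if (!enc) {
        return APIR_FORWARD_TIMEOUT;  // error handling abridged
    }
    encode_args(enc);  // hypothetical: serialize the input parameters

    apir_decoder * dec = nullptr;
    uint32_t ret = remote_call(gpu, enc, &dec /* , ... */);  // hypercall; the reply lands in reply_shmem

    decode_reply(dec);  // hypothetical: deserialize the output parameters
    remote_call_finish(gpu, enc, dec);  // aborts on a null encoder/decoder, logs fatal encode/decode errors
    return ret;
}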
diff --git a/ggml/src/ggml-virtgpu/virtgpu.h b/ggml/src/ggml-virtgpu/virtgpu.h
index d4bb42e20b..68e0f3a376 100644
--- a/ggml/src/ggml-virtgpu/virtgpu.h
+++ b/ggml/src/ggml-virtgpu/virtgpu.h
@@ -17,6 +17,8 @@
 #include
 
+#include "ggml-remoting.h"
+
 #define VIRGL_RENDERER_UNSTABLE_APIS 1
 #include "apir_hw.h"
 #include
 
@@ -73,6 +75,27 @@ struct virtgpu {
     /* APIR communication pages */
     virtgpu_shmem reply_shmem;
     virtgpu_shmem data_shmem;
+
+    /* Mutex to protect shared data_shmem buffer from concurrent access */
+    mtx_t data_shmem_mutex;
+
+    /* Cached device information to prevent memory leaks and race conditions */
+    struct {
+        char *   description;
+        char *   name;
+        int32_t  device_count;
+        uint32_t type;
+        size_t   memory_free;
+        size_t   memory_total;
+    } cached_device_info;
+
+    /* Cached buffer type information to prevent memory leaks and race conditions */
+    struct {
+        apir_buffer_type_host_handle_t host_handle;
+        char * name;
+        size_t alignment;
+        size_t max_size;
+    } cached_buffer_type;
 };
 
 static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) {