From 92390ad9f6e08d6b83c31171781e5112f559dc5e Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 28 Jan 2026 12:59:49 +0100 Subject: [PATCH] ggml-virtgpu: stop using static vars as cache The static init isn't thread safe. --- .../backend/shared/apir_cs_ggml.h | 4 ++ .../ggml-virtgpu/ggml-backend-buffer-type.cpp | 21 ++----- ggml/src/ggml-virtgpu/ggml-backend-device.cpp | 59 +++++++++++-------- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 55 +++++++++++++---- .../ggml-virtgpu/ggmlremoting_functions.yaml | 16 ++--- .../virtgpu-forward-buffer-type.cpp | 20 +++---- .../ggml-virtgpu/virtgpu-forward-device.cpp | 10 +--- ggml/src/ggml-virtgpu/virtgpu-forward.gen.h | 20 ++++--- ggml/src/ggml-virtgpu/virtgpu.h | 18 ++++++ 9 files changed, 140 insertions(+), 83 deletions(-) diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h index 070c3b25fb..28f7f270ef 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h @@ -71,6 +71,10 @@ static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decod return (ggml_backend_buffer_type_t) handle; } +static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) { + apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); +} + static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) { apir_buffer_type_host_handle_t handle; diff --git a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp index a2fa5246aa..68f378e516 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp @@ -20,7 +20,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml context->base = context->apir_context.shmem.mmap_ptr; context->is_from_ptr = true; } else { - context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size); + context->apir_context = apir_buffer_type_alloc_buffer(gpu, gpu->cached_buffer_type.host_handle, size); context->is_from_ptr = false; context->base = NULL; } @@ -34,30 +34,19 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - return apir_buffer_type_get_name(gpu, buft); + return gpu->cached_buffer_type.name; } static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - static size_t align = 0; - - if (align == 0) { - align = apir_buffer_type_get_alignment(gpu, buft); - } - - return align; + return gpu->cached_buffer_type.alignment; } static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - static size_t max_size = 0; - if (max_size == 0) { - max_size = apir_buffer_type_get_max_size(gpu, buft); - } - - return max_size; + return gpu->cached_buffer_type.max_size; } static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, @@ -70,7 +59,7 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff return ggml_nbytes(tensor); } - return apir_buffer_type_get_alloc_size(gpu, buft, tensor); + return apir_buffer_type_get_alloc_size(gpu, gpu->cached_buffer_type.host_handle, tensor); } const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = { diff --git a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp index 579eb99078..3f98ee58d4 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp @@ -3,32 +3,27 @@ static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_name(gpu); + return gpu->cached_device_info.name; } static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_description(gpu); + // Return the pre-cached description from the virtgpu structure + return gpu->cached_device_info.description; } static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - static enum ggml_backend_dev_type type; - static bool has_type = false; - if (!has_type) { - has_type = true; - type = (enum ggml_backend_dev_type) apir_device_get_type(gpu); - } - - return type; + return (enum ggml_backend_dev_type) gpu->cached_device_info.type; } static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_memory(gpu, free, total); + *free = gpu->cached_device_info.memory_free; + *total = gpu->cached_device_info.memory_total; } static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { @@ -77,13 +72,22 @@ static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + static std::atomic initialized = false; + static ggml_backend_buffer_type buft; - static ggml_backend_buffer_type buft{ - /* .iface = */ ggml_backend_remoting_buffer_type_interface, - /* .device = */ dev, - /* .context = */ (void *) ctx, - }; + if (!initialized) { + static std::mutex mutex; + std::lock_guard lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } return &buft; } @@ -91,13 +95,22 @@ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_bac static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + static std::atomic initialized = false; + static ggml_backend_buffer_type buft; - static ggml_backend_buffer_type buft{ - /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, - /* .device = */ dev, - /* .context = */ (void *) ctx, - }; + if (!initialized) { + static std::mutex mutex; + std::lock_guard lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } return &buft; } diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index c46cf51c02..ca8235bb65 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -5,26 +5,57 @@ #include static virtgpu * apir_initialize() { - static virtgpu * apir_gpu_instance = NULL; - static bool apir_initialized = false; + static virtgpu * gpu = NULL; + static std::atomic initialized = false; + + if (initialized) { + // fast track + return gpu; + } { static std::mutex mutex; std::lock_guard lock(mutex); - if (apir_initialized) { - return apir_gpu_instance; + if (initialized) { + // thread safe + return gpu; } - apir_gpu_instance = create_virtgpu(); - if (!apir_gpu_instance) { + gpu = create_virtgpu(); + if (!gpu) { GGML_ABORT("failed to initialize the virtgpu"); } - apir_initialized = true; + // Pre-fetch and cache all device information, it will not change + gpu->cached_device_info.description = apir_device_get_description(gpu); + if (!gpu->cached_device_info.description) { + GGML_ABORT("failed to initialize the virtgpu device description"); + } + gpu->cached_device_info.name = apir_device_get_name(gpu); + if (!gpu->cached_device_info.name) { + GGML_ABORT("failed to initialize the virtgpu device name"); + } + gpu->cached_device_info.device_count = apir_device_get_count(gpu); + gpu->cached_device_info.type = apir_device_get_type(gpu); + + apir_device_get_memory(gpu, + &gpu->cached_device_info.memory_free, + &gpu->cached_device_info.memory_total); + + apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu); + gpu->cached_buffer_type.host_handle = buft_host_handle; + gpu->cached_buffer_type.name = apir_buffer_type_get_name(gpu, buft_host_handle); + if (!gpu->cached_buffer_type.name) { + GGML_ABORT("failed to initialize the virtgpu buffer type name"); + } + gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle); + gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle); + + initialized = true; } - return apir_gpu_instance; + return gpu; } static int ggml_backend_remoting_get_device_count() { @@ -34,7 +65,7 @@ static int ggml_backend_remoting_get_device_count() { return 0; } - return apir_device_get_count(gpu); + return gpu->cached_device_info.device_count; } static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) { @@ -62,7 +93,11 @@ static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { return; } - static bool initialized = false; + static std::atomic initialized = false; + + if (initialized) { + return; // fast track + } { static std::mutex mutex; diff --git a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml index 775c5ca573..14ef2433e4 100644 --- a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +++ b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml @@ -24,10 +24,10 @@ functions: frontend_return: "int" get_name: - frontend_return: "const char *" + frontend_return: "char *" get_description: - frontend_return: "const char *" + frontend_return: "char *" get_type: frontend_return: "uint32_t" @@ -64,19 +64,19 @@ functions: group_description: "buffer-type" functions: get_name: - frontend_return: "const char *" + frontend_return: "char *" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" get_alignment: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" get_max_size: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" is_host: deprecated: true @@ -84,13 +84,13 @@ functions: alloc_buffer: frontend_return: "apir_buffer_context_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buffer_buft" + - "apir_buffer_type_host_handle_t host_handle" - "size_t size" get_alloc_size: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" - "const ggml_tensor *op" buffer: diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp index 0069b47a5b..1c0083e8e3 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp @@ -1,13 +1,13 @@ #include "virtgpu-forward-impl.h" -const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +char * apir_buffer_type_get_name(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -24,14 +24,14 @@ const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t return string; } -size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +size_t apir_buffer_type_get_alignment(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -43,14 +43,14 @@ size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t return alignment; } -size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -62,7 +62,7 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t b return max_size; } -apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) { +apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -71,7 +71,7 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); apir_encode_size_t(encoder, &size); @@ -84,14 +84,14 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_ return buffer_context; } -size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) { +size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); apir_encode_ggml_tensor_inline(encoder, op); diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp index 3e45e55bdc..627121b9ee 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp @@ -21,11 +21,7 @@ int apir_device_get_count(virtgpu * gpu) { return dev_count; } -const char * apir_device_get_name(virtgpu * gpu) { - static char * string = nullptr; - if (string) { - return string; - } +char * apir_device_get_name(virtgpu * gpu) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -34,7 +30,7 @@ const char * apir_device_get_name(virtgpu * gpu) { REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); + char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__); return NULL; @@ -46,7 +42,7 @@ const char * apir_device_get_name(virtgpu * gpu) { return string; } -const char * apir_device_get_description(virtgpu * gpu) { +char * apir_device_get_description(virtgpu * gpu) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h index 6216ebad1f..fe4cae2025 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h @@ -3,8 +3,8 @@ /* device */ void apir_device_get_device_count(struct virtgpu * gpu); int apir_device_get_count(struct virtgpu * gpu); -const char * apir_device_get_name(struct virtgpu * gpu); -const char * apir_device_get_description(struct virtgpu * gpu); +char * apir_device_get_name(struct virtgpu * gpu); +char * apir_device_get_description(struct virtgpu * gpu); uint32_t apir_device_get_type(struct virtgpu * gpu); void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total); bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op); @@ -17,13 +17,15 @@ void apir_device_get_props(struct virtgpu * gpu, apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size); /* buffer-type */ -const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, - ggml_backend_buffer_type_t buffer_buft, - size_t size); -size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op); +char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + size_t size); +size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + const ggml_tensor * op); /* buffer */ void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); diff --git a/ggml/src/ggml-virtgpu/virtgpu.h b/ggml/src/ggml-virtgpu/virtgpu.h index d4bb42e20b..6144319e4d 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.h +++ b/ggml/src/ggml-virtgpu/virtgpu.h @@ -73,6 +73,24 @@ struct virtgpu { /* APIR communication pages */ virtgpu_shmem reply_shmem; virtgpu_shmem data_shmem; + + /* Cached device information to prevent memory leaks and race conditions */ + struct { + char * description; + char * name; + int32_t device_count; + uint32_t type; + size_t memory_free; + size_t memory_total; + } cached_device_info; + + /* Cached buffer type information to prevent memory leaks and race conditions */ + struct { + apir_buffer_type_host_handle_t host_handle; + char * name; + size_t alignment; + size_t max_size; + } cached_buffer_type; }; static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) {