From e9d130a4867153b814427fb8f39bac53b53f9b88 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Thu, 29 Jan 2026 09:11:22 +0100 Subject: [PATCH 01/10] ggml-virtgpu: regenerate_remoting.py: add the ability to deprecate a function --- ggml/src/ggml-virtgpu/regenerate_remoting.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-virtgpu/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py index 4174a24327..aeb48a4087 100755 --- a/ggml/src/ggml-virtgpu/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -116,7 +116,7 @@ class RemotingCodebaseGenerator: 'frontend_return': func_metadata.get('frontend_return', 'void'), 'frontend_extra_params': func_metadata.get('frontend_extra_params', []), 'group_description': group_description, - 'newly_added': func_metadata.get('newly_added', False) + 'deprecated': func_metadata.get('deprecated', False), }) enum_value += 1 @@ -165,6 +165,9 @@ class RemotingCodebaseGenerator: signature = "uint32_t" params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx" + if func['deprecated']: + decl_lines.append(f"/* {func['enum_name']} is deprecated. Keeping the handler for backward compatibility. */") + decl_lines.append(f"{signature} {func['backend_function']}({params});") # Switch cases @@ -176,7 +179,9 @@ class RemotingCodebaseGenerator: switch_lines.append(f" /* {func['group_description']} */") current_group = func['group_name'] - switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}\";") + deprecated = " (DEPRECATED)" if func['deprecated'] else "" + + switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}{deprecated}\";") # Dispatch table table_lines = [] @@ -188,7 +193,8 @@ class RemotingCodebaseGenerator: table_lines.append("") current_group = func['group_name'] - table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']},") + deprecated = " /* DEPRECATED */" if func['deprecated'] else "" + table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']}{deprecated},") header_content = f'''\ #pragma once @@ -225,6 +231,10 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC decl_lines.append(f"/* {func['group_description']} */") current_group = func['group_name'] + if func['deprecated']: + decl_lines.append(f"/* {func['frontend_function']} is deprecated. */") + continue + # Build parameter list params = [self.naming_patterns['frontend_base_param']] params.extend(func['frontend_extra_params']) @@ -287,7 +297,7 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path] if not self.clang_format_available: - logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted." + logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted.\n" " Install clang-format to enable automatic code formatting.") else: logging.info("\n🎨 Formatting files with clang-format...") From c6a60859721554d71c952997eb7015ab53657297 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 28 Jan 2026 21:04:02 +0100 Subject: [PATCH 02/10] ggml-virtgpu: deprecate buffer_type is_host remoting not necessary --- .../backend-dispatched-buffer-type.cpp | 6 +++--- .../backend/backend-dispatched.gen.h | 5 +++-- .../ggml-virtgpu/ggml-backend-buffer-type.cpp | 6 ------ .../ggml-virtgpu/ggmlremoting_functions.yaml | 4 +--- .../virtgpu-forward-buffer-type.cpp | 19 ------------------- ggml/src/ggml-virtgpu/virtgpu-forward.gen.h | 1 - 6 files changed, 7 insertions(+), 34 deletions(-) diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp index 8ea1bb4fb4..3a97fbd956 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp @@ -42,12 +42,12 @@ uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec return 0; } +/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */ uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { GGML_UNUSED(ctx); - ggml_backend_buffer_type_t buft; - buft = apir_decode_ggml_buffer_type(dec); + GGML_UNUSED(dec); + const bool is_host = false; - bool is_host = buft->iface.is_host(buft); apir_encode_bool_t(enc, &is_host); return 0; diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h b/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h index b81fd5039b..481d7f3150 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h @@ -16,6 +16,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */ uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); @@ -62,7 +63,7 @@ static inline const char * backend_dispatch_command_name(ApirBackendCommandType case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE: return "backend_buffer_type_get_max_size"; case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST: - return "backend_buffer_type_is_host"; + return "backend_buffer_type_is_host (DEPRECATED)"; case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER: return "backend_buffer_type_alloc_buffer"; case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE: @@ -110,7 +111,7 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name, /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment, /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size, - /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host /* DEPRECATED */, /* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer, /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size, diff --git a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp index 7f650659b8..a2fa5246aa 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp @@ -60,12 +60,6 @@ static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer return max_size; } -static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) { - virtgpu * gpu = BUFT_TO_GPU(buft); - - return apir_buffer_type_is_host(gpu, buft); -} - static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { virtgpu * gpu = BUFT_TO_GPU(buft); diff --git a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml index 0b7cccfe9c..775c5ca573 100644 --- a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +++ b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml @@ -79,9 +79,7 @@ functions: - "ggml_backend_buffer_type_t buft" is_host: - frontend_return: "bool" - frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + deprecated: true alloc_buffer: frontend_return: "apir_buffer_context_t" diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp index 03cb09e064..0069b47a5b 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp @@ -62,25 +62,6 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t b return max_size; } -bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) { - apir_encoder * encoder; - apir_decoder * decoder; - ApirForwardReturnCode ret; - - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST); - - apir_encode_ggml_buffer_type(encoder, buft); - - REMOTE_CALL(gpu, encoder, decoder, ret); - - bool is_host; - apir_decode_bool_t(decoder, &is_host); - - remote_call_finish(gpu, encoder, decoder); - - return is_host; -} - apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) { apir_encoder * encoder; apir_decoder * decoder; diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h index c27c07f086..6216ebad1f 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h @@ -20,7 +20,6 @@ apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, ggml_backend_buffer_type_t buffer_buft, size_t size); From 92390ad9f6e08d6b83c31171781e5112f559dc5e Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 28 Jan 2026 12:59:49 +0100 Subject: [PATCH 03/10] ggml-virtgpu: stop using static vars as cache The static init isn't thread safe. --- .../backend/shared/apir_cs_ggml.h | 4 ++ .../ggml-virtgpu/ggml-backend-buffer-type.cpp | 21 ++----- ggml/src/ggml-virtgpu/ggml-backend-device.cpp | 59 +++++++++++-------- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 55 +++++++++++++---- .../ggml-virtgpu/ggmlremoting_functions.yaml | 16 ++--- .../virtgpu-forward-buffer-type.cpp | 20 +++---- .../ggml-virtgpu/virtgpu-forward-device.cpp | 10 +--- ggml/src/ggml-virtgpu/virtgpu-forward.gen.h | 20 ++++--- ggml/src/ggml-virtgpu/virtgpu.h | 18 ++++++ 9 files changed, 140 insertions(+), 83 deletions(-) diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h index 070c3b25fb..28f7f270ef 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h @@ -71,6 +71,10 @@ static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decod return (ggml_backend_buffer_type_t) handle; } +static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) { + apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); +} + static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) { apir_buffer_type_host_handle_t handle; diff --git a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp index a2fa5246aa..68f378e516 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp @@ -20,7 +20,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml context->base = context->apir_context.shmem.mmap_ptr; context->is_from_ptr = true; } else { - context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size); + context->apir_context = apir_buffer_type_alloc_buffer(gpu, gpu->cached_buffer_type.host_handle, size); context->is_from_ptr = false; context->base = NULL; } @@ -34,30 +34,19 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - return apir_buffer_type_get_name(gpu, buft); + return gpu->cached_buffer_type.name; } static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - static size_t align = 0; - - if (align == 0) { - align = apir_buffer_type_get_alignment(gpu, buft); - } - - return align; + return gpu->cached_buffer_type.alignment; } static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { virtgpu * gpu = BUFT_TO_GPU(buft); - static size_t max_size = 0; - if (max_size == 0) { - max_size = apir_buffer_type_get_max_size(gpu, buft); - } - - return max_size; + return gpu->cached_buffer_type.max_size; } static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, @@ -70,7 +59,7 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff return ggml_nbytes(tensor); } - return apir_buffer_type_get_alloc_size(gpu, buft, tensor); + return apir_buffer_type_get_alloc_size(gpu, gpu->cached_buffer_type.host_handle, tensor); } const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = { diff --git a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp index 579eb99078..3f98ee58d4 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp @@ -3,32 +3,27 @@ static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_name(gpu); + return gpu->cached_device_info.name; } static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_description(gpu); + // Return the pre-cached description from the virtgpu structure + return gpu->cached_device_info.description; } static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - static enum ggml_backend_dev_type type; - static bool has_type = false; - if (!has_type) { - has_type = true; - type = (enum ggml_backend_dev_type) apir_device_get_type(gpu); - } - - return type; + return (enum ggml_backend_dev_type) gpu->cached_device_info.type; } static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { virtgpu * gpu = DEV_TO_GPU(dev); - return apir_device_get_memory(gpu, free, total); + *free = gpu->cached_device_info.memory_free; + *total = gpu->cached_device_info.memory_total; } static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { @@ -77,13 +72,22 @@ static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + static std::atomic initialized = false; + static ggml_backend_buffer_type buft; - static ggml_backend_buffer_type buft{ - /* .iface = */ ggml_backend_remoting_buffer_type_interface, - /* .device = */ dev, - /* .context = */ (void *) ctx, - }; + if (!initialized) { + static std::mutex mutex; + std::lock_guard lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } return &buft; } @@ -91,13 +95,22 @@ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_bac static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { virtgpu * gpu = DEV_TO_GPU(dev); - apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + static std::atomic initialized = false; + static ggml_backend_buffer_type buft; - static ggml_backend_buffer_type buft{ - /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, - /* .device = */ dev, - /* .context = */ (void *) ctx, - }; + if (!initialized) { + static std::mutex mutex; + std::lock_guard lock(mutex); + + if (!initialized) { + buft = { + /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, + /* .device = */ dev, + /* .context = */ (void *) gpu->cached_buffer_type.host_handle, + }; + initialized = true; + } + } return &buft; } diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index c46cf51c02..ca8235bb65 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -5,26 +5,57 @@ #include static virtgpu * apir_initialize() { - static virtgpu * apir_gpu_instance = NULL; - static bool apir_initialized = false; + static virtgpu * gpu = NULL; + static std::atomic initialized = false; + + if (initialized) { + // fast track + return gpu; + } { static std::mutex mutex; std::lock_guard lock(mutex); - if (apir_initialized) { - return apir_gpu_instance; + if (initialized) { + // thread safe + return gpu; } - apir_gpu_instance = create_virtgpu(); - if (!apir_gpu_instance) { + gpu = create_virtgpu(); + if (!gpu) { GGML_ABORT("failed to initialize the virtgpu"); } - apir_initialized = true; + // Pre-fetch and cache all device information, it will not change + gpu->cached_device_info.description = apir_device_get_description(gpu); + if (!gpu->cached_device_info.description) { + GGML_ABORT("failed to initialize the virtgpu device description"); + } + gpu->cached_device_info.name = apir_device_get_name(gpu); + if (!gpu->cached_device_info.name) { + GGML_ABORT("failed to initialize the virtgpu device name"); + } + gpu->cached_device_info.device_count = apir_device_get_count(gpu); + gpu->cached_device_info.type = apir_device_get_type(gpu); + + apir_device_get_memory(gpu, + &gpu->cached_device_info.memory_free, + &gpu->cached_device_info.memory_total); + + apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu); + gpu->cached_buffer_type.host_handle = buft_host_handle; + gpu->cached_buffer_type.name = apir_buffer_type_get_name(gpu, buft_host_handle); + if (!gpu->cached_buffer_type.name) { + GGML_ABORT("failed to initialize the virtgpu buffer type name"); + } + gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle); + gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle); + + initialized = true; } - return apir_gpu_instance; + return gpu; } static int ggml_backend_remoting_get_device_count() { @@ -34,7 +65,7 @@ static int ggml_backend_remoting_get_device_count() { return 0; } - return apir_device_get_count(gpu); + return gpu->cached_device_info.device_count; } static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) { @@ -62,7 +93,11 @@ static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { return; } - static bool initialized = false; + static std::atomic initialized = false; + + if (initialized) { + return; // fast track + } { static std::mutex mutex; diff --git a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml index 775c5ca573..14ef2433e4 100644 --- a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +++ b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml @@ -24,10 +24,10 @@ functions: frontend_return: "int" get_name: - frontend_return: "const char *" + frontend_return: "char *" get_description: - frontend_return: "const char *" + frontend_return: "char *" get_type: frontend_return: "uint32_t" @@ -64,19 +64,19 @@ functions: group_description: "buffer-type" functions: get_name: - frontend_return: "const char *" + frontend_return: "char *" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" get_alignment: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" get_max_size: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" is_host: deprecated: true @@ -84,13 +84,13 @@ functions: alloc_buffer: frontend_return: "apir_buffer_context_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buffer_buft" + - "apir_buffer_type_host_handle_t host_handle" - "size_t size" get_alloc_size: frontend_return: "size_t" frontend_extra_params: - - "ggml_backend_buffer_type_t buft" + - "apir_buffer_type_host_handle_t host_handle" - "const ggml_tensor *op" buffer: diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp index 0069b47a5b..1c0083e8e3 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp @@ -1,13 +1,13 @@ #include "virtgpu-forward-impl.h" -const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +char * apir_buffer_type_get_name(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -24,14 +24,14 @@ const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t return string; } -size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +size_t apir_buffer_type_get_alignment(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -43,14 +43,14 @@ size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t return alignment; } -size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) { +size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); REMOTE_CALL(gpu, encoder, decoder, ret); @@ -62,7 +62,7 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t b return max_size; } -apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) { +apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -71,7 +71,7 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); apir_encode_size_t(encoder, &size); @@ -84,14 +84,14 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_ return buffer_context; } -size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) { +size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_apir_buffer_type_host_handle(encoder, host_handle); apir_encode_ggml_tensor_inline(encoder, op); diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp index 3e45e55bdc..627121b9ee 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp @@ -21,11 +21,7 @@ int apir_device_get_count(virtgpu * gpu) { return dev_count; } -const char * apir_device_get_name(virtgpu * gpu) { - static char * string = nullptr; - if (string) { - return string; - } +char * apir_device_get_name(virtgpu * gpu) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -34,7 +30,7 @@ const char * apir_device_get_name(virtgpu * gpu) { REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); + char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__); return NULL; @@ -46,7 +42,7 @@ const char * apir_device_get_name(virtgpu * gpu) { return string; } -const char * apir_device_get_description(virtgpu * gpu) { +char * apir_device_get_description(virtgpu * gpu) { apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h index 6216ebad1f..fe4cae2025 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h @@ -3,8 +3,8 @@ /* device */ void apir_device_get_device_count(struct virtgpu * gpu); int apir_device_get_count(struct virtgpu * gpu); -const char * apir_device_get_name(struct virtgpu * gpu); -const char * apir_device_get_description(struct virtgpu * gpu); +char * apir_device_get_name(struct virtgpu * gpu); +char * apir_device_get_description(struct virtgpu * gpu); uint32_t apir_device_get_type(struct virtgpu * gpu); void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total); bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op); @@ -17,13 +17,15 @@ void apir_device_get_props(struct virtgpu * gpu, apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size); /* buffer-type */ -const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, - ggml_backend_buffer_type_t buffer_buft, - size_t size); -size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op); +char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); +apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + size_t size); +size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, + apir_buffer_type_host_handle_t host_handle, + const ggml_tensor * op); /* buffer */ void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); diff --git a/ggml/src/ggml-virtgpu/virtgpu.h b/ggml/src/ggml-virtgpu/virtgpu.h index d4bb42e20b..6144319e4d 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.h +++ b/ggml/src/ggml-virtgpu/virtgpu.h @@ -73,6 +73,24 @@ struct virtgpu { /* APIR communication pages */ virtgpu_shmem reply_shmem; virtgpu_shmem data_shmem; + + /* Cached device information to prevent memory leaks and race conditions */ + struct { + char * description; + char * name; + int32_t device_count; + uint32_t type; + size_t memory_free; + size_t memory_total; + } cached_device_info; + + /* Cached buffer type information to prevent memory leaks and race conditions */ + struct { + apir_buffer_type_host_handle_t host_handle; + char * name; + size_t alignment; + size_t max_size; + } cached_buffer_type; }; static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) { From fcc689071027d6d6fc3b9fc2d6744abddde69ab2 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 28 Jan 2026 14:38:06 +0100 Subject: [PATCH 04/10] ggml-virtgpu: protect the use of the shared memory to transfer data --- .../ggml-virtgpu/virtgpu-forward-backend.cpp | 12 ++++++++-- .../ggml-virtgpu/virtgpu-forward-buffer.cpp | 24 +++++++++++++++---- ggml/src/ggml-virtgpu/virtgpu.cpp | 7 ++++++ ggml/src/ggml-virtgpu/virtgpu.h | 3 +++ 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp index bf3c41011a..201100e421 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp @@ -18,9 +18,14 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (cgraph_size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough + // Lock mutex before using shared data_shmem buffer + if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) { + GGML_ABORT("Failed to lock data_shmem mutex"); + } + using_shared_shmem = true; shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { GGML_ABORT("Couldn't allocate the guest-host shared buffer"); @@ -42,7 +47,10 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { remote_call_finish(gpu, encoder, decoder); - if (shmem != &gpu->data_shmem) { + // Unlock mutex before cleanup + if (using_shared_shmem) { + mtx_unlock(&gpu->data_shmem_mutex); + } else { virtgpu_shmem_destroy(gpu, shmem); } diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp index 3181e39440..81ad66ec68 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp @@ -36,9 +36,14 @@ void apir_buffer_set_tensor(virtgpu * gpu, virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough + // Lock mutex before using shared data_shmem buffer + if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) { + GGML_ABORT("Failed to lock data_shmem mutex"); + } + using_shared_shmem = true; shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { @@ -55,7 +60,10 @@ void apir_buffer_set_tensor(virtgpu * gpu, remote_call_finish(gpu, encoder, decoder); - if (shmem != &gpu->data_shmem) { + // Unlock mutex before cleanup + if (using_shared_shmem) { + mtx_unlock(&gpu->data_shmem_mutex); + } else { virtgpu_shmem_destroy(gpu, shmem); } @@ -79,9 +87,14 @@ void apir_buffer_get_tensor(virtgpu * gpu, virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem * shmem = &temp_shmem; + bool using_shared_shmem = false; if (size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough + // Lock mutex before using shared data_shmem buffer + if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) { + GGML_ABORT("Failed to lock data_shmem mutex"); + } + using_shared_shmem = true; shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { @@ -98,7 +111,10 @@ void apir_buffer_get_tensor(virtgpu * gpu, remote_call_finish(gpu, encoder, decoder); - if (shmem != &gpu->data_shmem) { + // Unlock mutex before cleanup + if (using_shared_shmem) { + mtx_unlock(&gpu->data_shmem_mutex); + } else { virtgpu_shmem_destroy(gpu, shmem); } } diff --git a/ggml/src/ggml-virtgpu/virtgpu.cpp b/ggml/src/ggml-virtgpu/virtgpu.cpp index 005c8e21db..f44b0b6927 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu.cpp @@ -149,6 +149,13 @@ virtgpu * create_virtgpu() { gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr; util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024); + // Initialize mutex to protect shared data_shmem buffer + if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) { + delete gpu; + GGML_ABORT("%s: failed to initialize data_shmem mutex", __func__); + return NULL; + } + if (virtgpu_open(gpu) != APIR_SUCCESS) { GGML_ABORT("%s: failed to open the virtgpu device", __func__); return NULL; diff --git a/ggml/src/ggml-virtgpu/virtgpu.h b/ggml/src/ggml-virtgpu/virtgpu.h index 6144319e4d..5c3709ec3b 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.h +++ b/ggml/src/ggml-virtgpu/virtgpu.h @@ -74,6 +74,9 @@ struct virtgpu { virtgpu_shmem reply_shmem; virtgpu_shmem data_shmem; + /* Mutex to protect shared data_shmem buffer from concurrent access */ + mtx_t data_shmem_mutex; + /* Cached device information to prevent memory leaks and race conditions */ struct { char * description; From 171ab8b4ad97833b36ece79ea10a9f4cdbc5eb7d Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 28 Jan 2026 14:55:30 +0100 Subject: [PATCH 05/10] ggml-virtgpu: make the remote calls thread-safe --- ggml/src/ggml-virtgpu/virtgpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-virtgpu/virtgpu.cpp b/ggml/src/ggml-virtgpu/virtgpu.cpp index f44b0b6927..47af17ed38 100644 --- a/ggml/src/ggml-virtgpu/virtgpu.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu.cpp @@ -340,9 +340,9 @@ apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, * Prepare the command encoder and its buffer */ - static char encoder_buffer[4096]; + thread_local char encoder_buffer[4096]; - static apir_encoder enc; + thread_local apir_encoder enc; enc = { .cur = encoder_buffer, .start = encoder_buffer, From 3864be3e5d67fe979b6cbbfd64f18c75250da488 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Thu, 29 Jan 2026 09:25:09 +0100 Subject: [PATCH 06/10] ggml-virtgpu: backend: don't continue if couldn't allocate the tensor memory --- ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h index 28f7f270ef..207b930d24 100644 --- a/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h @@ -39,11 +39,17 @@ static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) { const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec); + + if (!apir_rpc_tensor) { + return NULL; + } + ggml_init_params params{ /*.mem_size =*/ ggml_tensor_overhead(), /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; + ggml_context * ctx = ggml_init(params); const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor); From 07f41cafee6d6ba8fccd94562c89a40cd2eeb18b Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Thu, 29 Jan 2026 09:50:47 +0100 Subject: [PATCH 07/10] ggml-virtgpu: add a cleanup function for consistency --- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index ca8235bb65..7256737c63 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -4,6 +4,8 @@ #include #include +void ggml_virtgpu_cleanup(virtgpu *gpu); + static virtgpu * apir_initialize() { static virtgpu * gpu = NULL; static std::atomic initialized = false; @@ -170,3 +172,21 @@ ggml_backend_reg_t ggml_backend_virtgpu_reg() { } GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg) + +// public function, not exposed in the GGML interface at the moment +void ggml_virtgpu_cleanup(virtgpu *gpu) { + if (gpu->cached_device_info.name) { + free(gpu->cached_device_info.name); + gpu->cached_device_info.name = NULL; + } + if (gpu->cached_device_info.description) { + free(gpu->cached_device_info.description); + gpu->cached_device_info.description = NULL; + } + if (gpu->cached_buffer_type.name) { + free(gpu->cached_buffer_type.name); + gpu->cached_buffer_type.name = NULL; + } + + mtx_destroy(&gpu->data_shmem_mutex); +} From f35b24e87c8893141ca44b8c6c926a1b8992adfb Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Thu, 29 Jan 2026 14:26:39 +0100 Subject: [PATCH 08/10] ggml-virtgpu: backend: don't crash if buft->iface.get_max_size is missing --- .../ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp index 3a97fbd956..d55eec2761 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp @@ -36,7 +36,11 @@ uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); - size_t value = buft->iface.get_max_size(buft); + size_t value = SIZE_MAX; + if (buft->iface.get_max_size) { + value = buft->iface.get_max_size(buft); + } + apir_encode_size_t(enc, &value); return 0; From f978082f834183fb2da46fa24b52b53b49ab049f Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 30 Jan 2026 13:59:05 +0100 Subject: [PATCH 09/10] fix style and ordering --- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index 7256737c63..276edd8fdd 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -4,7 +4,7 @@ #include #include -void ggml_virtgpu_cleanup(virtgpu *gpu); +void ggml_virtgpu_cleanup(virtgpu * gpu); static virtgpu * apir_initialize() { static virtgpu * gpu = NULL; @@ -171,10 +171,8 @@ ggml_backend_reg_t ggml_backend_virtgpu_reg() { return ® } -GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg) - // public function, not exposed in the GGML interface at the moment -void ggml_virtgpu_cleanup(virtgpu *gpu) { +void ggml_virtgpu_cleanup(virtgpu * gpu) { if (gpu->cached_device_info.name) { free(gpu->cached_device_info.name); gpu->cached_device_info.name = NULL; @@ -190,3 +188,5 @@ void ggml_virtgpu_cleanup(virtgpu *gpu) { mtx_destroy(&gpu->data_shmem_mutex); } + +GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg) From e9e9d736bce22f79cc24a11545be413489edba6b Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 30 Jan 2026 20:38:51 +0100 Subject: [PATCH 10/10] Remove the static variable in apir_device_get_count --- ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp index 627121b9ee..9cfc1ec171 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp @@ -2,11 +2,6 @@ #include "virtgpu-shm.h" int apir_device_get_count(virtgpu * gpu) { - static int32_t dev_count = -1; - if (dev_count != -1) { - return dev_count; - } - apir_encoder * encoder; apir_decoder * decoder; ApirForwardReturnCode ret; @@ -14,6 +9,7 @@ int apir_device_get_count(virtgpu * gpu) { REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT); REMOTE_CALL(gpu, encoder, decoder, ret); + int32_t dev_count = -1; apir_decode_int32_t(decoder, &dev_count); remote_call_finish(gpu, encoder, decoder);