From b34ab0fb13003927c5b6ac5a590e1fb565bd7b45 Mon Sep 17 00:00:00 2001 From: Simon Redman Date: Sun, 1 Feb 2026 15:40:42 -0500 Subject: [PATCH 1/5] Add test for ggml_vk_buffer_from_host_ptr --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 89 ++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index a99375c088..b59ff6ba1b 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4484,6 +4484,8 @@ static void ggml_vk_load_shaders(vk_device& device) { static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch); +static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device); + static vk_device ggml_vk_get_device(size_t idx) { VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")"); @@ -5243,6 +5245,10 @@ static vk_device ggml_vk_get_device(size_t idx) { return device; } + #if defined(GGML_VULKAN_RUN_TESTS) + ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]); + #endif + return vk_instance.devices[idx]; } @@ -15068,6 +15074,89 @@ static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, si return buf; } +static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device) { + static std::vector tested_devices; + if (std::find(tested_devices.begin(), tested_devices.end(), device) == tested_devices.end()) { + tested_devices.push_back(device); + } else{ + return; // Already tested this device, skip + } + + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(" << device->name << ") - Single-device external host memory test"); + + const size_t test_buffer_size = 1024 * 1024; // 1 MB + const size_t test_buffer_ne = test_buffer_size / sizeof(float); + + // Get required alignment for external memory + const vk::DeviceSize min_alignment = device->physical_device.getProperties2().get().minImportedHostPointerAlignment; + + // Allocate aligned host memory using aligned_alloc (C11/POSIX) + // Note: aligned_alloc requires size to be a multiple of alignment + const size_t aligned_size = ((test_buffer_size + min_alignment - 1) / min_alignment) * min_alignment; + + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory: min_alignment = " << min_alignment << ", aligned_size = " << aligned_size); + + // Initialize host memory, which we will use as input to the device + auto input_deleter = [](float* p) { free(p); }; + std::unique_ptr input_host_memory( + static_cast(aligned_alloc(min_alignment, aligned_size)), + input_deleter + ); + for (size_t i = 0; i < test_buffer_ne; i++) { + input_host_memory[i] = static_cast(i); + } + + // Initialize destination host memory, which we will use to verify results + auto result_deleter = [](float* p) { free(p); }; + std::unique_ptr result_host_memory( + static_cast(aligned_alloc(min_alignment, aligned_size)), + result_deleter + ); + for (size_t i = 0; i < test_buffer_ne; i++) { + result_host_memory[i] = 0.0f; // Ensure we know the original contents + } + + // Allocate an operation on-device destination buffer + vk_buffer device_dst_buffer = ggml_vk_create_buffer_device(device, test_buffer_size); + + // Allocate an external memory buffer on device, importing the host memory + vk_buffer external_source_buffer = ggml_vk_buffer_from_host_ptr(device, input_host_memory.get(), test_buffer_size); + + // Allocate an external memory buffer on device for the result + vk_buffer external_result_buffer = ggml_vk_buffer_from_host_ptr(device, result_host_memory.get(), test_buffer_size); + + vk_context subctx = ggml_vk_create_temporary_context(device->transfer_queue.cmd_pool); + ggml_vk_ctx_begin(device, subctx); + + // Submit an operation which we can observe accessing data from the source and moving to dst. + // For simplicity, we use a copy operation here, but this could be any compute operation (SUM, MUL_MAT, etc.). + ggml_vk_buffer_copy_async(subctx, device_dst_buffer, 0, external_source_buffer, 0, test_buffer_size); + + // Copy from on-device dst to host memory + // This is really intended to be a copy, as if we had done some compute on the device and now want to read back the results. + ggml_vk_buffer_copy_async(subctx, external_result_buffer, 0, device_dst_buffer, 0, test_buffer_size); + + // Submit and wait for completion + ggml_vk_ctx_end(subctx); + vk::Fence fence = device->device.createFence({}); + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): Submitting and waiting for device."); + ggml_vk_submit(subctx, fence); + VK_CHECK(device->device.waitForFences(fence, VK_TRUE, UINT64_MAX), "Failed to wait for fence"); + device->device.destroyFence(fence); + + for (size_t i = 0; i < test_buffer_ne; i++) { + if (result_host_memory[i] != input_host_memory[i]) { + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): External host memory test failed at index " << std::dec << i << ": expected " << input_host_memory[i] << ", got " << result_host_memory[i]); + GGML_ABORT("External host memory test failed"); + } + } + + ggml_vk_destroy_buffer(device_dst_buffer); + ggml_vk_destroy_buffer(external_source_buffer); + ggml_vk_destroy_buffer(external_result_buffer); + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory() - Passed single-device, single-buffer external host memory test."); +} + static ggml_backend_buffer_t ggml_backend_vk_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) { VK_LOG_DEBUG("ggml_backend_vk_device_buffer_from_host_ptr(backend=" << dev << ", ptr=" << ptr << ", size=" << size << ")"); GGML_UNUSED(max_tensor_size); From 47b5f0224b607c48daf3adfd8831c35397b68171 Mon Sep 17 00:00:00 2001 From: Simon Redman Date: Mon, 16 Feb 2026 14:28:10 -0500 Subject: [PATCH 2/5] Move ggml_vk_test_single_device_buffer_from_host_ptr into existing GGML_VULKAN_RUN_TESTS block --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 168 ++++++++++++++------------- 1 file changed, 85 insertions(+), 83 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index b59ff6ba1b..fa59ffc8e2 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4484,7 +4484,9 @@ static void ggml_vk_load_shaders(vk_device& device) { static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch); +#if defined(GGML_VULKAN_RUN_TESTS) static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device); +#endif static vk_device ggml_vk_get_device(size_t idx) { VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")"); @@ -12101,6 +12103,89 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m, free(d); free(d_chk); } + +static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device) { + static std::vector tested_devices; + if (std::find(tested_devices.begin(), tested_devices.end(), device) == tested_devices.end()) { + tested_devices.push_back(device); + } else{ + return; // Already tested this device, skip + } + + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(" << device->name << ") - Single-device external host memory test"); + + const size_t test_buffer_size = 1024 * 1024; // 1 MB + const size_t test_buffer_ne = test_buffer_size / sizeof(float); + + // Get required alignment for external memory + const vk::DeviceSize min_alignment = device->physical_device.getProperties2().get().minImportedHostPointerAlignment; + + // Allocate aligned host memory using aligned_alloc (C11/POSIX) + // Note: aligned_alloc requires size to be a multiple of alignment + const size_t aligned_size = ((test_buffer_size + min_alignment - 1) / min_alignment) * min_alignment; + + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory: min_alignment = " << min_alignment << ", aligned_size = " << aligned_size); + + // Initialize host memory, which we will use as input to the device + auto input_deleter = [](float* p) { free(p); }; + std::unique_ptr input_host_memory( + static_cast(aligned_alloc(min_alignment, aligned_size)), + input_deleter + ); + for (size_t i = 0; i < test_buffer_ne; i++) { + input_host_memory[i] = static_cast(i); + } + + // Initialize destination host memory, which we will use to verify results + auto result_deleter = [](float* p) { free(p); }; + std::unique_ptr result_host_memory( + static_cast(aligned_alloc(min_alignment, aligned_size)), + result_deleter + ); + for (size_t i = 0; i < test_buffer_ne; i++) { + result_host_memory[i] = 0.0f; // Ensure we know the original contents + } + + // Allocate an operation on-device destination buffer + vk_buffer device_dst_buffer = ggml_vk_create_buffer_device(device, test_buffer_size); + + // Allocate an external memory buffer on device, importing the host memory + vk_buffer external_source_buffer = ggml_vk_buffer_from_host_ptr(device, input_host_memory.get(), test_buffer_size); + + // Allocate an external memory buffer on device for the result + vk_buffer external_result_buffer = ggml_vk_buffer_from_host_ptr(device, result_host_memory.get(), test_buffer_size); + + vk_context subctx = ggml_vk_create_temporary_context(device->transfer_queue.cmd_pool); + ggml_vk_ctx_begin(device, subctx); + + // Submit an operation which we can observe accessing data from the source and moving to dst. + // For simplicity, we use a copy operation here, but this could be any compute operation (SUM, MUL_MAT, etc.). + ggml_vk_buffer_copy_async(subctx, device_dst_buffer, 0, external_source_buffer, 0, test_buffer_size); + + // Copy from on-device dst to host memory + // This is really intended to be a copy, as if we had done some compute on the device and now want to read back the results. + ggml_vk_buffer_copy_async(subctx, external_result_buffer, 0, device_dst_buffer, 0, test_buffer_size); + + // Submit and wait for completion + ggml_vk_ctx_end(subctx); + vk::Fence fence = device->device.createFence({}); + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): Submitting and waiting for device."); + ggml_vk_submit(subctx, fence); + VK_CHECK(device->device.waitForFences(fence, VK_TRUE, UINT64_MAX), "Failed to wait for fence"); + device->device.destroyFence(fence); + + for (size_t i = 0; i < test_buffer_ne; i++) { + if (result_host_memory[i] != input_host_memory[i]) { + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): External host memory test failed at index " << std::dec << i << ": expected " << input_host_memory[i] << ", got " << result_host_memory[i]); + GGML_ABORT("External host memory test failed"); + } + } + + ggml_vk_destroy_buffer(device_dst_buffer); + ggml_vk_destroy_buffer(external_source_buffer); + ggml_vk_destroy_buffer(external_result_buffer); + VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory() - Passed single-device, single-buffer external host memory test."); +} #endif static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_context subctx) { @@ -15074,89 +15159,6 @@ static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, si return buf; } -static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device) { - static std::vector tested_devices; - if (std::find(tested_devices.begin(), tested_devices.end(), device) == tested_devices.end()) { - tested_devices.push_back(device); - } else{ - return; // Already tested this device, skip - } - - VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(" << device->name << ") - Single-device external host memory test"); - - const size_t test_buffer_size = 1024 * 1024; // 1 MB - const size_t test_buffer_ne = test_buffer_size / sizeof(float); - - // Get required alignment for external memory - const vk::DeviceSize min_alignment = device->physical_device.getProperties2().get().minImportedHostPointerAlignment; - - // Allocate aligned host memory using aligned_alloc (C11/POSIX) - // Note: aligned_alloc requires size to be a multiple of alignment - const size_t aligned_size = ((test_buffer_size + min_alignment - 1) / min_alignment) * min_alignment; - - VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory: min_alignment = " << min_alignment << ", aligned_size = " << aligned_size); - - // Initialize host memory, which we will use as input to the device - auto input_deleter = [](float* p) { free(p); }; - std::unique_ptr input_host_memory( - static_cast(aligned_alloc(min_alignment, aligned_size)), - input_deleter - ); - for (size_t i = 0; i < test_buffer_ne; i++) { - input_host_memory[i] = static_cast(i); - } - - // Initialize destination host memory, which we will use to verify results - auto result_deleter = [](float* p) { free(p); }; - std::unique_ptr result_host_memory( - static_cast(aligned_alloc(min_alignment, aligned_size)), - result_deleter - ); - for (size_t i = 0; i < test_buffer_ne; i++) { - result_host_memory[i] = 0.0f; // Ensure we know the original contents - } - - // Allocate an operation on-device destination buffer - vk_buffer device_dst_buffer = ggml_vk_create_buffer_device(device, test_buffer_size); - - // Allocate an external memory buffer on device, importing the host memory - vk_buffer external_source_buffer = ggml_vk_buffer_from_host_ptr(device, input_host_memory.get(), test_buffer_size); - - // Allocate an external memory buffer on device for the result - vk_buffer external_result_buffer = ggml_vk_buffer_from_host_ptr(device, result_host_memory.get(), test_buffer_size); - - vk_context subctx = ggml_vk_create_temporary_context(device->transfer_queue.cmd_pool); - ggml_vk_ctx_begin(device, subctx); - - // Submit an operation which we can observe accessing data from the source and moving to dst. - // For simplicity, we use a copy operation here, but this could be any compute operation (SUM, MUL_MAT, etc.). - ggml_vk_buffer_copy_async(subctx, device_dst_buffer, 0, external_source_buffer, 0, test_buffer_size); - - // Copy from on-device dst to host memory - // This is really intended to be a copy, as if we had done some compute on the device and now want to read back the results. - ggml_vk_buffer_copy_async(subctx, external_result_buffer, 0, device_dst_buffer, 0, test_buffer_size); - - // Submit and wait for completion - ggml_vk_ctx_end(subctx); - vk::Fence fence = device->device.createFence({}); - VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): Submitting and waiting for device."); - ggml_vk_submit(subctx, fence); - VK_CHECK(device->device.waitForFences(fence, VK_TRUE, UINT64_MAX), "Failed to wait for fence"); - device->device.destroyFence(fence); - - for (size_t i = 0; i < test_buffer_ne; i++) { - if (result_host_memory[i] != input_host_memory[i]) { - VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): External host memory test failed at index " << std::dec << i << ": expected " << input_host_memory[i] << ", got " << result_host_memory[i]); - GGML_ABORT("External host memory test failed"); - } - } - - ggml_vk_destroy_buffer(device_dst_buffer); - ggml_vk_destroy_buffer(external_source_buffer); - ggml_vk_destroy_buffer(external_result_buffer); - VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory() - Passed single-device, single-buffer external host memory test."); -} - static ggml_backend_buffer_t ggml_backend_vk_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) { VK_LOG_DEBUG("ggml_backend_vk_device_buffer_from_host_ptr(backend=" << dev << ", ptr=" << ptr << ", size=" << size << ")"); GGML_UNUSED(max_tensor_size); From 60ebeb28a4bee0fbee175a24d0e0d76cf7d3c9a8 Mon Sep 17 00:00:00 2001 From: Simon Redman Date: Mon, 16 Feb 2026 14:35:08 -0500 Subject: [PATCH 3/5] Remove ggml_vk_test_single_device_buffer_from_host_ptr forward declaration, move callsite to ggml_vk_buffer_from_host_ptr --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index fa59ffc8e2..b3a93775a1 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4484,10 +4484,6 @@ static void ggml_vk_load_shaders(vk_device& device) { static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch); -#if defined(GGML_VULKAN_RUN_TESTS) -static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device); -#endif - static vk_device ggml_vk_get_device(size_t idx) { VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")"); @@ -5247,10 +5243,6 @@ static vk_device ggml_vk_get_device(size_t idx) { return device; } - #if defined(GGML_VULKAN_RUN_TESTS) - ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]); - #endif - return vk_instance.devices[idx]; } @@ -15135,6 +15127,10 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm } static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, size_t size) { + #if defined(GGML_VULKAN_RUN_TESTS) + ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]); + #endif + if (!device->external_memory_host) { return {}; } From fe438c6fdd122f8879b1d995497a63e839aef0d6 Mon Sep 17 00:00:00 2001 From: Simon Redman Date: Mon, 16 Feb 2026 14:55:52 -0500 Subject: [PATCH 4/5] Predeclare ggml_vk_buffer_from_host_ptr for tests --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index b3a93775a1..40ec9eb8b9 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -1569,6 +1569,7 @@ struct ggml_vk_garbage_collector { static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_context subctx); static void ggml_vk_load_shaders(vk_device& device); static void ggml_pipeline_allocate_descriptor_sets(ggml_backend_vk_context * ctx); +static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, size_t size); static bool vk_memory_logger_enabled = false; From 40f23b341c072684893bfbeb6510d1376c8e1667 Mon Sep 17 00:00:00 2001 From: Simon Redman Date: Mon, 16 Feb 2026 14:56:08 -0500 Subject: [PATCH 5/5] Move ggml_vk_test_single_device_buffer_from_host_ptr call site to other test call location --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 40ec9eb8b9..c1f96a875f 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -12268,6 +12268,14 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_contex } } + for (size_t device_idx = 0; device_idx < GGML_VK_MAX_DEVICES; device_idx++) { + vk_device device = vk_instance.devices[device_idx]; + if (device == nullptr) { + continue; + } + ggml_vk_test_single_device_buffer_from_host_ptr(device); + } + GGML_ABORT("fatal error"); #endif @@ -15128,10 +15136,6 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm } static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, size_t size) { - #if defined(GGML_VULKAN_RUN_TESTS) - ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]); - #endif - if (!device->external_memory_host) { return {}; }