From b34ab0fb13003927c5b6ac5a590e1fb565bd7b45 Mon Sep 17 00:00:00 2001
From: Simon Redman <simon@ergotech.com>
Date: Sun, 1 Feb 2026 15:40:42 -0500
Subject: [PATCH 1/5] Add test for ggml_vk_buffer_from_host_ptr

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 89 ++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index a99375c088..b59ff6ba1b 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -4484,6 +4484,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
 
 static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch);
 
+static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device);
+
 static vk_device ggml_vk_get_device(size_t idx) {
     VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")");
 
@@ -5243,6 +5245,10 @@ static vk_device ggml_vk_get_device(size_t idx) {
         return device;
     }
 
+    #if defined(GGML_VULKAN_RUN_TESTS)
+    ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]);
+    #endif
+
     return vk_instance.devices[idx];
 }
 
@@ -15068,6 +15074,89 @@ static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, si
     return buf;
 }
 
+static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device) {
+    static std::vector<vk_device> tested_devices;
+    if (std::find(tested_devices.begin(), tested_devices.end(), device) == tested_devices.end()) {
+        tested_devices.push_back(device);
+    } else{
+        return; // Already tested this device, skip
+    }
+
+    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(" << device->name << ") - Single-device external host memory test");
+
+    const size_t test_buffer_size = 1024 * 1024; // 1 MB
+    const size_t test_buffer_ne = test_buffer_size / sizeof(float);
+
+    // Get required alignment for external memory
+    const vk::DeviceSize min_alignment = device->physical_device.getProperties2<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().get<vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().minImportedHostPointerAlignment;
+
+    // Allocate aligned host memory using aligned_alloc (C11/POSIX)
+    // Note: aligned_alloc requires size to be a multiple of alignment
+    const size_t aligned_size = ((test_buffer_size + min_alignment - 1) / min_alignment) * min_alignment;
+
+    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory: min_alignment = " << min_alignment << ", aligned_size = " << aligned_size);
+
+    // Initialize host memory, which we will use as input to the device
+    auto input_deleter = [](float* p) { free(p); };
+    std::unique_ptr<float[], decltype(input_deleter)> input_host_memory(
+        static_cast<float*>(aligned_alloc(min_alignment, aligned_size)),
+        input_deleter
+    );
+    for (size_t i = 0; i < test_buffer_ne; i++) {
+        input_host_memory[i] = static_cast<float>(i);
+    }
+
+    // Initialize destination host memory, which we will use to verify results
+    auto result_deleter = [](float* p) { free(p); };
+    std::unique_ptr<float[], decltype(result_deleter)> result_host_memory(
+        static_cast<float*>(aligned_alloc(min_alignment, aligned_size)),
+        result_deleter
+    );
+    for (size_t i = 0; i < test_buffer_ne; i++) {
+        result_host_memory[i] = 0.0f; // Ensure we know the original contents
+    }
+
+    // Allocate an operation on-device destination buffer
+    vk_buffer device_dst_buffer = ggml_vk_create_buffer_device(device, test_buffer_size);
+
+    // Allocate an external memory buffer on device, importing the host memory
+    vk_buffer external_source_buffer = ggml_vk_buffer_from_host_ptr(device, input_host_memory.get(), test_buffer_size);
+
+    // Allocate an external memory buffer on device for the result
+    vk_buffer external_result_buffer = ggml_vk_buffer_from_host_ptr(device, result_host_memory.get(), test_buffer_size);
+
+    vk_context subctx = ggml_vk_create_temporary_context(device->transfer_queue.cmd_pool);
+    ggml_vk_ctx_begin(device, subctx);
+
+    // Submit an operation which we can observe accessing data from the source and moving to dst.
+    // For simplicity, we use a copy operation here, but this could be any compute operation (SUM, MUL_MAT, etc.).
+    ggml_vk_buffer_copy_async(subctx, device_dst_buffer, 0, external_source_buffer, 0, test_buffer_size);
+
+    // Copy from on-device dst to host memory
+    // This is really intended to be a copy, as if we had done some compute on the device and now want to read back the results.
+    ggml_vk_buffer_copy_async(subctx, external_result_buffer, 0, device_dst_buffer, 0, test_buffer_size);
+
+    // Submit and wait for completion
+    ggml_vk_ctx_end(subctx);
+    vk::Fence fence = device->device.createFence({});
+    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): Submitting and waiting for device.");
+    ggml_vk_submit(subctx, fence);
+    VK_CHECK(device->device.waitForFences(fence, VK_TRUE, UINT64_MAX), "Failed to wait for fence");
+    device->device.destroyFence(fence);
+
+    for (size_t i = 0; i < test_buffer_ne; i++) {
+        if (result_host_memory[i] != input_host_memory[i]) {
+            VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): External host memory test failed at index " << std::dec << i << ": expected " << input_host_memory[i] << ", got " << result_host_memory[i]);
+            GGML_ABORT("External host memory test failed");
+        }
+    }
+
+    ggml_vk_destroy_buffer(device_dst_buffer);
+    ggml_vk_destroy_buffer(external_source_buffer);
+    ggml_vk_destroy_buffer(external_result_buffer);
+    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory() - Passed single-device, single-buffer external host memory test.");
+}
+
 static ggml_backend_buffer_t ggml_backend_vk_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
     VK_LOG_DEBUG("ggml_backend_vk_device_buffer_from_host_ptr(backend=" << dev << ", ptr=" << ptr << ", size=" << size << ")");
     GGML_UNUSED(max_tensor_size);

From 47b5f0224b607c48daf3adfd8831c35397b68171 Mon Sep 17 00:00:00 2001
From: Simon Redman <simon@ergotech.com>
Date: Mon, 16 Feb 2026 14:28:10 -0500
Subject: [PATCH 2/5] Move ggml_vk_test_single_device_buffer_from_host_ptr into
 existing GGML_VULKAN_RUN_TESTS block

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 168 ++++++++++++++-------------
 1 file changed, 85 insertions(+), 83 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index b59ff6ba1b..fa59ffc8e2 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -4484,7 +4484,9 @@ static void ggml_vk_load_shaders(vk_device& device) {
 
 static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch);
 
+#if defined(GGML_VULKAN_RUN_TESTS)
 static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device);
+#endif
 
 static vk_device ggml_vk_get_device(size_t idx) {
     VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")");
@@ -12101,6 +12103,89 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
     free(d);
     free(d_chk);
 }
+
+// Smoke test for ggml_vk_buffer_from_host_ptr: imports host memory, copies it through a device buffer and back, and compares.
+// Runs at most once per device and aborts on failure; devices without external_memory_host are skipped.
+static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device) {
+    static std::vector<vk_device> tested_devices;
+    if (std::find(tested_devices.begin(), tested_devices.end(), device) != tested_devices.end()) {
+        return; // Already tested this device, skip
+    }
+    tested_devices.push_back(device);
+
+    // ggml_vk_buffer_from_host_ptr returns an empty buffer when external_memory_host is unset, so there is nothing to test.
+    if (!device->external_memory_host) {
+        VK_LOG_DEBUG("ggml_vk_test_single_device_buffer_from_host_ptr(" << device->name << ") - Skipped, external host memory is not supported");
+        return;
+    }
+
+    VK_LOG_DEBUG("ggml_vk_test_single_device_buffer_from_host_ptr(" << device->name << ") - Single-device external host memory test");
+
+    const size_t test_buffer_size = 1024 * 1024; // 1 MiB
+    const size_t test_buffer_ne = test_buffer_size / sizeof(float);
+
+    // Get required alignment for external memory
+    const vk::DeviceSize min_alignment = device->physical_device.getProperties2<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().get<vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().minImportedHostPointerAlignment;
+
+    // Round the size up: aligned_alloc requires the size to be a multiple of the alignment
+    const size_t aligned_size = ((test_buffer_size + min_alignment - 1) / min_alignment) * min_alignment;
+    VK_LOG_DEBUG("ggml_vk_test_single_device_buffer_from_host_ptr: min_alignment = " << min_alignment << ", aligned_size = " << aligned_size);
+
+    // Both host allocations come from aligned_alloc, so they share one free()-based deleter
+    auto host_deleter = [](float* p) { free(p); };
+    using host_floats = std::unique_ptr<float[], decltype(host_deleter)>;
+    // Input host memory (source of the first copy) and result host memory (target of the second copy, used to verify)
+    host_floats input_host_memory(static_cast<float*>(aligned_alloc(min_alignment, aligned_size)), host_deleter);
+    host_floats result_host_memory(static_cast<float*>(aligned_alloc(min_alignment, aligned_size)), host_deleter);
+    if (!input_host_memory || !result_host_memory) {
+        GGML_ABORT("External host memory test failed: could not allocate aligned host memory");
+    }
+    for (size_t i = 0; i < test_buffer_ne; i++) {
+        input_host_memory[i] = static_cast<float>(i);
+        result_host_memory[i] = 0.0f; // Ensure we know the original contents
+    }
+
+    // Allocate an on-device destination buffer for the operation
+    vk_buffer device_dst_buffer = ggml_vk_create_buffer_device(device, test_buffer_size);
+
+    // Import both host allocations as external memory buffers on the device (source and result)
+    vk_buffer external_source_buffer = ggml_vk_buffer_from_host_ptr(device, input_host_memory.get(), test_buffer_size);
+    vk_buffer external_result_buffer = ggml_vk_buffer_from_host_ptr(device, result_host_memory.get(), test_buffer_size);
+    // An empty buffer means the import failed; stop here instead of handing it to the copies below
+    if (!external_source_buffer || !external_result_buffer) {
+        GGML_ABORT("External host memory test failed: could not import host memory");
+    }
+
+    vk_context subctx = ggml_vk_create_temporary_context(device->transfer_queue.cmd_pool);
+    ggml_vk_ctx_begin(device, subctx);
+
+    // Submit an operation which we can observe accessing data from the source and moving to dst.
+    // For simplicity, we use a copy operation here, but this could be any compute operation (SUM, MUL_MAT, etc.).
+    ggml_vk_buffer_copy_async(subctx, device_dst_buffer, 0, external_source_buffer, 0, test_buffer_size);
+
+    // Copy from on-device dst to host memory, as if we had done some compute on the device and now read back the results.
+    ggml_vk_buffer_copy_async(subctx, external_result_buffer, 0, device_dst_buffer, 0, test_buffer_size);
+
+    // Submit and wait for completion
+    ggml_vk_ctx_end(subctx);
+    vk::Fence fence = device->device.createFence({});
+    VK_LOG_DEBUG("ggml_vk_test_single_device_buffer_from_host_ptr(): Submitting and waiting for device.");
+    ggml_vk_submit(subctx, fence);
+    VK_CHECK(device->device.waitForFences(fence, VK_TRUE, UINT64_MAX), "Failed to wait for fence");
+    device->device.destroyFence(fence);
+
+    for (size_t i = 0; i < test_buffer_ne; i++) {
+        if (result_host_memory[i] != input_host_memory[i]) {
+            VK_LOG_DEBUG("ggml_vk_test_single_device_buffer_from_host_ptr(): External host memory test failed at index " << std::dec << i << ": expected " << input_host_memory[i] << ", got " << result_host_memory[i]);
+            GGML_ABORT("External host memory test failed");
+        }
+    }
+
+    ggml_vk_destroy_buffer(device_dst_buffer);
+    ggml_vk_destroy_buffer(external_source_buffer);
+    ggml_vk_destroy_buffer(external_result_buffer);
+    VK_LOG_DEBUG("ggml_vk_test_single_device_buffer_from_host_ptr() - Passed single-device, single-buffer external host memory test.");
+}
 #endif
 
 static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_context subctx) {
@@ -15074,89 +15159,6 @@ static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, si
     return buf;
 }
 
-static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device) {
-    static std::vector<vk_device> tested_devices;
-    if (std::find(tested_devices.begin(), tested_devices.end(), device) == tested_devices.end()) {
-        tested_devices.push_back(device);
-    } else{
-        return; // Already tested this device, skip
-    }
-
-    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(" << device->name << ") - Single-device external host memory test");
-
-    const size_t test_buffer_size = 1024 * 1024; // 1 MB
-    const size_t test_buffer_ne = test_buffer_size / sizeof(float);
-
-    // Get required alignment for external memory
-    const vk::DeviceSize min_alignment = device->physical_device.getProperties2<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().get<vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().minImportedHostPointerAlignment;
-
-    // Allocate aligned host memory using aligned_alloc (C11/POSIX)
-    // Note: aligned_alloc requires size to be a multiple of alignment
-    const size_t aligned_size = ((test_buffer_size + min_alignment - 1) / min_alignment) * min_alignment;
-
-    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory: min_alignment = " << min_alignment << ", aligned_size = " << aligned_size);
-
-    // Initialize host memory, which we will use as input to the device
-    auto input_deleter = [](float* p) { free(p); };
-    std::unique_ptr<float[], decltype(input_deleter)> input_host_memory(
-        static_cast<float*>(aligned_alloc(min_alignment, aligned_size)),
-        input_deleter
-    );
-    for (size_t i = 0; i < test_buffer_ne; i++) {
-        input_host_memory[i] = static_cast<float>(i);
-    }
-
-    // Initialize destination host memory, which we will use to verify results
-    auto result_deleter = [](float* p) { free(p); };
-    std::unique_ptr<float[], decltype(result_deleter)> result_host_memory(
-        static_cast<float*>(aligned_alloc(min_alignment, aligned_size)),
-        result_deleter
-    );
-    for (size_t i = 0; i < test_buffer_ne; i++) {
-        result_host_memory[i] = 0.0f; // Ensure we know the original contents
-    }
-
-    // Allocate an operation on-device destination buffer
-    vk_buffer device_dst_buffer = ggml_vk_create_buffer_device(device, test_buffer_size);
-
-    // Allocate an external memory buffer on device, importing the host memory
-    vk_buffer external_source_buffer = ggml_vk_buffer_from_host_ptr(device, input_host_memory.get(), test_buffer_size);
-
-    // Allocate an external memory buffer on device for the result
-    vk_buffer external_result_buffer = ggml_vk_buffer_from_host_ptr(device, result_host_memory.get(), test_buffer_size);
-
-    vk_context subctx = ggml_vk_create_temporary_context(device->transfer_queue.cmd_pool);
-    ggml_vk_ctx_begin(device, subctx);
-
-    // Submit an operation which we can observe accessing data from the source and moving to dst.
-    // For simplicity, we use a copy operation here, but this could be any compute operation (SUM, MUL_MAT, etc.).
-    ggml_vk_buffer_copy_async(subctx, device_dst_buffer, 0, external_source_buffer, 0, test_buffer_size);
-
-    // Copy from on-device dst to host memory
-    // This is really intended to be a copy, as if we had done some compute on the device and now want to read back the results.
-    ggml_vk_buffer_copy_async(subctx, external_result_buffer, 0, device_dst_buffer, 0, test_buffer_size);
-
-    // Submit and wait for completion
-    ggml_vk_ctx_end(subctx);
-    vk::Fence fence = device->device.createFence({});
-    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): Submitting and waiting for device.");
-    ggml_vk_submit(subctx, fence);
-    VK_CHECK(device->device.waitForFences(fence, VK_TRUE, UINT64_MAX), "Failed to wait for fence");
-    device->device.destroyFence(fence);
-
-    for (size_t i = 0; i < test_buffer_ne; i++) {
-        if (result_host_memory[i] != input_host_memory[i]) {
-            VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): External host memory test failed at index " << std::dec << i << ": expected " << input_host_memory[i] << ", got " << result_host_memory[i]);
-            GGML_ABORT("External host memory test failed");
-        }
-    }
-
-    ggml_vk_destroy_buffer(device_dst_buffer);
-    ggml_vk_destroy_buffer(external_source_buffer);
-    ggml_vk_destroy_buffer(external_result_buffer);
-    VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory() - Passed single-device, single-buffer external host memory test.");
-}
-
 static ggml_backend_buffer_t ggml_backend_vk_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
     VK_LOG_DEBUG("ggml_backend_vk_device_buffer_from_host_ptr(backend=" << dev << ", ptr=" << ptr << ", size=" << size << ")");
     GGML_UNUSED(max_tensor_size);

From 60ebeb28a4bee0fbee175a24d0e0d76cf7d3c9a8 Mon Sep 17 00:00:00 2001
From: Simon Redman <simon@ergotech.com>
Date: Mon, 16 Feb 2026 14:35:08 -0500
Subject: [PATCH 3/5] Remove ggml_vk_test_single_device_buffer_from_host_ptr
 forward declaration, move callsite to ggml_vk_buffer_from_host_ptr

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index fa59ffc8e2..b3a93775a1 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -4484,10 +4484,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
 
 static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch);
 
-#if defined(GGML_VULKAN_RUN_TESTS)
-static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device);
-#endif
-
 static vk_device ggml_vk_get_device(size_t idx) {
     VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")");
 
@@ -5247,10 +5243,6 @@ static vk_device ggml_vk_get_device(size_t idx) {
         return device;
     }
 
-    #if defined(GGML_VULKAN_RUN_TESTS)
-    ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]);
-    #endif
-
     return vk_instance.devices[idx];
 }
 
@@ -15135,6 +15127,10 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
 }
 
 static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, size_t size) {
+    #if defined(GGML_VULKAN_RUN_TESTS)
+    ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]);
+    #endif
+
     if (!device->external_memory_host) {
         return {};
     }

From fe438c6fdd122f8879b1d995497a63e839aef0d6 Mon Sep 17 00:00:00 2001
From: Simon Redman <simon@ergotech.com>
Date: Mon, 16 Feb 2026 14:55:52 -0500
Subject: [PATCH 4/5] Predeclare ggml_vk_buffer_from_host_ptr for tests

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index b3a93775a1..40ec9eb8b9 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1569,6 +1569,7 @@ struct ggml_vk_garbage_collector {
 static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_context subctx);
 static void ggml_vk_load_shaders(vk_device& device);
 static void ggml_pipeline_allocate_descriptor_sets(ggml_backend_vk_context * ctx);
+static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, size_t size);
 
 static bool vk_memory_logger_enabled = false;
 

From 40f23b341c072684893bfbeb6510d1376c8e1667 Mon Sep 17 00:00:00 2001
From: Simon Redman <simon@ergotech.com>
Date: Mon, 16 Feb 2026 14:56:08 -0500
Subject: [PATCH 5/5] Move ggml_vk_test_single_device_buffer_from_host_ptr call
 site to other test call location

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 40ec9eb8b9..c1f96a875f 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -12268,6 +12268,14 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_contex
         }
     }
 
+    for (size_t device_idx = 0; device_idx < GGML_VK_MAX_DEVICES; device_idx++) {
+        vk_device device = vk_instance.devices[device_idx];
+        if (device == nullptr) {
+            continue;
+        }
+        ggml_vk_test_single_device_buffer_from_host_ptr(device);
+    }
+
     GGML_ABORT("fatal error");
 #endif
 
@@ -15128,10 +15136,6 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
 }
 
 static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, size_t size) {
-    #if defined(GGML_VULKAN_RUN_TESTS)
-    ggml_vk_test_single_device_buffer_from_host_ptr(vk_instance.devices[idx]);
-    #endif
-
     if (!device->external_memory_host) {
         return {};
     }