From 6c48fa46f67b8bb7c8235d94bbe79b924a87d27e Mon Sep 17 00:00:00 2001 From: Ruben Ortlam Date: Sun, 15 Mar 2026 18:07:19 +0100 Subject: [PATCH 1/3] vulkan: avoid graphics queue on non-RADV AMD drivers --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 7092361d2e..2d5f95ea3e 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4981,8 +4981,8 @@ static vk_device ggml_vk_get_device(size_t idx) { std::vector<vk::QueueFamilyProperties> queue_family_props = device->physical_device.getQueueFamilyProperties(); // Try to find a non-graphics compute queue and transfer-focused queues - // On AMD, the graphics queue seems to be faster, so don't avoid it - const vk::QueueFlagBits graphics_flag = device->vendor_id == VK_VENDOR_ID_AMD ? (vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics; + // On AMD GPUs with RADV driver, the graphics queue seems to be faster, so don't avoid it + const vk::QueueFlagBits graphics_flag = (device->vendor_id == VK_VENDOR_ID_AMD && device->driver_id == vk::DriverId::eMesaRadv) ? 
(vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics; const uint32_t compute_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eCompute, graphics_flag, -1, 1); const uint32_t transfer_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eTransfer, vk::QueueFlagBits::eCompute | graphics_flag, compute_queue_family_index, 1); From c68b2428b330deac7e7ca5b0c42b0feeccac6393 Mon Sep 17 00:00:00 2001 From: Ruben Ortlam Date: Sun, 15 Mar 2026 18:08:45 +0100 Subject: [PATCH 2/3] avoid graphics queues on small GPUs --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 2d5f95ea3e..d820e47c27 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4982,7 +4982,8 @@ static vk_device ggml_vk_get_device(size_t idx) { // Try to find a non-graphics compute queue and transfer-focused queues // On AMD GPUs with RADV driver, the graphics queue seems to be faster, so don't avoid it - const vk::QueueFlagBits graphics_flag = (device->vendor_id == VK_VENDOR_ID_AMD && device->driver_id == vk::DriverId::eMesaRadv) ? (vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics; + // Avoid on small GPUs to prevent interfering with graphics tasks + const vk::QueueFlagBits graphics_flag = (device->vendor_id == VK_VENDOR_ID_AMD && device->driver_id == vk::DriverId::eMesaRadv && device->shader_core_count > 16) ? 
(vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics; const uint32_t compute_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eCompute, graphics_flag, -1, 1); const uint32_t transfer_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eTransfer, vk::QueueFlagBits::eCompute | graphics_flag, compute_queue_family_index, 1); From 824aa149ca16361ca3c9b0889eea1f6842420991 Mon Sep 17 00:00:00 2001 From: Ruben Ortlam Date: Sun, 15 Mar 2026 20:34:43 +0100 Subject: [PATCH 3/3] change to only use graphics queue if overridden with env var GGML_VK_ALLOW_GRAPHICS_QUEUE --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index d820e47c27..df8291271c 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4981,9 +4981,9 @@ static vk_device ggml_vk_get_device(size_t idx) { std::vector<vk::QueueFamilyProperties> queue_family_props = device->physical_device.getQueueFamilyProperties(); // Try to find a non-graphics compute queue and transfer-focused queues - // On AMD GPUs with RADV driver, the graphics queue seems to be faster, so don't avoid it - // Avoid on small GPUs to prevent interfering with graphics tasks - const vk::QueueFlagBits graphics_flag = (device->vendor_id == VK_VENDOR_ID_AMD && device->driver_id == vk::DriverId::eMesaRadv && device->shader_core_count > 16) ? (vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics; + // The graphics queue is avoided by default; setting GGML_VK_ALLOW_GRAPHICS_QUEUE lifts that restriction because using it can increase performance on RADV + const bool allow_graphics_queue = (getenv("GGML_VK_ALLOW_GRAPHICS_QUEUE") != nullptr); + const vk::QueueFlagBits graphics_flag = allow_graphics_queue ? 
(vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics; const uint32_t compute_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eCompute, graphics_flag, -1, 1); const uint32_t transfer_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eTransfer, vk::QueueFlagBits::eCompute | graphics_flag, compute_queue_family_index, 1);