From 88d23ad515e60c64db8df22b3de23cba00d044ab Mon Sep 17 00:00:00 2001 From: Oleksandr Kuvshynov <661042+okuvshynov@users.noreply.github.com> Date: Wed, 28 Jan 2026 06:35:54 -0500 Subject: [PATCH] vulkan: handle device dedup on MacOS + Vega II Duo cards (#19058) Deduplication here relied on the fact that vulkan would return unique UUID for different physical GPUs. It is at the moment not always the case. On Mac Pro 2019 running Mac OS, with 2 Vega II Duo cards (so, 4 GPU total), MotlenVK would assign same UUID to pairs of GPUs, unless they are connected with Infinity Fabric. See more details here: KhronosGroup/MoltenVK#2683. The right way is to fix that in MoltenVK, but until it is fixed, llama.cpp would only recognize 2 of 4 GPUs in such configuration. The deduplication logic here is changed to only filter GPUs if UUID is same but driver is different. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index b5e5dba95f..514f290d09 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5522,22 +5522,32 @@ static void ggml_vk_instance_init() { if ((new_props.properties.deviceType == vk::PhysicalDeviceType::eDiscreteGpu || new_props.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu) && ggml_vk_device_is_supported(devices[i])) { // Check if there are two physical devices corresponding to the same GPU + // This handles the case where the same GPU appears with different drivers (e.g., RADV + AMDVLK on Linux), + // see https://github.com/ggml-org/llama.cpp/pull/7582 for original deduplication. + // However, for MoltenVK on macOS, multiple GPUs on the same card may report the same UUID, + // see https://github.com/KhronosGroup/MoltenVK/issues/2683. Until this is fixed, we'll only deduplicate + // when drivers differ (same driver + same UUID = likely different GPUs) auto old_device = std::find_if( vk_instance.device_indices.begin(), vk_instance.device_indices.end(), - [&devices, &new_id](const size_t k){ + [&devices, &new_id, &new_driver](const size_t k){ vk::PhysicalDeviceProperties2 old_props; + vk::PhysicalDeviceDriverProperties old_driver; vk::PhysicalDeviceIDProperties old_id; - old_props.pNext = &old_id; + old_props.pNext = &old_driver; + old_driver.pNext = &old_id; devices[k].getProperties2(&old_props); - bool equals = std::equal(std::begin(old_id.deviceUUID), std::end(old_id.deviceUUID), std::begin(new_id.deviceUUID)); - equals = equals || ( + bool same_uuid = std::equal(std::begin(old_id.deviceUUID), std::end(old_id.deviceUUID), std::begin(new_id.deviceUUID)); + same_uuid = same_uuid || ( old_id.deviceLUIDValid && new_id.deviceLUIDValid && std::equal(std::begin(old_id.deviceLUID), std::end(old_id.deviceLUID), std::begin(new_id.deviceLUID)) ); - return equals; + // Only deduplicate if same UUID AND different drivers + // (same driver + same UUID on MoltenVK = likely different GPUs on multi-GPU card) + bool different_driver = (old_driver.driverID != new_driver.driverID); + return same_uuid && different_driver; } ); if (old_device == vk_instance.device_indices.end()) {