From 6ab0d6496074f51644def999f94686d1d939785e Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Fri, 5 Dec 2025 14:21:57 -0600 Subject: [PATCH] vulkan: enable mmvq for q2_k on NVIDIA (#17675) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 97b0fd742e..c8f0449a94 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -6948,6 +6948,10 @@ static bool ggml_vk_should_use_mmvq(const vk_device& device, uint32_t m, uint32_ // Quantization overhead is not worth it for small k switch (device->vendor_id) { case VK_VENDOR_ID_NVIDIA: + if (src0_type == GGML_TYPE_Q2_K) { + return true; + } + if (k <= 4096) { return false; }