vulkan: enable mmvq for q2_k on NVIDIA (#17675)

This commit is contained in:
Jeff Bolz 2025-12-05 14:21:57 -06:00 committed by GitHub
parent 93bb92664e
commit 6ab0d64960
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 4 additions and 0 deletions

View File

@@ -6948,6 +6948,10 @@ static bool ggml_vk_should_use_mmvq(const vk_device& device, uint32_t m, uint32_
// Quantization overhead is not worth it for small k
switch (device->vendor_id) {
case VK_VENDOR_ID_NVIDIA:
if (src0_type == GGML_TYPE_Q2_K) {
return true;
}
if (k <= 4096) {
return false;
}