only stage through shmem on Nvidia

This commit is contained in:
Ruben Ortlam 2026-02-08 10:41:50 +01:00
parent 8236c453a5
commit d8d536cf98
1 changed file with 1 addition and 1 deletion

View File

@@ -3218,7 +3218,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
// Nvidia prefers shared memory use to load large tiles of K.
// Switch to loading from global memory when it would use too much shared memory.
// AMD prefers loading K directly from global memory
const uint32_t k_load_shmem = 1; // device->vendor_id == VK_VENDOR_ID_NVIDIA && hsk < 256 ? 1 : 0;
const uint32_t k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA && hsk < 256 && hsv < 256 ? 1 : 0;
return {wg_size, rows_cols[0], rows_cols[1], hsk, hsv, clamp, D_split, device->subgroup_size, k_load_shmem, flags};
};