modify warptile tuning for bf16 matmuls to fix regression (m_warptile to l_warptile)

2025-12-30 13:07:54 -07:00 · 2025-12-30 13:07:54 -07:00 · 328116e62f
parent dc147928ea
commit 328116e62f
1 changed files with 7 additions and 0 deletions
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -3528,6 +3528,13 @@ static void ggml_vk_load_shaders(vk_device& device) {
        m_wg_denoms = { 64,  64, 1 };
        s_wg_denoms = { 32,  32, 1 };

+        if (device->vendor_id == VK_VENDOR_ID_INTEL) {
+            if (device->architecture == INTEL_XE2) {
+                // Xe2/Xe3 - bf16 warptile performance tuning
+                l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, 4, 4, 1, subgroup_size_8 };
+            }
+        }
+
        CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_bf16, matmul_bf16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3, , 0);
        CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_id_bf16, matmul_id_bf16, , wg_denoms, warptile, vk_mat_mat_id_push_constants, 4, _id, 0);
    }