From bf13638d56a747f3c7e36fe766cbbb0a0bf8f6a7 Mon Sep 17 00:00:00 2001 From: Progeny Alpha Date: Fri, 13 Mar 2026 17:00:15 -0400 Subject: [PATCH] vulkan: enable coopmat chunked GDN output path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lower GDN_CHUNK_THRESHOLD from UINT32_MAX (previously marked as disabled) to 2, and prefer the coopmat output pipeline (cm1) when it is available, falling back to the scalar variant otherwise. Measured PP-512 throughput: ~206 → ~210 t/s on Radeon 890M (RDNA3.5). --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 383840db7f..d1c470b1c1 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -10406,7 +10406,7 @@ static void ggml_vk_rwkv_wkv7(ggml_backend_vk_context * ctx, vk_context& subctx, } static constexpr uint32_t GDN_CHUNK_SIZE = 64; -static constexpr uint32_t GDN_CHUNK_THRESHOLD = UINT32_MAX; // Disabled +static constexpr uint32_t GDN_CHUNK_THRESHOLD = 2; static void ggml_vk_gated_delta_net(ggml_backend_vk_context * ctx, vk_context& subctx, ggml_tensor * dst) { const ggml_tensor * src_q = dst->src[0]; @@ -10472,7 +10472,9 @@ static void ggml_vk_gated_delta_net(ggml_backend_vk_context * ctx, vk_context& s vk_pipeline pl_intra = ctx->device->pipeline_gated_delta_net_chunk_intra; vk_pipeline pl_inter = ctx->device->pipeline_gated_delta_net_chunk_inter; - vk_pipeline pl_output = ctx->device->pipeline_gated_delta_net_chunk_output; + vk_pipeline pl_output = ctx->device->pipeline_gated_delta_net_chunk_output_cm + ? ctx->device->pipeline_gated_delta_net_chunk_output_cm + : ctx->device->pipeline_gated_delta_net_chunk_output; ggml_pipeline_request_descriptor_sets(ctx, pl_intra, 1); ggml_pipeline_request_descriptor_sets(ctx, pl_inter, 1);