ggml webgpu: Fix bug in dispatching large matrix-vector multiplication (#19535)

* Fix bug in dispatching large matrix-vector multiplication
This commit is contained in:
Reese Levine 2026-02-18 16:06:29 -07:00 committed by GitHub
parent b55dcdef5d
commit e7f2f95c9a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 2 additions and 1 deletions

View File

@ -1121,7 +1121,8 @@ static webgpu_command ggml_webgpu_mul_mat(webgpu_context & ctx,
uint32_t batches = dst->ne[2] * dst->ne[3];
uint32_t output_groups = CEIL_DIV(dst->ne[0], decisions->outputs_per_wg);
uint32_t total_wg = output_groups * batches;
wg_x = total_wg % ctx->global_ctx->capabilities.limits.maxComputeWorkgroupsPerDimension;
// TODO: split large sizes into multiple batches to avoid way over-provisioning workgroups
wg_x = std::min(total_wg, ctx->global_ctx->capabilities.limits.maxComputeWorkgroupsPerDimension);
wg_y = CEIL_DIV(total_wg, ctx->global_ctx->capabilities.limits.maxComputeWorkgroupsPerDimension);
} else if (use_fast) {
auto decisions = static_cast<ggml_webgpu_mul_mat_shader_decisions *>(pipeline.context.get());