From 608fc9d16d565ed861128684b3b021041a102208 Mon Sep 17 00:00:00 2001
From: Nikhil Jain
Date: Thu, 29 Jan 2026 20:15:20 -0800
Subject: [PATCH] Remove mutex for pipeline caches, since they are now
 per-thread.

---
 ggml/src/ggml-webgpu/ggml-webgpu.cpp | 29 ----------------------------
 1 file changed, 29 deletions(-)

diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
index 22e2bfeb4c..7f2f9ecc4d 100644
--- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp
+++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
@@ -861,9 +861,6 @@ static webgpu_command ggml_webgpu_pad(webgpu_context & ctx, ggml_tensor * src, g
     };

     webgpu_pipeline pipeline;
-    {
-        // TODO: remove guard once pipeline caches are per-thread
-        std::lock_guard lock(ctx->global_ctx->mutex);
     auto it = ctx->pad_pipelines.find(pipeline_key);
     if (it != ctx->pad_pipelines.end()) {
         pipeline = it->second;
@@ -875,7 +872,6 @@ static webgpu_command ggml_webgpu_pad(webgpu_context & ctx, ggml_tensor * src, g
         pipeline.context = processed.decisions;
         ctx->pad_pipelines.emplace(pipeline_key, pipeline);
     }
-    }

     ggml_webgpu_generic_shader_decisions decisions = *static_cast<ggml_webgpu_generic_shader_decisions *>(pipeline.context);
@@ -944,9 +940,6 @@ static std::optional<webgpu_command> ggml_webgpu_set_rows(webgpu_context & ctx,
     };

     webgpu_pipeline pipeline;
-    // TODO: remove guard once pipeline caches are per-thread
-    {
-        std::lock_guard lock(ctx->global_ctx->mutex);
     auto it = ctx->set_rows_pipelines.find(key);
     if (it != ctx->set_rows_pipelines.end()) {
         pipeline = it->second;
@@ -958,7 +951,6 @@ static std::optional<webgpu_command> ggml_webgpu_set_rows(webgpu_context & ctx,
         pipeline.context = processed.decisions;
         ctx->set_rows_pipelines.emplace(key, pipeline);
     }
-    }

     ggml_webgpu_generic_shader_decisions decisions = *static_cast<ggml_webgpu_generic_shader_decisions *>(pipeline.context);
@@ -1261,9 +1253,6 @@ static webgpu_command ggml_webgpu_flash_attn(webgpu_context & ctx,
     };

     webgpu_pipeline pipeline;
-    // TODO: remove guard once pipeline caches are per-thread
-    {
-        std::lock_guard lock(ctx->global_ctx->mutex);
     auto it = ctx->flash_attn_pipelines.find(key);
     if (it != ctx->flash_attn_pipelines.end()) {
         pipeline = it->second;
@@ -1284,7 +1273,6 @@ static webgpu_command ggml_webgpu_flash_attn(webgpu_context & ctx,
         pipeline.context = processed.decisions;
         ctx->flash_attn_pipelines.emplace(key, pipeline);
     }
-    }

     ggml_webgpu_flash_attn_shader_decisions decisions = *static_cast<ggml_webgpu_flash_attn_shader_decisions *>(pipeline.context);
@@ -1308,9 +1296,6 @@ static webgpu_command ggml_webgpu_unary_op(webgpu_context & ctx, ggml_tensor * s
     };

     webgpu_pipeline pipeline;
-    {
-        // TODO: remove guard once pipeline caches are per-thread
-        std::lock_guard lock(ctx->global_ctx->mutex);
     auto it = ctx->unary_pipelines.find(pipeline_key);
     if (it != ctx->unary_pipelines.end()) {
         pipeline = it->second;
@@ -1322,7 +1307,6 @@ static webgpu_command ggml_webgpu_unary_op(webgpu_context & ctx, ggml_tensor * s
         pipeline.context = processed.decisions;
         ctx->unary_pipelines.emplace(pipeline_key, pipeline);
     }
-    }

     ggml_webgpu_generic_shader_decisions decisions = *static_cast<ggml_webgpu_generic_shader_decisions *>(pipeline.context);
@@ -1743,9 +1727,6 @@ static webgpu_command ggml_webgpu_argmax(webgpu_context & ctx, ggml_tensor * src
     };

     webgpu_pipeline pipeline;
-    {
-        // TODO: remove guard once pipeline caches are per-thread
-        std::lock_guard lock(ctx->global_ctx->mutex);
     auto it = ctx->argmax_pipelines.find(shader_lib_ctx.vec4);
     if (it != ctx->argmax_pipelines.end()) {
         pipeline = it->second;
@@ -1756,7 +1737,6 @@ static webgpu_command ggml_webgpu_argmax(webgpu_context & ctx, ggml_tensor * src
             ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str());
         ctx->argmax_pipelines.emplace(shader_lib_ctx.vec4, pipeline);
     }
-    }

     uint32_t wg_x = ggml_nelements(dst);
     return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x);
 }
@@ -1772,7 +1752,6 @@ static webgpu_command ggml_webgpu_argsort(webgpu_context & ctx, ggml_tensor * sr
         .order = order
     };

-    std::lock_guard lock(ctx->global_ctx->mutex);
     webgpu_pipeline argsort_pipeline;
     auto it = ctx->argsort_pipelines.find(order);
     if (it != ctx->argsort_pipelines.end()) {
@@ -1963,9 +1942,6 @@ static webgpu_command ggml_webgpu_cumsum(webgpu_context & ctx, ggml_tensor * src
         .max_wg_size = ctx->global_ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup,
     };
     webgpu_pipeline pipeline;
-    // TODO: remove guard once pipeline caches are per-thread
-    {
-        std::lock_guard lock(ctx->global_ctx->mutex);
     auto it = ctx->cumsum_pipelines.find(1);
     if (it != ctx->cumsum_pipelines.end()) {
         pipeline = it->second;
@@ -1976,7 +1952,6 @@ static webgpu_command ggml_webgpu_cumsum(webgpu_context & ctx, ggml_tensor * src
             ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str());
         ctx->cumsum_pipelines.emplace(1, pipeline);
     }
-    }

     uint32_t wg_x = ggml_nrows(dst);
     return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x);
 }
@@ -2009,9 +1984,6 @@ static webgpu_command ggml_webgpu_sum_rows(webgpu_context & ctx, ggml_tensor * s
     };

     webgpu_pipeline pipeline;
-    {
-        // TODO: remove guard once pipeline caches are per-thread
-        std::lock_guard lock(ctx->global_ctx->mutex);
     auto it = ctx->sum_rows_pipelines.find(1);
     if (it != ctx->sum_rows_pipelines.end()) {
         pipeline = it->second;
@@ -2022,7 +1994,6 @@ static webgpu_command ggml_webgpu_sum_rows(webgpu_context & ctx, ggml_tensor * s
             ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str());
         ctx->sum_rows_pipelines.emplace(1, pipeline);
     }
-    }

     uint32_t wg_x = total_sum ? 1 : ggml_nrows(dst);
     return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x);
 }
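
Note (not part of the patch): the standalone sketch below illustrates the locking
pattern this change removes and the per-thread layout that makes it safe to remove.
The types GlobalContext, ThreadContext, and Pipeline are simplified stand-ins
invented for illustration, not the real ggml-webgpu structures; only the shape of
the find/create/emplace cache lookup mirrors the code above.

#include <cstdio>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

struct Pipeline {
    std::string variant;  // stands in for a compiled shader module
};

// Shared, device-wide state: a mutex is still needed for anything that truly
// is shared across threads (device, queue, buffer pools, ...).
struct GlobalContext {
    std::mutex mutex;
};

// Per-thread state: each worker owns its own pipeline cache, so cache lookups
// never race and need no lock (the situation after this patch).
struct ThreadContext {
    GlobalContext * global_ctx;
    std::unordered_map<int, Pipeline> pad_pipelines;
};

static Pipeline create_pipeline(int key) {
    // Stand-in for ggml_webgpu_create_pipeline(): compile a shader variant.
    return Pipeline{"pad_variant_" + std::to_string(key)};
}

static Pipeline get_pad_pipeline(ThreadContext & ctx, int key) {
    // Before the patch this lookup was wrapped in
    //     std::lock_guard lock(ctx.global_ctx->mutex);
    // because the cache was shared. With a per-thread cache the guard is
    // unnecessary: no other thread can touch ctx.pad_pipelines.
    auto it = ctx.pad_pipelines.find(key);
    if (it != ctx.pad_pipelines.end()) {
        return it->second;
    }
    Pipeline pipeline = create_pipeline(key);
    ctx.pad_pipelines.emplace(key, pipeline);
    return pipeline;
}

int main() {
    GlobalContext global;
    std::vector<std::thread> workers;
    for (int t = 0; t < 4; ++t) {
        workers.emplace_back([&global, t] {
            ThreadContext ctx{&global, {}};  // one cache per thread
            Pipeline p = get_pad_pipeline(ctx, t % 2);
            std::printf("thread %d uses %s\n", t, p.variant.c_str());
        });
    }
    for (auto & w : workers) {
        w.join();
    }
}

The trade-off of this layout: each thread may compile its own copy of a pipeline
variant, spending some memory and startup time to keep the hot lookup path
lock-free.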