From 608fc9d16d565ed861128684b3b021041a102208 Mon Sep 17 00:00:00 2001 From: Nikhil Jain Date: Thu, 29 Jan 2026 20:15:20 -0800 Subject: [PATCH 1/7] Remove mutex for pipeline caches, since they are now per-thread. --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 29 ---------------------------- 1 file changed, 29 deletions(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 22e2bfeb4c..7f2f9ecc4d 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -861,9 +861,6 @@ static webgpu_command ggml_webgpu_pad(webgpu_context & ctx, ggml_tensor * src, g }; webgpu_pipeline pipeline; - { - // TODO: remove guard once pipeline caches are per-thread - std::lock_guard lock(ctx->global_ctx->mutex); auto it = ctx->pad_pipelines.find(pipeline_key); if (it != ctx->pad_pipelines.end()) { pipeline = it->second; @@ -875,7 +872,6 @@ static webgpu_command ggml_webgpu_pad(webgpu_context & ctx, ggml_tensor * src, g pipeline.context = processed.decisions; ctx->pad_pipelines.emplace(pipeline_key, pipeline); } - } ggml_webgpu_generic_shader_decisions decisions = *static_cast(pipeline.context); @@ -944,9 +940,6 @@ static std::optional ggml_webgpu_set_rows(webgpu_context & ctx, }; webgpu_pipeline pipeline; - // TODO: remove guard once pipeline caches are per-thread - { - std::lock_guard lock(ctx->global_ctx->mutex); auto it = ctx->set_rows_pipelines.find(key); if (it != ctx->set_rows_pipelines.end()) { pipeline = it->second; @@ -958,7 +951,6 @@ static std::optional ggml_webgpu_set_rows(webgpu_context & ctx, pipeline.context = processed.decisions; ctx->set_rows_pipelines.emplace(key, pipeline); } - } ggml_webgpu_generic_shader_decisions decisions = *static_cast(pipeline.context); @@ -1261,9 +1253,6 @@ static webgpu_command ggml_webgpu_flash_attn(webgpu_context & ctx, }; webgpu_pipeline pipeline; - // TODO: remove guard once pipeline caches are per-thread - { - std::lock_guard lock(ctx->global_ctx->mutex); auto it = ctx->flash_attn_pipelines.find(key); if (it != ctx->flash_attn_pipelines.end()) { pipeline = it->second; @@ -1284,7 +1273,6 @@ static webgpu_command ggml_webgpu_flash_attn(webgpu_context & ctx, pipeline.context = processed.decisions; ctx->flash_attn_pipelines.emplace(key, pipeline); } - } ggml_webgpu_flash_attn_shader_decisions decisions = *static_cast(pipeline.context); @@ -1308,9 +1296,6 @@ static webgpu_command ggml_webgpu_unary_op(webgpu_context & ctx, ggml_tensor * s }; webgpu_pipeline pipeline; - { - // TODO: remove guard once pipeline caches are per-thread - std::lock_guard lock(ctx->global_ctx->mutex); auto it = ctx->unary_pipelines.find(pipeline_key); if (it != ctx->unary_pipelines.end()) { pipeline = it->second; @@ -1322,7 +1307,6 @@ static webgpu_command ggml_webgpu_unary_op(webgpu_context & ctx, ggml_tensor * s pipeline.context = processed.decisions; ctx->unary_pipelines.emplace(pipeline_key, pipeline); } - } ggml_webgpu_generic_shader_decisions decisions = *static_cast(pipeline.context); @@ -1743,9 +1727,6 @@ static webgpu_command ggml_webgpu_argmax(webgpu_context & ctx, ggml_tensor * src }; webgpu_pipeline pipeline; - { - // TODO: remove guard once pipeline caches are per-thread - std::lock_guard lock(ctx->global_ctx->mutex); auto it = ctx->argmax_pipelines.find(shader_lib_ctx.vec4); if (it != ctx->argmax_pipelines.end()) { pipeline = it->second; @@ -1756,7 +1737,6 @@ static webgpu_command ggml_webgpu_argmax(webgpu_context & ctx, ggml_tensor * src ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); ctx->argmax_pipelines.emplace(shader_lib_ctx.vec4, pipeline); } - } uint32_t wg_x = ggml_nelements(dst); return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x); } @@ -1772,7 +1752,6 @@ static webgpu_command ggml_webgpu_argsort(webgpu_context & ctx, ggml_tensor * sr .order = order }; - std::lock_guard lock(ctx->global_ctx->mutex); webgpu_pipeline argsort_pipeline; auto it = ctx->argsort_pipelines.find(order); if (it != ctx->argsort_pipelines.end()) { @@ -1963,9 +1942,6 @@ static webgpu_command ggml_webgpu_cumsum(webgpu_context & ctx, ggml_tensor * src .max_wg_size = ctx->global_ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup, }; webgpu_pipeline pipeline; - // TODO: remove guard once pipeline caches are per-thread - { - std::lock_guard lock(ctx->global_ctx->mutex); auto it = ctx->cumsum_pipelines.find(1); if (it != ctx->cumsum_pipelines.end()) { pipeline = it->second; @@ -1976,7 +1952,6 @@ static webgpu_command ggml_webgpu_cumsum(webgpu_context & ctx, ggml_tensor * src ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); ctx->cumsum_pipelines.emplace(1, pipeline); } - } uint32_t wg_x = ggml_nrows(dst); return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x); } @@ -2009,9 +1984,6 @@ static webgpu_command ggml_webgpu_sum_rows(webgpu_context & ctx, ggml_tensor * s }; webgpu_pipeline pipeline; - { - // TODO: remove guard once pipeline caches are per-thread - std::lock_guard lock(ctx->global_ctx->mutex); auto it = ctx->sum_rows_pipelines.find(1); if (it != ctx->sum_rows_pipelines.end()) { pipeline = it->second; @@ -2022,7 +1994,6 @@ static webgpu_command ggml_webgpu_sum_rows(webgpu_context & ctx, ggml_tensor * s ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); ctx->sum_rows_pipelines.emplace(1, pipeline); } - } uint32_t wg_x = total_sum ? 1 : ggml_nrows(dst); return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x); } From edee0d6bc6ff8b0ed5355dbf171648b2122fd549 Mon Sep 17 00:00:00 2001 From: Nikhil Jain Date: Thu, 29 Jan 2026 20:55:38 -0800 Subject: [PATCH 2/7] Add comment --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 7f2f9ecc4d..d1723ff73f 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -146,8 +146,13 @@ struct webgpu_submission_futures { struct webgpu_buf_pool { std::vector free; + // The pool must be synchronized because: + // 1. The memset pool is shared globally by every ggml buffer, + // since allocating a pool per ggml buffer would consume too much memory. + // 2. For the per-thread buffer pools in webgpu_context, + // buffers are allocated and freed in Dawn callbacks, + // which can run on a different thread than the calling thread. std::mutex mutex; - std::condition_variable cv; void init(wgpu::Device device, From a96218009556ad6b14af0eeede8920df46469123 Mon Sep 17 00:00:00 2001 From: Nikhil Jain Date: Thu, 29 Jan 2026 20:56:18 -0800 Subject: [PATCH 3/7] Run clang-format --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 184 +++++++++++++-------------- 1 file changed, 91 insertions(+), 93 deletions(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index d1723ff73f..89c6044dfe 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -152,7 +152,7 @@ struct webgpu_buf_pool { // 2. For the per-thread buffer pools in webgpu_context, // buffers are allocated and freed in Dawn callbacks, // which can run on a different thread than the calling thread. - std::mutex mutex; + std::mutex mutex; std::condition_variable cv; void init(wgpu::Device device, @@ -366,7 +366,6 @@ struct webgpu_context_struct { std::unordered_map pad_pipelines; size_t memset_bytes_per_thread; - }; typedef std::shared_ptr webgpu_context; @@ -388,9 +387,9 @@ struct ggml_backend_webgpu_device_context { // Per-thread data required to actually run WebGPU operations in a backend instance struct ggml_backend_webgpu_context { - webgpu_context webgpu_ctx; - std::once_flag init_once; - std::string name; + webgpu_context webgpu_ctx; + std::once_flag init_once; + std::string name; }; // Per-thread data related to buffers @@ -866,17 +865,16 @@ static webgpu_command ggml_webgpu_pad(webgpu_context & ctx, ggml_tensor * src, g }; webgpu_pipeline pipeline; - auto it = ctx->pad_pipelines.find(pipeline_key); - if (it != ctx->pad_pipelines.end()) { - pipeline = it->second; - } else { - ggml_webgpu_processed_shader processed = - ggml_webgpu_preprocess_pad_shader(ctx->p, wgsl_pad, shader_lib_ctx); - pipeline = - ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); - pipeline.context = processed.decisions; - ctx->pad_pipelines.emplace(pipeline_key, pipeline); - } + auto it = ctx->pad_pipelines.find(pipeline_key); + if (it != ctx->pad_pipelines.end()) { + pipeline = it->second; + } else { + ggml_webgpu_processed_shader processed = ggml_webgpu_preprocess_pad_shader(ctx->p, wgsl_pad, shader_lib_ctx); + pipeline = + ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); + pipeline.context = processed.decisions; + ctx->pad_pipelines.emplace(pipeline_key, pipeline); + } ggml_webgpu_generic_shader_decisions decisions = *static_cast(pipeline.context); @@ -945,17 +943,17 @@ static std::optional ggml_webgpu_set_rows(webgpu_context & ctx, }; webgpu_pipeline pipeline; - auto it = ctx->set_rows_pipelines.find(key); - if (it != ctx->set_rows_pipelines.end()) { - pipeline = it->second; - } else { - ggml_webgpu_processed_shader processed = - ggml_webgpu_preprocess_set_rows_shader(ctx->p, wgsl_set_rows, shader_lib_ctx); - pipeline = - ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); - pipeline.context = processed.decisions; - ctx->set_rows_pipelines.emplace(key, pipeline); - } + auto it = ctx->set_rows_pipelines.find(key); + if (it != ctx->set_rows_pipelines.end()) { + pipeline = it->second; + } else { + ggml_webgpu_processed_shader processed = + ggml_webgpu_preprocess_set_rows_shader(ctx->p, wgsl_set_rows, shader_lib_ctx); + pipeline = + ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); + pipeline.context = processed.decisions; + ctx->set_rows_pipelines.emplace(key, pipeline); + } ggml_webgpu_generic_shader_decisions decisions = *static_cast(pipeline.context); @@ -1258,26 +1256,26 @@ static webgpu_command ggml_webgpu_flash_attn(webgpu_context & ctx, }; webgpu_pipeline pipeline; - auto it = ctx->flash_attn_pipelines.find(key); - if (it != ctx->flash_attn_pipelines.end()) { - pipeline = it->second; - } else { - ggml_webgpu_flash_attn_shader_lib_context shader_lib_ctx = { - .key = key, - .sg_mat_m = ctx->global_ctx->capabilities.sg_mat_m, - .sg_mat_n = ctx->global_ctx->capabilities.sg_mat_n, - .sg_mat_k = ctx->global_ctx->capabilities.sg_mat_k, - .wg_mem_limit_bytes = ctx->global_ctx->capabilities.limits.maxComputeWorkgroupStorageSize, - .max_subgroup_size = ctx->global_ctx->capabilities.max_subgroup_size - }; + auto it = ctx->flash_attn_pipelines.find(key); + if (it != ctx->flash_attn_pipelines.end()) { + pipeline = it->second; + } else { + ggml_webgpu_flash_attn_shader_lib_context shader_lib_ctx = { + .key = key, + .sg_mat_m = ctx->global_ctx->capabilities.sg_mat_m, + .sg_mat_n = ctx->global_ctx->capabilities.sg_mat_n, + .sg_mat_k = ctx->global_ctx->capabilities.sg_mat_k, + .wg_mem_limit_bytes = ctx->global_ctx->capabilities.limits.maxComputeWorkgroupStorageSize, + .max_subgroup_size = ctx->global_ctx->capabilities.max_subgroup_size + }; - ggml_webgpu_processed_shader processed = - ggml_webgpu_preprocess_flash_attn_shader(ctx->p, wgsl_flash_attn, shader_lib_ctx); - pipeline = - ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); - pipeline.context = processed.decisions; - ctx->flash_attn_pipelines.emplace(key, pipeline); - } + ggml_webgpu_processed_shader processed = + ggml_webgpu_preprocess_flash_attn_shader(ctx->p, wgsl_flash_attn, shader_lib_ctx); + pipeline = + ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); + pipeline.context = processed.decisions; + ctx->flash_attn_pipelines.emplace(key, pipeline); + } ggml_webgpu_flash_attn_shader_decisions decisions = *static_cast(pipeline.context); @@ -1301,17 +1299,17 @@ static webgpu_command ggml_webgpu_unary_op(webgpu_context & ctx, ggml_tensor * s }; webgpu_pipeline pipeline; - auto it = ctx->unary_pipelines.find(pipeline_key); - if (it != ctx->unary_pipelines.end()) { - pipeline = it->second; - } else { - ggml_webgpu_processed_shader processed = - ggml_webgpu_preprocess_unary_shader(ctx->p, wgsl_unary, shader_lib_ctx); - pipeline = - ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); - pipeline.context = processed.decisions; - ctx->unary_pipelines.emplace(pipeline_key, pipeline); - } + auto it = ctx->unary_pipelines.find(pipeline_key); + if (it != ctx->unary_pipelines.end()) { + pipeline = it->second; + } else { + ggml_webgpu_processed_shader processed = + ggml_webgpu_preprocess_unary_shader(ctx->p, wgsl_unary, shader_lib_ctx); + pipeline = + ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); + pipeline.context = processed.decisions; + ctx->unary_pipelines.emplace(pipeline_key, pipeline); + } ggml_webgpu_generic_shader_decisions decisions = *static_cast(pipeline.context); @@ -1732,16 +1730,16 @@ static webgpu_command ggml_webgpu_argmax(webgpu_context & ctx, ggml_tensor * src }; webgpu_pipeline pipeline; - auto it = ctx->argmax_pipelines.find(shader_lib_ctx.vec4); - if (it != ctx->argmax_pipelines.end()) { - pipeline = it->second; - } else { - ggml_webgpu_processed_shader processed = - ggml_webgpu_preprocess_generic_shader(ctx->p, wgsl_argmax, shader_lib_ctx, "argmax"); - pipeline = - ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); - ctx->argmax_pipelines.emplace(shader_lib_ctx.vec4, pipeline); - } + auto it = ctx->argmax_pipelines.find(shader_lib_ctx.vec4); + if (it != ctx->argmax_pipelines.end()) { + pipeline = it->second; + } else { + ggml_webgpu_processed_shader processed = + ggml_webgpu_preprocess_generic_shader(ctx->p, wgsl_argmax, shader_lib_ctx, "argmax"); + pipeline = + ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); + ctx->argmax_pipelines.emplace(shader_lib_ctx.vec4, pipeline); + } uint32_t wg_x = ggml_nelements(dst); return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x); } @@ -1757,8 +1755,8 @@ static webgpu_command ggml_webgpu_argsort(webgpu_context & ctx, ggml_tensor * sr .order = order }; - webgpu_pipeline argsort_pipeline; - auto it = ctx->argsort_pipelines.find(order); + webgpu_pipeline argsort_pipeline; + auto it = ctx->argsort_pipelines.find(order); if (it != ctx->argsort_pipelines.end()) { argsort_pipeline = it->second; } else { @@ -1947,16 +1945,16 @@ static webgpu_command ggml_webgpu_cumsum(webgpu_context & ctx, ggml_tensor * src .max_wg_size = ctx->global_ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup, }; webgpu_pipeline pipeline; - auto it = ctx->cumsum_pipelines.find(1); - if (it != ctx->cumsum_pipelines.end()) { - pipeline = it->second; - } else { - ggml_webgpu_processed_shader processed = - ggml_webgpu_preprocess_generic_shader(ctx->p, wgsl_cumsum, shader_lib_ctx, "cumsum"); - pipeline = - ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); - ctx->cumsum_pipelines.emplace(1, pipeline); - } + auto it = ctx->cumsum_pipelines.find(1); + if (it != ctx->cumsum_pipelines.end()) { + pipeline = it->second; + } else { + ggml_webgpu_processed_shader processed = + ggml_webgpu_preprocess_generic_shader(ctx->p, wgsl_cumsum, shader_lib_ctx, "cumsum"); + pipeline = + ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); + ctx->cumsum_pipelines.emplace(1, pipeline); + } uint32_t wg_x = ggml_nrows(dst); return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x); } @@ -1989,16 +1987,16 @@ static webgpu_command ggml_webgpu_sum_rows(webgpu_context & ctx, ggml_tensor * s }; webgpu_pipeline pipeline; - auto it = ctx->sum_rows_pipelines.find(1); - if (it != ctx->sum_rows_pipelines.end()) { - pipeline = it->second; - } else { - ggml_webgpu_processed_shader processed = - ggml_webgpu_preprocess_generic_shader(ctx->p, wgsl_sum_rows, shader_lib_ctx, "sum_rows"); - pipeline = - ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); - ctx->sum_rows_pipelines.emplace(1, pipeline); - } + auto it = ctx->sum_rows_pipelines.find(1); + if (it != ctx->sum_rows_pipelines.end()) { + pipeline = it->second; + } else { + ggml_webgpu_processed_shader processed = + ggml_webgpu_preprocess_generic_shader(ctx->p, wgsl_sum_rows, shader_lib_ctx, "sum_rows"); + pipeline = + ggml_webgpu_create_pipeline(ctx->global_ctx->device, processed.wgsl.c_str(), processed.variant.c_str()); + ctx->sum_rows_pipelines.emplace(1, pipeline); + } uint32_t wg_x = total_sum ? 1 : ggml_nrows(dst); return ggml_backend_webgpu_build(ctx->global_ctx, ctx->param_buf_pool, pipeline, params, entries, wg_x); } @@ -2992,10 +2990,10 @@ static bool create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) { #ifdef GGML_WEBGPU_GPU_PROFILE // Initialize buffer pool for timestamp queries, used for profiling - ctx->webgpu_global_ctx->timestamp_query_buf_pool.init(ctx->webgpu_global_ctx->device, WEBGPU_NUM_TIMESTAMP_QUERY_BUFS, - WEBGPU_TIMESTAMP_QUERY_BUF_SIZE_BYTES, - wgpu::BufferUsage::QueryResolve | wgpu::BufferUsage::CopySrc, - wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst); + ctx->webgpu_global_ctx->timestamp_query_buf_pool.init( + ctx->webgpu_global_ctx->device, WEBGPU_NUM_TIMESTAMP_QUERY_BUFS, WEBGPU_TIMESTAMP_QUERY_BUF_SIZE_BYTES, + wgpu::BufferUsage::QueryResolve | wgpu::BufferUsage::CopySrc, + wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst); #endif GGML_LOG_INFO( From 559206c3a046ebfd31949227aa89d5ad7cb96395 Mon Sep 17 00:00:00 2001 From: Nikhil Jain Date: Thu, 29 Jan 2026 21:03:25 -0800 Subject: [PATCH 4/7] Cleanup --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 89c6044dfe..b378e80f70 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -271,7 +271,7 @@ struct webgpu_command { #endif }; -struct webgpu_capabilities_base { +struct webgpu_capabilities { wgpu::Limits limits; bool supports_subgroup_matrix = false; @@ -291,7 +291,7 @@ struct webgpu_global_context_struct { wgpu::Device device; wgpu::Queue queue; - webgpu_capabilities_base capabilities; + webgpu_capabilities capabilities; // Shared buffer to move data from device to host wgpu::Buffer get_tensor_staging_buf; // Global mutex for pipeline and staging buffer, will be refactored to exclude pipeline caches. @@ -388,7 +388,6 @@ struct ggml_backend_webgpu_device_context { // Per-thread data required to actually run WebGPU operations in a backend instance struct ggml_backend_webgpu_context { webgpu_context webgpu_ctx; - std::once_flag init_once; std::string name; }; From bb36297bcd48bd6a36475127da5d81180690550e Mon Sep 17 00:00:00 2001 From: Nikhil Jain Date: Fri, 30 Jan 2026 08:14:18 -0800 Subject: [PATCH 5/7] Run CI again --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index b378e80f70..afeb991f51 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -146,7 +146,7 @@ struct webgpu_submission_futures { struct webgpu_buf_pool { std::vector free; - // The pool must be synchronized because: + // The pool must be synchronized because // 1. The memset pool is shared globally by every ggml buffer, // since allocating a pool per ggml buffer would consume too much memory. // 2. For the per-thread buffer pools in webgpu_context, From 8360113d395951df6a4b90a34de155ac5c8dacfc Mon Sep 17 00:00:00 2001 From: Nikhil Jain Date: Fri, 30 Jan 2026 12:14:09 -0800 Subject: [PATCH 6/7] Run CI once more --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index afeb991f51..4997f70674 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -151,7 +151,7 @@ struct webgpu_buf_pool { // since allocating a pool per ggml buffer would consume too much memory. // 2. For the per-thread buffer pools in webgpu_context, // buffers are allocated and freed in Dawn callbacks, - // which can run on a different thread than the calling thread. + // which can run on a different thread than the calling thread. std::mutex mutex; std::condition_variable cv; From 3e145062910d65a1eabb80f3eba52a0325bc868a Mon Sep 17 00:00:00 2001 From: Nikhil Jain Date: Fri, 30 Jan 2026 14:46:24 -0800 Subject: [PATCH 7/7] Run clang-format --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 4997f70674..4ef50e365e 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -151,7 +151,7 @@ struct webgpu_buf_pool { // since allocating a pool per ggml buffer would consume too much memory. // 2. For the per-thread buffer pools in webgpu_context, // buffers are allocated and freed in Dawn callbacks, - // which can run on a different thread than the calling thread. + // which can run on a different thread than the calling thread. std::mutex mutex; std::condition_variable cv; @@ -291,11 +291,11 @@ struct webgpu_global_context_struct { wgpu::Device device; wgpu::Queue queue; - webgpu_capabilities capabilities; + webgpu_capabilities capabilities; // Shared buffer to move data from device to host - wgpu::Buffer get_tensor_staging_buf; + wgpu::Buffer get_tensor_staging_buf; // Global mutex for pipeline and staging buffer, will be refactored to exclude pipeline caches. - std::recursive_mutex mutex; + std::recursive_mutex mutex; webgpu_buf_pool memset_buf_pool; std::map memset_pipelines; // variant or type index