From 73c9eb8ceda397b651dbb6661b2935f0283a2b1d Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 13 Mar 2026 11:43:20 +0200 Subject: [PATCH] metal : fix l2 norm scale (#20493) --- ggml/src/ggml-metal/ggml-metal-device.m | 2 +- ggml/src/ggml-metal/ggml-metal.metal | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 05b826a61b..b7d587f3bd 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -1156,7 +1156,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te case GGML_OP_RWKV_WKV7: return true; case GGML_OP_GATED_DELTA_NET: - return op->src[2]->ne[0] % 32 == 0; + return has_simdgroup_reduction && op->src[2]->ne[0] % 32 == 0; case GGML_OP_SOLVE_TRI: case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT_ID: diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 24a3092af2..107e7cf2ff 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -3006,7 +3006,7 @@ kernel void kernel_l2_norm_impl( sumf = shmem_f32[tiisg]; sumf = simd_sum(sumf); - const float scale = 1.0f/sqrt(max(sumf, args.eps)); + const float scale = 1.0f/max(sqrt(sumf), args.eps); for (int i00 = tpitg.x; i00 < args.ne00; i00 += ntg.x) { y[i00] = x[i00] * scale;