From 4aec6a86bdb2fd2ceca6663d5c2c4210d9f8ede0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= Date: Fri, 27 Mar 2026 20:23:38 +0100 Subject: [PATCH] ggml : simplified testing for nh being power of 2 in Hadamard transform implementations --- ggml/src/ggml-cpu/ops.cpp | 14 +------------- ggml/src/ggml-cuda/hadamard.cu | 17 ++--------------- 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 8f9f082c43..48a3644332 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -11207,18 +11207,6 @@ void ggml_compute_forward_opt_step_sgd(const ggml_compute_params * params, ggml_ // MIT license // SPDX-License-Identifier: MIT -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4267) // possible loss of data -#include -#include -#include -#include -#include -inline int popcount(uint32_t x) { return __popcnt(x); } -#else -inline int popcount(uint32_t x) { return __builtin_popcount(x); } -#endif - template <typename T> void fast_ht(int n, T * values) { constexpr float ksqrt2 = 0.707106781f; @@ -11250,7 +11238,7 @@ static void ggml_compute_forward_hadamard_f32( const int nth = params->nth; int nh = dst->op_params[0]; - GGML_ASSERT(nh > 1 && popcount(uint32_t(nh)) == 1); + GGML_ASSERT(nh > 1 && ((nh & (nh - 1)) == 0)); // power of 2 GGML_ASSERT(dst->ne[0] % nh == 0); int nc = dst->ne[0]/nh; diff --git a/ggml/src/ggml-cuda/hadamard.cu b/ggml/src/ggml-cuda/hadamard.cu index f03866cb5a..45091d2d20 100644 --- a/ggml/src/ggml-cuda/hadamard.cu +++ b/ggml/src/ggml-cuda/hadamard.cu @@ -58,19 +58,6 @@ static void hadamard_f32_cuda(int nh, const char * x, char * y, int ne0, int ne1 } } -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4267) // possible loss of data -#include -#include -#include -#include -#include -static inline int popcount(uint32_t x) { return __popcnt(x); } -#else -static inline int popcount(uint32_t x) { return __builtin_popcount(x); } -#endif - - void 
ggml_cuda_op_hadamard(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const ggml_tensor * src = dst->src[0]; GGML_ASSERT(src->type == GGML_TYPE_F32); @@ -78,8 +65,8 @@ void ggml_cuda_op_hadamard(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src, dst)); int nh = dst->op_params[0]; - GGML_ASSERT(dst->ne[0]%nh == 0); - GGML_ASSERT(nh > 1 && popcount(nh) == 1); + GGML_ASSERT(dst->ne[0] % nh == 0); + GGML_ASSERT(nh > 1 && ((nh & (nh - 1)) == 0)); // power of 2 hadamard_f32_cuda(nh, (const char *)src->data, (char *)dst->data, src->ne[0], src->ne[1], src->ne[2], src->ne[3], src->nb[1], src->nb[2], src->nb[3], dst->nb[1], dst->nb[2], dst->nb[3], ctx.stream());