ggml : fix bug in CUDA Hadamard transform implementation

2026-03-27 20:07:50 +01:00 · 2026-03-27 20:07:50 +01:00 · 6011bdd92b
parent 83a0313a14
commit 6011bdd92b
2 changed files with 5 additions and 3 deletions
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -5022,8 +5022,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
        case GGML_OP_SOLVE_TRI:
        case GGML_OP_SCATTER:
            return true;
-        case GGML_OP_HADAMARD:
-            return (op->ne[0] == 64 || op->ne[0] == 128 || op->ne[0] == 256) && op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32;
+        case GGML_OP_HADAMARD: {
+            int nh = op->op_params[0];
+            return (nh == 64 || nh == 128 || nh == 256) && op->ne[0] % nh == 0 && op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32;
+        }
        default:
            return false;
    }
--- a/ggml/src/ggml-cuda/hadamard.cu
+++ b/ggml/src/ggml-cuda/hadamard.cu
@@ -30,7 +30,7 @@ static __global__ void hadamard_f32(const char * src, char * dst, int ne0,
    float scale = ksqrt2;

 #pragma unroll
-    for (int h = 2; h < nh; h <<= 2) {
+    for (int h = 2; h < nh; h <<= 1) {
        __syncthreads();
        int ii = tid/h, jj = tid%h;
        int j = 2*h*ii+jj;