From 5d7df5df44fd34fbc4c2d505195f1ea7f20c4c56 Mon Sep 17 00:00:00 2001
From: iacopPBK
Date: Mon, 30 Mar 2026 19:32:12 +0200
Subject: [PATCH] Fixed typo in q4_1 kernel

---
 ggml/src/ggml-cuda/mmq.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index a2098248ee..9618579424 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -492,7 +492,7 @@ static __device__ __forceinline__ void vec_dot_q4_1_q8_1_dp4a(
                 const int i = i0 + threadIdx.x;
 
                 const int kyqs = QI8_1 * ((k01/2) / (QI8_1/2)) + (k01/2) % (QI8_1/2);
-                int u[2*VDR_Q4_0_Q8_1_MMQ];
+                int u[2*VDR_Q4_1_Q8_1_MMQ];
                 constexpr int max_cpy = ggml_cuda_get_max_cpy_bytes();
                 constexpr int mcpy_int = max_cpy / sizeof(int);
 