Fix: compute s13 from src1->nb[3] instead of src1->nb[2] (#18724)

2026-01-10 01:16:07 -08:00 · 2026-01-10 01:16:07 -08:00 · 600a366478
parent ea23c15990
commit 600a366478
1 changed file with 1 addition and 1 deletion
--- a/ggml/src/ggml-cuda/mmq.cu
+++ b/ggml/src/ggml-cuda/mmq.cu
@@ -190,7 +190,7 @@ void ggml_cuda_mul_mat_q(
    {
        const int64_t s11 = src1->nb[1] / ts_src1;
        const int64_t s12 = src1->nb[2] / ts_src1;
-        const int64_t s13 = src1->nb[2] / ts_src1;
+        const int64_t s13 = src1->nb[3] / ts_src1;

        if (use_native_mxfp4) {
            quantize_mmq_mxfp4_cuda(src1_d, ids_src1.get(), src1_q8_1.get(), src0->type, ne10, s11, s12, s13,