corrected code branch when flash-attn is disabled

enabling usage of the --flash-attn option
Saba Fallah 2025-12-04 23:45:59 +01:00
parent 5381b9cf63
commit 076138a428
2 changed files with 5 additions and 7 deletions


@@ -2590,10 +2590,7 @@ private:
         } else {
             ggml_tensor * v = ggml_permute(ctx0, v_cur, 1, 2, 0, 3);
             v = ggml_cont(ctx0, v);
 
-            const auto n_tokens = q->ne[1];
-            const auto n_head   = q->ne[2];
-
             ggml_tensor * kq = ggml_mul_mat(ctx0, k, q);
             // F32 may not needed for vision encoders?
             // ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
@@ -2601,8 +2598,9 @@ private:
             kq = ggml_soft_max_ext(ctx0, kq, kq_mask, kq_scale, 0.0f);
 
             ggml_tensor * kqv = ggml_mul_mat(ctx0, v, kq);
-            cur = ggml_permute(ctx0, kqv, 0, 2, 1, 3);
-            cur = ggml_cont_2d(ctx0, cur, cur->ne[0]*n_head, n_tokens);
+
+            cur = ggml_cont(ctx0, ggml_permute(ctx0, kqv, 0, 2, 1, 3));
+            cur = ggml_reshape_2d(ctx0, cur, cur->ne[0]*cur->ne[1], cur->ne[2]*cur->ne[3]);
         }
 
         cb(cur, "kqv_out", il);

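The fix in this non-flash-attn branch is that the flattening of the attention output no longer depends on n_tokens and n_head captured from q earlier in the function; the 2-D shape is now derived from cur's own dimensions after the permute and cont. Below is a minimal standalone sketch (not part of the commit; the dimensions are made-up example values) showing what the new cont-plus-reshape pair produces:

// Sketch only: demonstrates the shape produced by the new code path.
// The example dims (d_head, n_tokens, n_head) are assumptions, not
// values taken from the commit.
#include "ggml.h"
#include <stdbool.h>
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /* mem_size   */ 16*1024*1024,
        /* mem_buffer */ NULL,
        /* no_alloc   */ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // kqv comes out of ggml_mul_mat as [d_head, n_tokens, n_head, 1]
    const int64_t d_head = 64, n_tokens = 577, n_head = 16;
    struct ggml_tensor * kqv = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, d_head, n_tokens, n_head, 1);

    // permute to [d_head, n_head, n_tokens, 1], then make it contiguous so
    // the reshape below is legal (ggml_reshape_* requires contiguous data)
    struct ggml_tensor * cur = ggml_cont(ctx, ggml_permute(ctx, kqv, 0, 2, 1, 3));

    // fold the heads into the row dimension: [d_head*n_head, n_tokens]
    cur = ggml_reshape_2d(ctx, cur, cur->ne[0]*cur->ne[1], cur->ne[2]*cur->ne[3]);

    printf("cur: [%lld, %lld]\n", (long long) cur->ne[0], (long long) cur->ne[1]);

    ggml_free(ctx);
    return 0;
}

This prints cur: [1024, 577], the same [d_head*n_head, n_tokens] layout the removed ggml_cont_2d call produced, but computed from cur itself rather than from variables cached off q before the shapes were final. The second changed file, shown next, wires the CLI option through to the clip context.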

@@ -175,7 +175,7 @@ struct mtmd_context {
     clip_context_params ctx_clip_params {
         /* use_gpu */ ctx_params.use_gpu,
-        /* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_AUTO,
+        /* flash_attn_type */ mtmd_get_clip_flash_attn_type(ctx_params.flash_attn_type),
         /* image_min_tokens */ ctx_params.image_min_tokens,
         /* image_max_tokens */ ctx_params.image_max_tokens,
         /* warmup */ ctx_params.warmup,
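The mtmd change stops pinning the clip context to CLIP_FLASH_ATTN_TYPE_AUTO and instead forwards the value the user selected with --flash-attn. The commit excerpt does not show the body of mtmd_get_clip_flash_attn_type, so the following is only a hedged sketch of the obvious mapping; the DISABLED/ENABLED enumerator names on the clip side are assumptions modeled on the CLIP_FLASH_ATTN_TYPE_AUTO value visible in the diff:

// Hedged sketch, not the commit's actual implementation. Assumes clip.h
// defines CLIP_FLASH_ATTN_TYPE_DISABLED and CLIP_FLASH_ATTN_TYPE_ENABLED
// alongside the CLIP_FLASH_ATTN_TYPE_AUTO value seen above.
static clip_flash_attn_type mtmd_get_clip_flash_attn_type(enum llama_flash_attn_type flash_attn_type) {
    switch (flash_attn_type) {
        case LLAMA_FLASH_ATTN_TYPE_AUTO:     return CLIP_FLASH_ATTN_TYPE_AUTO;
        case LLAMA_FLASH_ATTN_TYPE_DISABLED: return CLIP_FLASH_ATTN_TYPE_DISABLED;
        case LLAMA_FLASH_ATTN_TYPE_ENABLED:  return CLIP_FLASH_ATTN_TYPE_ENABLED;
    }
    return CLIP_FLASH_ATTN_TYPE_AUTO; // defensive default for out-of-range values
}

With this in place, callers of mtmd can honor an explicit --flash-attn on/off setting for the vision encoder as well, instead of always auto-detecting.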