added new opt to tests.sh to disable flash-attn
This commit is contained in:
Saba Fallah 2025-12-11 10:11:27 +01:00
parent 33fabf0bd8
commit d70f171fac
2 changed files with 15 additions and 9 deletions

View File

@ -2562,8 +2562,7 @@ private:
ggml_tensor * kqv = ggml_mul_mat(ctx0, v, kq);
cur = ggml_permute(ctx0, kqv, 0, 2, 1, 3);
cur = ggml_reshape_2d(ctx0, ggml_cont(ctx0, cur), cur->ne[0] * cur->ne[1], cur->ne[2] * cur->ne[3]);
cur = ggml_cont_2d(ctx0, cur, cur->ne[0] * cur->ne[1], cur->ne[2] * cur->ne[3]);
}
cb(cur, "kqv_out", il);
@ -2782,7 +2781,6 @@ private:
qr = ggml_permute(ctx0, Q, 0, 2, 1, 3);
qr = ggml_reshape_4d(ctx0, ggml_cont(ctx0, qr), d_heads, W, H, B * n_heads);
const int WH_pad = GGML_PAD(W*H, GGML_KQ_MASK_PAD) - W*H;
rw = ggml_mul_mat (ctx0, rw, ggml_cont(ctx0, ggml_permute(ctx0, qr, 0, 2, 1, 3))); // [B*n_heads, W, H, W]
rw = ggml_cont (ctx0, ggml_permute(ctx0, rw, 0, 2, 1, 3)); // [B*n_heads, H, W, W]
@ -2792,7 +2790,6 @@ private:
rh = ggml_reshape_4d(ctx0, rh, 1, H, W*H, n_heads*B);
mask = ggml_add (ctx0, rw, rh); // [B*n_heads, H*W, H, W]
mask = ggml_reshape_4d(ctx0, mask, W*H, W*H, n_heads, B);
mask = ggml_pad (ctx0, mask, 0, WH_pad, 0, 0);
mask = ggml_cast (ctx0, mask, GGML_TYPE_F16);
float scale = 1.0f / sqrtf((float)d_heads);
@ -5213,8 +5210,8 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
} break;
case PROJECTOR_TYPE_DEEPSEEKOCR:
{
const int native_resolutions[] = {
/* 512 tiny ,640 small ,*/ 1024 /* base */, 1280 /* large */
const std::vector native_resolutions = {
/*512 tiny , 640 small, */ 1024 /* base */, 1280 /* large */
};
// original image size
const int orig_w = original_size.width;
@ -5226,10 +5223,10 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
color[i] = (int)(255 * params.image_mean[i]);
}
int mode_i = 0;
size_t mode_i = 0;
int min_diff = orig_area;
for (int i = 0; i < 2; i++) {
for (size_t i = 0; i < native_resolutions.size(); i++) {
int r = native_resolutions[i];
if (std::abs(orig_area - r * r) < min_diff) {
mode_i = i;

View File

@ -28,6 +28,14 @@ if [ "${1:-}" = "huge" ]; then
echo "Include BIG and HUGE models..."
fi
# Check if the first or second argument is "flash_off"; if so, disable flash attention (it is "on" by default)
# This is useful to test that running with flash attention turned off works correctly
FLASH_ATTN="on"
if [ "${2:-}" = "flash_off" ] || [ "${1:-}" = "flash_off" ]; then
FLASH_ATTN="off"
echo "Flash attention disabled..."
fi
###############
arr_prefix=()
@ -142,6 +150,7 @@ for i in "${!arr_hf[@]}"; do
-hf $(printf %q "$hf") \
--image $(printf %q "$SCRIPT_DIR/$inp_file") \
--temp 0 -n 128 \
--flash-attn $(printf %q "$FLASH_ATTN") \
${extra_args}"
# if extra_args does not contain -p, we add a default prompt