From 3c8521c4f59a07302076dc5d41424bb3a25777d8 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Mon, 16 Mar 2026 22:07:13 +0800 Subject: [PATCH] llama-graph: replace cont with reshape for alpha in qwen35 (#20640) --- src/models/qwen35.cpp | 2 +- src/models/qwen35moe.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/models/qwen35.cpp b/src/models/qwen35.cpp index 3108bf331a..d07579ee87 100644 --- a/src/models/qwen35.cpp +++ b/src/models/qwen35.cpp @@ -224,7 +224,7 @@ ggml_tensor * llm_build_qwen35::build_layer_attn_linear( beta = ggml_sigmoid(ctx0, beta); ggml_tensor * alpha = build_lora_mm(model.layers[il].ssm_alpha, cur, model.layers[il].ssm_alpha_s); - alpha = ggml_cont_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); + alpha = ggml_reshape_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); cb(alpha, "alpha", il); ggml_tensor * alpha_biased = ggml_add(ctx0, alpha, model.layers[il].ssm_dt); diff --git a/src/models/qwen35moe.cpp b/src/models/qwen35moe.cpp index 165e2412e5..b38660c0bc 100644 --- a/src/models/qwen35moe.cpp +++ b/src/models/qwen35moe.cpp @@ -224,7 +224,7 @@ ggml_tensor * llm_build_qwen35moe ::build_layer_attn_linear( beta = ggml_sigmoid(ctx0, beta); ggml_tensor * alpha = build_lora_mm(model.layers[il].ssm_alpha, cur, model.layers[il].ssm_alpha_s); - alpha = ggml_cont_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); + alpha = ggml_reshape_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); cb(alpha, "alpha", il); ggml_tensor * alpha_biased = ggml_add(ctx0, alpha, model.layers[il].ssm_dt);