diff --git a/src/models/qwen35.cpp b/src/models/qwen35.cpp index 3108bf331a..d07579ee87 100644 --- a/src/models/qwen35.cpp +++ b/src/models/qwen35.cpp @@ -224,7 +224,7 @@ ggml_tensor * llm_build_qwen35::build_layer_attn_linear( beta = ggml_sigmoid(ctx0, beta); ggml_tensor * alpha = build_lora_mm(model.layers[il].ssm_alpha, cur, model.layers[il].ssm_alpha_s); - alpha = ggml_cont_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); + alpha = ggml_reshape_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); cb(alpha, "alpha", il); ggml_tensor * alpha_biased = ggml_add(ctx0, alpha, model.layers[il].ssm_dt); diff --git a/src/models/qwen35moe.cpp b/src/models/qwen35moe.cpp index 165e2412e5..b38660c0bc 100644 --- a/src/models/qwen35moe.cpp +++ b/src/models/qwen35moe.cpp @@ -224,7 +224,7 @@ ggml_tensor * llm_build_qwen35moe ::build_layer_attn_linear( beta = ggml_sigmoid(ctx0, beta); ggml_tensor * alpha = build_lora_mm(model.layers[il].ssm_alpha, cur, model.layers[il].ssm_alpha_s); - alpha = ggml_cont_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); + alpha = ggml_reshape_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs); cb(alpha, "alpha", il); ggml_tensor * alpha_biased = ggml_add(ctx0, alpha, model.layers[il].ssm_dt);