diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 3e7b0f3674..90cd885a60 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -1598,6 +1598,7 @@ ggml_tensor * llm_graph_context::build_attn( // optionally store to KV cache if (k_cur) { const auto & k_idxs = is_swa ? inp->get_k_idxs_swa() : inp->get_k_idxs(); + ggml_build_forward_expand(gf, mctx_cur->cpy_k(ctx0, k_cur, k_idxs, il)); }