diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 5e1b512318..bbfc430b4d 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -8119,7 +8119,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params, // For MTP: need room for active cell + checkpoint cells. - // With size=4: active(1) + checkpoint(1) + room(2) ensures - // can_checkpoint (used < size*0.9 = 3.6) can fire even with 3 cells in use. - const uint32_t rs_per_seq = 1 + (n_mtp > 0 ? 3 : 0); + // With size=8 per sequence: active(1) + 7 extra cells (checkpoints + room) ensures + // can_checkpoint (used < size*0.9 = 7.2) can fire even with 7 cells in use. + const uint32_t rs_per_seq = 1 + (n_mtp > 0 ? 7 : 0); const uint32_t rs_size = std::max((uint32_t) 1, cparams.n_seq_max * rs_per_seq); res = new llama_memory_hybrid_iswa(