diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp
index a1b45e4a3c..7dc119a5ad 100644
--- a/src/llama-memory-hybrid.cpp
+++ b/src/llama-memory-hybrid.cpp
@@ -130,12 +130,14 @@ void llama_memory_hybrid::clear(bool data) {
 }
 
 bool llama_memory_hybrid::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
-    // Try removing from the recurrent cache first since it may fail. If it does
-    // fail, the cache will not have been mutated.
-    if (!mem_recr->seq_rm(seq_id, p0, p1)) {
-        return false;
-    }
-    return mem_attn->seq_rm(seq_id, p0, p1);
+    bool res = true;
+
+    // Call seq_rm on both caches unconditionally: partial success is allowed, i.e. only mem_attn gets modified
+    // This permits making cheap checkpoints where only mem_recr is saved and restored
+    // In that case mem_attn would have to be trimmed manually; res stays true only if both calls succeed
+    res &= mem_recr->seq_rm(seq_id, p0, p1);
+    res &= mem_attn->seq_rm(seq_id, p0, p1);
+    return res;
 }
 
 void llama_memory_hybrid::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) {