Add LLAMA_STATE_SEQ_FLAGS_APPEND for incremental KV state restore

Add a new flag, LLAMA_STATE_SEQ_FLAGS_APPEND (value 2). When it is set,
state_read_meta skips the seq_rm() call that normally clears the destination
sequence, allowing incremental chunk-by-chunk restore to the same sequence
via repeated llama_state_seq_set_data_ext calls.

This enables external KV cache systems to restore opaque state blobs
one chunk at a time without each chunk clearing the previous one.

- Add #define LLAMA_STATE_SEQ_FLAGS_APPEND 2 in llama.h
- Thread flags parameter through state_read() to state_read_meta()
- Gate seq_rm() on !(flags & LLAMA_STATE_SEQ_FLAGS_APPEND)
- Default behavior (flags=0) is unchanged
This commit is contained in:
samuel 2026-03-31 10:46:18 +00:00
parent 26dac845cc
commit ce447e2745
3 changed files with 9 additions and 4 deletions

View File

@ -872,6 +872,9 @@ extern "C" {
// work only with partial states, such as SWA KV cache or recurrent cache (e.g. Mamba)
#define LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY 1
// restore without clearing existing sequence data (append to existing KV cache entries):
// when set, the seq_rm() that otherwise wipes the destination sequence before a
// single-sequence restore is skipped, so repeated restores into the same sequence
// can build it up chunk by chunk
#define LLAMA_STATE_SEQ_FLAGS_APPEND 2
// bitmask of LLAMA_STATE_SEQ_FLAGS_* values; individual flags are tested with bitwise AND
typedef uint32_t llama_state_seq_flags;
LLAMA_API size_t llama_state_seq_get_size_ext(

View File

@ -1722,7 +1722,7 @@ void llama_kv_cache::state_read(llama_io_read_i & io, llama_seq_id seq_id, llama
slot_info sinfo;
bool res = true;
res = res && state_read_meta(io, strm, cell_count, sinfo, seq_id);
res = res && state_read_meta(io, strm, cell_count, sinfo, seq_id, flags);
res = res && state_read_data(io, strm, cell_count, sinfo);
if (!res) {
@ -1868,13 +1868,15 @@ void llama_kv_cache::state_write_data(llama_io_write_i & io, const cell_ranges_t
}
}
bool llama_kv_cache::state_read_meta(llama_io_read_i & io, uint32_t strm, uint32_t cell_count, slot_info & sinfo, llama_seq_id dest_seq_id) {
bool llama_kv_cache::state_read_meta(llama_io_read_i & io, uint32_t strm, uint32_t cell_count, slot_info & sinfo, llama_seq_id dest_seq_id, llama_state_seq_flags flags) {
auto & cells = v_cells[strm];
auto & head = v_heads[strm];
if (dest_seq_id != -1) {
// single sequence
seq_rm(dest_seq_id, -1, -1);
if (!(flags & LLAMA_STATE_SEQ_FLAGS_APPEND)) {
seq_rm(dest_seq_id, -1, -1);
}
llama_batch_allocr balloc(hparams.n_pos_per_embd());

View File

@ -280,7 +280,7 @@ private:
void state_write_meta(llama_io_write_i & io, const cell_ranges_t & cr, llama_seq_id seq_id = -1) const;
void state_write_data(llama_io_write_i & io, const cell_ranges_t & cr) const;
bool state_read_meta(llama_io_read_i & io, uint32_t strm, uint32_t cell_count, slot_info & sinfo, llama_seq_id dest_seq_id = -1);
bool state_read_meta(llama_io_read_i & io, uint32_t strm, uint32_t cell_count, slot_info & sinfo, llama_seq_id dest_seq_id = -1, llama_state_seq_flags flags = 0);
bool state_read_data(llama_io_read_i & io, uint32_t strm, uint32_t cell_count, const slot_info & sinfo);
};