fix: Fix shift logic to defer to unified cache
Branch: HybridRecurrentCache
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
parent
6c6ec0003a
commit
cf03d4ae5c
|
|
@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
|
bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
|
||||||
// TODO: Should this return true if the attention cache can shift?
|
// Shifting is trivially supported for recurrent
|
||||||
return false;
|
return kv_attn->get_can_shift();
|
||||||
}
|
}
|
||||||
|
|
||||||
void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
|
void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue