server : enable multi-modal prompt caching (#19877)
This commit is contained in:
parent
d7d826b3c1
commit
f20469d919
|
|
@ -995,9 +995,6 @@ private:
|
|||
// don't update the cache if the slot's context is empty
|
||||
update_cache = update_cache && tokens.size() > 0;
|
||||
|
||||
// TODO: mtmd does not support prompt cache
|
||||
update_cache = update_cache && (ret->mctx == nullptr);
|
||||
|
||||
if (update_cache) {
|
||||
SRV_WRN("%s", "updating prompt cache\n");
|
||||
|
||||
|
|
|
|||
|
|
@ -1900,10 +1900,9 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// TODO: for some reason we can't copy server_tokens, so we have to do this workaround
|
||||
auto & cur = states.emplace_back();
|
||||
cur = {
|
||||
/*.tokens =*/ server_tokens(prompt.tokens.get_text_tokens(), false),
|
||||
/*.tokens =*/ prompt.tokens.clone(),
|
||||
/*.data =*/ std::move(state_data),
|
||||
/*.checkpoints =*/ prompt.checkpoints,
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue