diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 67c3988bd0..73af812437 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -995,9 +995,6 @@ private: // don't update the cache if the slot's context is empty update_cache = update_cache && tokens.size() > 0; - // TODO: mtmd does not support prompt cache - update_cache = update_cache && (ret->mctx == nullptr); - if (update_cache) { SRV_WRN("%s", "updating prompt cache\n"); diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 739e30a704..d3aba18489 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1900,10 +1900,9 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t return nullptr; } - // TODO: for some reason we can't copy server_tokens, so we have to do this workaround auto & cur = states.emplace_back(); cur = { - /*.tokens =*/ server_tokens(prompt.tokens.get_text_tokens(), false), + /*.tokens =*/ prompt.tokens.clone(), /*.data =*/ std::move(state_data), /*.checkpoints =*/ prompt.checkpoints, };