server : enable multi-modal prompt caching (#19877)

2026-02-25 15:15:42 +02:00 · 2026-02-25 15:15:42 +02:00 · f20469d919
parent d7d826b3c1
commit f20469d919
2 changed files with 1 addition and 5 deletions
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -995,9 +995,6 @@ private:
            // don't update the cache if the slot's context is empty
            update_cache = update_cache && tokens.size() > 0;

-            // TODO: mtmd does not support prompt cache
-            update_cache = update_cache && (ret->mctx == nullptr);
-
            if (update_cache) {
                SRV_WRN("%s", "updating prompt cache\n");

--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -1900,10 +1900,9 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
        return nullptr;
    }

-    // TODO: for some reason we can't copy server_tokens, so we have to do this workaround
    auto & cur = states.emplace_back();
    cur = {
-        /*.tokens      =*/ server_tokens(prompt.tokens.get_text_tokens(), false),
+        /*.tokens      =*/ prompt.tokens.clone(),
        /*.data        =*/ std::move(state_data),
        /*.checkpoints =*/ prompt.checkpoints,
    };