server : n_tokens_cur and create_checkpoint in draft

2026-03-10 22:32:24 +01:00 · 2026-03-10 22:32:24 +01:00 · 91932ae05b
parent fe4f859a67
commit 91932ae05b
2 changed files with 2 additions and 2 deletions
--- a/common/speculative.h
+++ b/common/speculative.h
@@ -99,7 +99,7 @@ struct common_speculative_session {

    ~common_speculative_session();

-    // dont copy
+    // don't copy
    common_speculative_session(const common_speculative_session &) = delete;
    common_speculative_session & operator=(const common_speculative_session &) = delete;

--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -659,7 +659,7 @@ private:
        size_t create_checkpoint() override {
            const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx_impl.ctx), slot.id);
            const auto pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx_impl.ctx), slot.id);
-            const auto n_tokens_cur = batch.n_tokens;
+            const auto n_tokens_cur = 0; // TODO was ctx_impl.batch.n_tokens; The draft model doesn't change the prompt?
            const auto & cur_with_size = ctx_impl.get_checkpoint(slot, n_tokens_cur, pos_min, pos_max);
            auto & cur = cur_with_size.checkpoint;