server : n_tokens_cur and create_checkpoint in draft

This commit is contained in:
Sascha Rogmann 2026-03-10 22:32:24 +01:00
parent fe4f859a67
commit 91932ae05b
2 changed files with 2 additions and 2 deletions

View File

@ -99,7 +99,7 @@ struct common_speculative_session {
~common_speculative_session();
// dont copy
// don't copy
common_speculative_session(const common_speculative_session &) = delete;
common_speculative_session & operator=(const common_speculative_session &) = delete;

View File

@ -659,7 +659,7 @@ private:
size_t create_checkpoint() override {
const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx_impl.ctx), slot.id);
const auto pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx_impl.ctx), slot.id);
const auto n_tokens_cur = batch.n_tokens;
const auto n_tokens_cur = 0; // TODO was ctx_impl.batch.n_tokens; The draft model doesn't change the prompt?
const auto & cur_with_size = ctx_impl.get_checkpoint(slot, n_tokens_cur, pos_min, pos_max);
auto & cur = cur_with_size.checkpoint;