diff --git a/common/speculative.h b/common/speculative.h index e9595b4bb9..7fb9c46a04 100644 --- a/common/speculative.h +++ b/common/speculative.h @@ -99,7 +99,7 @@ struct common_speculative_session { ~common_speculative_session(); - // dont copy + // don't copy common_speculative_session(const common_speculative_session &) = delete; common_speculative_session & operator=(const common_speculative_session &) = delete; diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index bbbd0e2154..db9ac5c9b6 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -659,7 +659,7 @@ private: size_t create_checkpoint() override { const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx_impl.ctx), slot.id); const auto pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx_impl.ctx), slot.id); - const auto n_tokens_cur = batch.n_tokens; + const auto n_tokens_cur = 0; // TODO was ctx_impl.batch.n_tokens; The draft model doesn't change the prompt? const auto & cur_with_size = ctx_impl.get_checkpoint(slot, n_tokens_cur, pos_min, pos_max); auto & cur = cur_with_size.checkpoint;