From 5108add5cb3f382a9980e1ca21b40c4ebe58f1e0 Mon Sep 17 00:00:00 2001
From: Sascha Rogmann <59577610+srogmann@users.noreply.github.com>
Date: Fri, 2 Jan 2026 00:17:53 +0100
Subject: [PATCH] can_speculate() includes self-speculation

Co-authored-by: Georgi Gerganov
---
 tools/server/server-context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index caf115e2ee..d25c8a0c0e 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1153,7 +1153,7 @@ private:
 
         // initialize draft batch
         // TODO: rework speculative decoding [TAG_SERVER_SPEC_REWORK]
-        if (slot.ctx_dft || task.params.speculative.use_self) {
+        if (can_speculate()) {
             llama_batch_free(slot.batch_spec);
 
             slot.batch_spec = llama_batch_init(task.params.speculative.n_max + 1, 0, 1);