From 2235b4be499a2a3c03a12a7ca0c07f2ece371cf2 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 25 Feb 2026 15:39:35 +0100 Subject: [PATCH] server : enable backend sampling for multiple outputs per sequence --- tools/server/server-context.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index eba463e4da..8a20225240 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -1149,9 +1149,6 @@ private: backend_sampling &= task.params.sampling.backend_sampling; - // TODO: speculative decoding requires multiple samples per batch - not supported yet - backend_sampling &= !(slot.spec && task.params.speculative.n_max > 0); - // TODO: getting post/pre sampling logits is not yet supported with backend sampling backend_sampling &= !need_logits;