server : enable backend sampling for multiple outputs per sequence
This commit is contained in:
parent
765998f2d7
commit
2235b4be49
|
|
@ -1149,9 +1149,6 @@ private:
|
|||
|
||||
backend_sampling &= task.params.sampling.backend_sampling;
|
||||
|
||||
// TODO: speculative decoding requires multiple samples per batch - not supported yet
|
||||
backend_sampling &= !(slot.spec && task.params.speculative.n_max > 0);
|
||||
|
||||
// TODO: getting post/pre sampling logits is not yet supported with backend sampling
|
||||
backend_sampling &= !need_logits;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue