server : enable backend sampling for multiple outputs per sequence
This commit is contained in:
parent
765998f2d7
commit
2235b4be49
|
|
@ -1149,9 +1149,6 @@ private:
|
||||||
|
|
||||||
backend_sampling &= task.params.sampling.backend_sampling;
|
backend_sampling &= task.params.sampling.backend_sampling;
|
||||||
|
|
||||||
// TODO: speculative decoding requires multiple samples per batch - not supported yet
|
|
||||||
backend_sampling &= !(slot.spec && task.params.speculative.n_max > 0);
|
|
||||||
|
|
||||||
// TODO: getting post/pre sampling logits is not yet supported with backend sampling
|
// TODO: getting post/pre sampling logits is not yet supported with backend sampling
|
||||||
backend_sampling &= !need_logits;
|
backend_sampling &= !need_logits;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue