server: replace can_speculate() with slot.can_speculate()
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
9fee55e2d5
commit
a7bb6b5624
|
|
@@ -1154,7 +1154,7 @@ private:
|
|||
|
||||
// initialize draft batch
|
||||
// TODO: rework speculative decoding [TAG_SERVER_SPEC_REWORK]
|
||||
-if (can_speculate()) {
|
||||
+if (slot.can_speculate()) {
|
||||
llama_batch_free(slot.batch_spec);
|
||||
|
||||
slot.batch_spec = llama_batch_init(task.params.speculative.n_max + 1, 0, 1);
|
||||
|
|
|
|||
Loading…
Reference in New Issue