server : fix "can batch with" bug (#17263)
This commit is contained in:
parent
45c6ef7307
commit
d396b43748
|
|
@ -3591,13 +3591,13 @@ struct server_context {
|
|||
// next, batch any pending prompts without exceeding n_batch
|
||||
if (params_base.cont_batching || batch.n_tokens == 0) {
|
||||
for (auto & slot : slots) {
|
||||
if (!slot.is_processing()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check if we can batch this slot with the previous one
|
||||
if (slot.is_processing()) {
|
||||
if (!slot_batched) {
|
||||
slot_batched = &slot;
|
||||
} else if (!slot_batched->can_batch_with(slot)) {
|
||||
continue;
|
||||
}
|
||||
if (slot_batched && !slot_batched->can_batch_with(slot)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// this slot still has a prompt to be processed
|
||||
|
|
@ -4028,6 +4028,10 @@ struct server_context {
|
|||
}
|
||||
}
|
||||
|
||||
if (!slot_batched) {
|
||||
slot_batched = &slot;
|
||||
}
|
||||
|
||||
if (batch.n_tokens >= n_batch) {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue