server: return_progress to also report 0% processing state (#18305)
This commit is contained in:
parent
5b6c9bc0f3
commit
5ee4e43f26
|
|
@ -2313,6 +2313,12 @@ private:
|
||||||
slot.n_prompt_tokens_processed = 0;
|
slot.n_prompt_tokens_processed = 0;
|
||||||
|
|
||||||
slot.prompt.tokens.keep_first(n_past);
|
slot.prompt.tokens.keep_first(n_past);
|
||||||
|
|
||||||
|
// send initial 0% progress update if needed
|
||||||
|
// this is to signal the client that the request has started processing
|
||||||
|
if (slot.task->params.stream && slot.task->params.return_progress) {
|
||||||
|
send_partial_response(slot, {}, true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!slot.can_split()) {
|
if (!slot.can_split()) {
|
||||||
|
|
|
||||||
|
|
@ -434,8 +434,8 @@ def test_context_size_exceeded_stream():
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"n_batch,batch_count,reuse_cache",
|
"n_batch,batch_count,reuse_cache",
|
||||||
[
|
[
|
||||||
(64, 3, False),
|
(64, 4, False),
|
||||||
(64, 1, True),
|
(64, 2, True),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
def test_return_progress(n_batch, batch_count, reuse_cache):
|
def test_return_progress(n_batch, batch_count, reuse_cache):
|
||||||
|
|
@ -462,10 +462,18 @@ def test_return_progress(n_batch, batch_count, reuse_cache):
|
||||||
res = make_cmpl_request()
|
res = make_cmpl_request()
|
||||||
last_progress = None
|
last_progress = None
|
||||||
total_batch_count = 0
|
total_batch_count = 0
|
||||||
|
|
||||||
for data in res:
|
for data in res:
|
||||||
cur_progress = data.get("prompt_progress", None)
|
cur_progress = data.get("prompt_progress", None)
|
||||||
if cur_progress is None:
|
if cur_progress is None:
|
||||||
continue
|
continue
|
||||||
|
if total_batch_count == 0:
|
||||||
|
# first progress report must have n_cache == n_processed
|
||||||
|
assert cur_progress["total"] > 0
|
||||||
|
assert cur_progress["cache"] == cur_progress["processed"]
|
||||||
|
if reuse_cache:
|
||||||
|
# when reusing cache, we expect some cached tokens
|
||||||
|
assert cur_progress["cache"] > 0
|
||||||
if last_progress is not None:
|
if last_progress is not None:
|
||||||
assert cur_progress["total"] == last_progress["total"]
|
assert cur_progress["total"] == last_progress["total"]
|
||||||
assert cur_progress["cache"] == last_progress["cache"]
|
assert cur_progress["cache"] == last_progress["cache"]
|
||||||
|
|
@ -473,6 +481,7 @@ def test_return_progress(n_batch, batch_count, reuse_cache):
|
||||||
total_batch_count += 1
|
total_batch_count += 1
|
||||||
last_progress = cur_progress
|
last_progress = cur_progress
|
||||||
|
|
||||||
|
# last progress should indicate completion (all tokens processed)
|
||||||
assert last_progress is not None
|
assert last_progress is not None
|
||||||
assert last_progress["total"] > 0
|
assert last_progress["total"] > 0
|
||||||
assert last_progress["processed"] == last_progress["total"]
|
assert last_progress["processed"] == last_progress["total"]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue