server : adjust unified KV cache tests (#18716)

2026-01-10 17:51:56 +02:00 · 2026-01-10 17:51:56 +02:00 · f307926482
parent 7fdc8c893d
commit f307926482
1 changed file with 3 additions and 3 deletions
--- a/tools/server/tests/unit/test_completion.py
+++ b/tools/server/tests/unit/test_completion.py
@@ -393,12 +393,12 @@ def test_completion_unified(n_ctx, n_slots, n_predict_vals, expected_success):
    for res, n_predict, expect_ok in zip(results, n_predict_vals, expected_success):
        if expect_ok:
            assert res.status_code == 200
+
+        # note: https://github.com/ggml-org/llama.cpp/pull/18700#issuecomment-3728695581
+        if res.status_code == 200:
            assert "content" in res.body
            if "timings" in res.body:
                assert res.body["timings"]["predicted_n"] == n_predict
-        else:
-            assert res.status_code == 500
-            assert "content" not in res.body


@pytest.mark.parametrize(