51 lines
1.4 KiB
Python
51 lines
1.4 KiB
Python
import pytest
|
|
from utils import *
|
|
|
|
# Module-level server handle shared by the tests in this file. It is only
# declared here; the autouse `create_server` fixture below assigns it the
# result of `ServerPreset.router()`.
server: ServerProcess
|
|
|
|
@pytest.fixture(autouse=True)
def create_server():
    """Bind the module-level ``server`` to a router-preset server.

    Declared ``autouse`` so it applies to the tests in this module without
    being requested explicitly. The server is only created here; each test
    is responsible for calling ``server.start()``.
    """
    global server
    server = ServerPreset.router()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model,success",
    [
        ("ggml-org/tinygemma3-GGUF:Q8_0", True),
        ("non-existent/model", False),
    ],
)
def test_router_chat_completion_stream(model: str, success: bool):
    """Stream a chat completion through the router and check the outcome.

    Args:
        model: Model identifier placed in the request's ``"model"`` field.
        success: Whether the request is expected to succeed. When true, the
            stream must finish without a ``ServerError`` and yield non-empty
            content. When false, a ``ServerError`` must be raised and no
            content may have been accumulated.
    """
    # TODO: make sure the model is in cache (i.e. ServerProcess.load_all())
    # before starting the router server
    global server
    server.start()

    payload = {
        "model": model,
        "max_tokens": 16,
        "messages": [
            {"role": "user", "content": "hello"},
        ],
        "stream": True,
    }

    collected = ""
    error: ServerError | None = None
    try:
        stream = server.make_stream_request("POST", "/chat/completions", data=payload)
        for chunk in stream:
            choices = chunk["choices"]
            if not choices:
                # Chunks with no choices carry nothing to verify here.
                continue

            first = choices[0]
            finish_reason = first["finish_reason"]
            if finish_reason in ("stop", "length"):
                # The terminating chunk must not carry any content.
                assert "content" not in first["delta"]
                continue

            # Every non-terminating chunk must leave finish_reason unset.
            assert finish_reason is None
            piece = first["delta"]["content"]
            if piece:
                collected += piece
    except ServerError as caught:
        # Only server-reported errors are captured; assertion failures
        # raised inside the loop still propagate and fail the test.
        error = caught

    if not success:
        assert error is not None
        assert collected == ""
        return

    assert error is None
    assert len(collected) > 0
|