import pytest
from utils import *

# Module-level handle shared between the fixture and the tests below.
server: ServerProcess


@pytest.fixture(autouse=True)
def create_server():
    """Rebind the module-level ``server`` to a router preset before each test.

    The fixture is autouse, so every test in this module gets a freshly
    constructed ``ServerPreset.router()`` object. The server is only
    created here; starting it is left to the individual test.
    """
    global server
    server = ServerPreset.router()


@pytest.mark.parametrize(
    "model,success",
    [
        ("ggml-org/tinygemma3-GGUF:Q8_0", True),
        ("non-existent/model", False),
    ],
)
def test_router_chat_completion_stream(model: str, success: bool):
    """Stream a chat completion through the router for the given model.

    Args:
        model: Model identifier placed in the request's ``"model"`` field.
        success: Whether the streamed request is expected to succeed.

    When ``success`` is true the stream must finish without a
    ``ServerError`` and yield some non-empty content; otherwise a
    ``ServerError`` must be raised and no content may have been collected.
    """
    # TODO: make sure the model is in cache (ie. ServerProcess.load_all()) before starting the router server
    server.start()

    request_body = {
        "model": model,
        "max_tokens": 16,
        "messages": [
            {"role": "user", "content": "hello"},
        ],
        "stream": True,
    }

    streamed_text = ""
    failure: ServerError | None = None
    try:
        chunks = server.make_stream_request("POST", "/chat/completions", data=request_body)
        for chunk in chunks:
            # Chunks with an empty "choices" list carry nothing to check.
            if not chunk["choices"]:
                continue
            first_choice = chunk["choices"][0]
            finish_reason = first_choice["finish_reason"]
            if finish_reason in ["stop", "length"]:
                # The terminating chunk must not carry any content.
                assert "content" not in first_choice["delta"]
                continue
            assert finish_reason is None
            # A falsy content value (e.g. None) contributes nothing.
            streamed_text += first_choice["delta"]["content"] or ''
    except ServerError as err:
        failure = err

    if not success:
        assert failure is not None
        assert streamed_text == ""
        return

    assert failure is None
    assert len(streamed_text) > 0