diff --git a/tools/server/tests/unit/test_router.py b/tools/server/tests/unit/test_router.py index e85f2c3382..717007a446 100644 --- a/tools/server/tests/unit/test_router.py +++ b/tools/server/tests/unit/test_router.py @@ -103,8 +103,8 @@ def test_router_models_max_evicts_lru(): candidate_models = [ "ggml-org/tinygemma3-GGUF:Q8_0", - "ggml-org/test-model-stories260K", - "ggml-org/test-model-stories260K-infill", + "ggml-org/test-model-stories260K:F32", + "ggml-org/test-model-stories260K-infill:F32", ] # Load only the first 2 models to fill the cache