From e915644c7b7b45dc9fc28bc9fe6f228ba2a8b550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Fri, 20 Mar 2026 17:25:18 +0000 Subject: [PATCH] Check with the quant tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Adrien Gallouët --- tools/server/tests/unit/test_router.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/tests/unit/test_router.py b/tools/server/tests/unit/test_router.py index e85f2c3382..717007a446 100644 --- a/tools/server/tests/unit/test_router.py +++ b/tools/server/tests/unit/test_router.py @@ -103,8 +103,8 @@ def test_router_models_max_evicts_lru(): candidate_models = [ "ggml-org/tinygemma3-GGUF:Q8_0", - "ggml-org/test-model-stories260K", - "ggml-org/test-model-stories260K-infill", + "ggml-org/test-model-stories260K:F32", + "ggml-org/test-model-stories260K-infill:F32", ] # Load only the first 2 models to fill the cache