import os import tempfile import pytest from utils import * server = ServerPreset.tinyllama2() class LogReader: def __init__(self, path): self.path = path self.pos = 0 def drain(self): with open(self.path) as f: f.seek(self.pos) content = f.read() self.pos = f.tell() return content @pytest.fixture(autouse=True) def create_server(): global server server = ServerPreset.tinyllama2() server.n_slots = 2 server.n_predict = 4 server.temperature = 0.0 server.server_slots = True server.cache_ram = 100 server.kv_unified = True server.debug = True fd, server.log_path = tempfile.mkstemp(suffix='.log') os.close(fd) yield LONG_PROMPT = ( "Once upon a time in a land far away, there lived a brave knight " "who traveled across mountains and rivers to find the legendary " "golden sword hidden deep within the enchanted forest of whispers. " "He met many creatures along the way including dragons and fairies " "and wizards who helped him on his noble quest to save the kingdom." ) # idle slot cleared on launch should restore from cache-ram def test_clear_and_restore(): global server server.start() log = LogReader(server.log_path) # verify feature is enabled assert "__TEST_TAG_CLEAR_IDLE_ENABLED__" in log.drain() res = server.make_request("POST", "/completion", data={ "prompt": LONG_PROMPT, "id_slot": 0, "cache_prompt": True, }) assert res.status_code == 200 original_prompt_n = res.body["timings"]["prompt_n"] # Slot 0 is the only slot with KV — should NOT be cleared assert "__TEST_TAG_CLEAR_IDLE_SLOT__" not in log.drain() # Launching slot 1 clears idle slot 0 res = server.make_request("POST", "/completion", data={ "prompt": "The quick brown fox", "id_slot": 1, "cache_prompt": True, }) assert res.status_code == 200 assert "__TEST_TAG_CLEAR_IDLE_SLOT__" in log.drain() # Re-send same prompt — should restore from cache-ram res = server.make_request("POST", "/completion", data={ "prompt": LONG_PROMPT, "cache_prompt": True, }) assert res.status_code == 200 assert "updating prompt cache" in log.drain() assert res.body["timings"]["cache_n"] > 0 assert res.body["timings"]["prompt_n"] < original_prompt_n # Follow-up — slot 0 kept its KV, no clearing needed res = server.make_request("POST", "/completion", data={ "prompt": LONG_PROMPT + " The knight finally reached the castle gates.", "cache_prompt": True, }) assert res.status_code == 200 assert "__TEST_TAG_CLEAR_IDLE_SLOT__" not in log.drain() def test_disabled_with_flag(): global server server.no_clear_idle = True server.start() log = LogReader(server.log_path) # Feature should not be enabled assert "__TEST_TAG_CLEAR_IDLE_ENABLED__" not in log.drain() res = server.make_request("POST", "/completion", data={ "prompt": LONG_PROMPT, "id_slot": 0, "cache_prompt": True, }) assert res.status_code == 200 # Request on different slot — should NOT trigger clearing res = server.make_request("POST", "/completion", data={ "prompt": "The quick brown fox", "id_slot": 1, "cache_prompt": True, }) assert res.status_code == 200 assert "__TEST_TAG_CLEAR_IDLE_SLOT__" not in log.drain()