| .. |
|
test_basic.py
|
server : host-memory prompt caching (#16391)
|
2025-10-09 18:54:51 +03:00 |
|
test_chat_completion.py
|
server : return HTTP 400 if prompt exceeds context length (#16486)
|
2025-10-10 16:11:07 +02:00 |
|
test_completion.py
|
server : host-memory prompt caching (#16391)
|
2025-10-09 18:54:51 +03:00 |
|
test_ctx_shift.py
|
memory : remove KV cache size padding (#16812)
|
2025-10-28 20:19:44 +02:00 |
|
test_embedding.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_infill.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_lora.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_rerank.py
|
server / ranking : add sorting and management of top_n (#16403)
|
2025-10-11 16:39:04 +03:00 |
|
test_security.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_slot_save.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_speculative.py
|
llama: use FA + max. GPU layers by default (#15434)
|
2025-08-30 16:32:10 +02:00 |
|
test_template.py
|
server : speed up tests (#15836)
|
2025-09-06 14:45:24 +02:00 |
|
test_tokenize.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_tool_call.py
|
server : speed up tests (#15836)
|
2025-09-06 14:45:24 +02:00 |
|
test_vision_api.py
|
server : speed up tests (#15836)
|
2025-09-06 14:45:24 +02:00 |