| .. |
|
test_basic.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_chat_completion.py
|
server: fix OpenAI API compatibility for usage statistics in chat streams (#15444)
|
2025-08-21 00:10:08 +02:00 |
|
test_completion.py
|
server : Support multimodal completion and embeddings prompts in JSON format (#15108)
|
2025-08-22 10:10:14 +02:00 |
|
test_ctx_shift.py
|
llama: use FA + max. GPU layers by default (#15434)
|
2025-08-30 16:32:10 +02:00 |
|
test_embedding.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_infill.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_lora.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_rerank.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_security.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_slot_save.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_speculative.py
|
llama: use FA + max. GPU layers by default (#15434)
|
2025-08-30 16:32:10 +02:00 |
|
test_template.py
|
`server`: add `--reasoning-budget 0` to disable thinking (incl. qwen3 w/ enable_thinking:false) (#13771)
|
2025-05-26 00:30:51 +01:00 |
|
test_tokenize.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_tool_call.py
|
server : disable context shift by default (#15416)
|
2025-08-19 16:46:37 +03:00 |
|
test_vision_api.py
|
server : Support multimodal completion and embeddings prompts in JSON format (#15108)
|
2025-08-22 10:10:14 +02:00 |