From 463b6a963c2de376e102d878a50d26802f15833c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?=
Date: Fri, 13 Mar 2026 21:25:57 +0100
Subject: [PATCH] tools : enable kvu in perplexity for hellaswag, winogrande, multiple-choice (#19954)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

llama-perplexity -hf unsloth/Qwen3-0.6B-GGUF:Q4_K_M -f winogrande-debiased-eval.csv --winogrande

winogrande_score : tokenizing selected tasks
winogrande_score : calculating winogrande score over selected tasks.
split_equal: sequential split is not supported when there are coupled sequences in the input batch (you may need to use the -kvu flag)
decode: failed to find a memory slot for batch of size 46
failed to decode the batch, n_batch = 2048, ret = 1
winogrande_score: llama_decode() failed

same for hellaswag:

split_equal: sequential split is not supported when there are coupled sequences in the input batch (you may need to use the -kvu flag)
decode: failed to find a memory slot for batch of size 99
failed to decode the batch, n_batch = 2048, ret = 1
hellaswag_score: llama_decode() failed

Signed-off-by: Adrien Gallouët
---
 tools/perplexity/perplexity.cpp | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp
index cc5ea99c4d..0eb062f05d 100644
--- a/tools/perplexity/perplexity.cpp
+++ b/tools/perplexity/perplexity.cpp
@@ -2025,21 +2025,14 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    const bool ppl = !params.hellaswag && !params.winogrande && !params.multiple_choice && !params.kl_divergence;
-
-    if (ppl || params.kl_divergence) {
-        const int32_t n_seq = std::max(1, params.n_batch / n_ctx);
-        const int32_t n_kv = n_seq * n_ctx;
-
-        params.n_parallel = n_seq;
-        params.n_ctx = n_kv;
-
-        params.n_batch = std::min(params.n_batch, n_kv);
-    } else {
-        params.n_batch = std::min(params.n_batch, params.n_ctx);
-        // ensure there's at least enough seq_ids for HellaSwag
+    if (params.hellaswag || params.winogrande || params.multiple_choice) {
         params.n_parallel = std::max(4, params.n_parallel);
+        params.kv_unified = true;
+    } else { // Perplexity & KL divergence
+        params.n_parallel = std::max(1, params.n_batch / n_ctx);
     }
+    params.n_ctx = params.n_parallel * n_ctx;
+    params.n_batch = std::min(params.n_batch, params.n_ctx);
 
     if (params.ppl_stride > 0) {
         LOG_INF("Will perform strided perplexity calculation -> adjusting context size from %d to %d\n",