From d74eb61aa7a45ee63a6ac63a281d3f6e6985c6db Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 19 Nov 2025 11:29:26 +0100 Subject: [PATCH] squash! sampling : simplify backend sampling logic decode Fix condition to check if backend actually sampled tokens, not just that backend samplers are available. --- src/llama-context.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 8485416c3e..15ca80a735 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -1361,7 +1361,9 @@ int llama_context::decode(const llama_batch & batch_inp) { // ggml_graph_dump_dot(gf, NULL, "llama.dot"); //} - if (has_backend_samplers) { + const bool backend_has_sampled = !res->t_sampled_tokens.empty() || !res->t_sampled_probs.empty() || !res->t_sampled_logits.empty(); + + if (has_backend_samplers && backend_has_sampled) { const auto seq_to_batch_idx = build_seq_to_batch_idx(ubatch); // If a backend sampler has sampled a token we only want to copy the @@ -1381,7 +1383,9 @@ int llama_context::decode(const llama_batch & batch_inp) { // 2) CPU samplers to associate filtered logits with their token ids. copy_tensor_async_token_ids(res->t_sampled_token_ids, sampled_token_ids_map, seq_to_batch_idx, sched.get()); - } else { + } + + if (!backend_has_sampled) { auto * t_logits = res->get_logits(); auto * t_embd = cparams.embeddings ? res->get_embd() : nullptr;