From 87b2719eca55b30afff600fc7f61c6cce9452cbf Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Thu, 4 Dec 2025 08:13:49 +0100
Subject: [PATCH] sampling : stop short if backend sampler sampled a token

This commit modifies the graph-building logic to immediately continue
when a token has already been sampled by the backend sampler.

It also updates the test for backend temperature sampling to include
top-k and distribution samplers in the chain, to verify that they do
not produce any logits (they are not run).
---
 src/llama-graph.cpp            | 1 +
 tests/test-backend-sampler.cpp | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index a621c4ebf5..c0ff7d1791 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -2100,6 +2100,7 @@ void llm_graph_context::build_sampling() const {
         if (data.sampled != nullptr) {
             res->t_sampled[seq_id] = data.sampled;
             ggml_build_forward_expand(gf, data.sampled);
+            continue;
         }
 
         if (data.probs != nullptr) {
diff --git a/tests/test-backend-sampler.cpp b/tests/test-backend-sampler.cpp
index f56cce6350..eb3a0e248d 100644
--- a/tests/test-backend-sampler.cpp
+++ b/tests/test-backend-sampler.cpp
@@ -441,6 +441,8 @@ static void test_backend_temp_sampling(const char * model_path) {
     struct llama_sampler_chain_params backend_chain_params = llama_sampler_chain_default_params();
     struct llama_sampler * backend_sampler_chain = llama_sampler_chain_init(backend_chain_params);
     llama_sampler_chain_add(backend_sampler_chain, llama_sampler_init_temp(temp));
+    llama_sampler_chain_add(backend_sampler_chain, llama_sampler_init_top_k(40));
+    llama_sampler_chain_add(backend_sampler_chain, llama_sampler_init_dist(18));
 
     std::vector backend_sampler_configs = {
         { seq_id, backend_sampler_chain },