From 175091742073f22dc6f287ae39afb226c4a3b9de Mon Sep 17 00:00:00 2001
From: Oliver Simons <osimons@nvidia.com>
Date: Fri, 19 Dec 2025 11:42:10 +0100
Subject: [PATCH] Fix different RNG-states between backend-sampling and
 llama-sampling

By default, we perform a warm-up step where the ggml_cgraph is computed
once. For backend-sampling, this graph contains the sampler, and thus
the RNG state of the backend's dist sampler is advanced once.

The solution is to reset the samplers after the warm-up has finished, so
that the RNG state returns to its seeded state.
---
 common/common.cpp | 8 ++++++++
 common/common.h   | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/common/common.cpp b/common/common.cpp
index 7a89f16250..9792c0b6a6 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1178,6 +1178,12 @@ common_sampler * common_init_result::sampler(llama_seq_id seq_id) {
     return pimpl->samplers[seq_id].get();
 }
 
+void common_init_result::reset_samplers() {
+    for (auto & smpl : pimpl->samplers) { // visit every stored sampler
+        llama_sampler_reset(common_sampler_get(smpl.get()));
+    }
+}
+
 std::vector<llama_adapter_lora_ptr> & common_init_result::lora() {
     return pimpl->lora;
 }
@@ -1311,6 +1317,8 @@ common_init_result_ptr common_init_from_params(common_params & params) {
         llama_synchronize(lctx);
         llama_perf_context_reset(lctx);
         llama_set_warmup(lctx, false);
+        // reset the samplers after warmup so their RNG state returns to the seeded state
+        res->reset_samplers();
     }
 
     return res;
diff --git a/common/common.h b/common/common.h
index 431bc6f3dc..5eeee7d64a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -690,7 +690,9 @@ struct common_init_result {
 
     llama_model * model();
     llama_context * context();
+
     common_sampler * sampler(llama_seq_id seq_id);
+    void             reset_samplers();
 
     std::vector<llama_adapter_lora_ptr> & lora();