Fix different RNG-states between backend-sampling and llama-sampling

By default, we perform a warm-up step in which the ggml_cgraph is computed once. For backend-sampling, this graph contains the sampler, so the RNG state of the backend's dist sampler is advanced once during warm-up. The solution is to reset the samplers after the warm-up has finished.
2025-12-19 11:42:10 +01:00 · 2025-12-19 11:42:10 +01:00 · 1750917420
parent bc5195c585
commit 1750917420
2 changed files with 10 additions and 0 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -1178,6 +1178,12 @@ common_sampler * common_init_result::sampler(llama_seq_id seq_id) {
    return pimpl->samplers[seq_id].get();
 }

+void common_init_result::reset_samplers() {
+    for (const auto & smpl : pimpl->samplers) { // visit every stored common_sampler
+        llama_sampler_reset(common_sampler_get(smpl.get()));
+    }
+}
+
 std::vector<llama_adapter_lora_ptr> & common_init_result::lora() {
    return pimpl->lora;
 }
@ -1311,6 +1317,8 @@ common_init_result_ptr common_init_from_params(common_params & params) {
        llama_synchronize(lctx);
        llama_perf_context_reset(lctx);
        llama_set_warmup(lctx, false);
+        // reset the samplers after warmup so that their RNG state is back at the seeded state
+        res->reset_samplers();
    }

    return res;
--- a/common/common.h
+++ b/common/common.h
@ -690,7 +690,9 @@ struct common_init_result {

    llama_model * model();
    llama_context * context();
+
    common_sampler * sampler(llama_seq_id seq_id);
+    void             reset_samplers();

    std::vector<llama_adapter_lora_ptr> & lora();