From 791ecb94ff1baa7161e947c5da64e2d960891c52 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 30 Dec 2025 20:12:49 +0200
Subject: [PATCH] sampling : zero-initialize input buffers

---
 src/llama-sampling.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 042e40de20..48291a3a7c 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1156,6 +1156,8 @@ static bool llama_sampler_dist_backend_init(
 
         // Allocate all tensors from our context to the backend
         sctx->inp_buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(sctx->inp_ctx.get(), buft));
+
+        ggml_backend_buffer_clear(sctx->inp_buf.get(), 0);
     }
 
     const bool res = llama_sampler_backend_support(smpl, buft);
@@ -1232,7 +1234,8 @@ static void llama_sampler_dist_backend_set_input(struct llama_sampler * smpl) {
     // std::uniform_real_distribution with same rng will produce
     // different sequences).
     std::uniform_real_distribution dist(0.0f, 1.0f);
-    const float rnd = dist(sctx->rng);
+    const float rnd = dist(sctx->rng);
+
     ggml_backend_tensor_set(sctx->inp_uniform, &rnd, 0, sizeof(float));
 }
 
@@ -3431,6 +3434,8 @@ static bool llama_sampler_logit_bias_backend_init(
     // Allocate all tensors from our context to the backend
     sctx->inp_buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(sctx->inp_ctx.get(), buft));
 
+    ggml_backend_buffer_clear(sctx->inp_buf.get(), 0);
+
     return true;
 }
 