From 5c92c76e9edd20b040c1b9f14c8b40483cfdb601 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 25 Feb 2026 15:39:57 +0100 Subject: [PATCH] sampling : add clamping to backend dist sampler This commit adds clamping to the backend distribution sampler to handle the case where idxf is zero. The index is computed as (-1 * idxf) + n, where n is the size of the mask, so a zero idxf yields idx == n, which is an out-of-bounds index and will cause a crash. Clamping idxf to the range [1, n] keeps idx within [0, n - 1]. This can be reproduced by explicitly setting idxf to zero: ```c++ idxf = ggml_scale(ctx, idxf, 0.0f); ``` --- src/llama-sampler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/llama-sampler.cpp b/src/llama-sampler.cpp index 9bbc5dbde2..a5a67d206f 100644 --- a/src/llama-sampler.cpp +++ b/src/llama-sampler.cpp @@ -1180,6 +1180,9 @@ static void llama_sampler_dist_backend_apply( struct ggml_tensor * idxf = ggml_sum(ctx, mask); ggml_set_name(idxf, "dist_index_f32"); + // Clamp to prevent out-of-bounds access when computing the index. + idxf = ggml_clamp(ctx, idxf, 1.0f, mask->ne[0]); + // Use ggml_scale_bias to scale the index value by -1 and then add the size // of the mask to that value so we get the correct index ((-1 * idxf) + n). struct ggml_tensor * idx = ggml_cast(ctx, ggml_scale_bias(ctx, idxf, -1.0f, mask->ne[0]), GGML_TYPE_I32);