diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 15dafcf102..ed2b8ababf 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -1214,8 +1214,13 @@ static void llama_sampler_dist_backend_set_input(struct llama_sampler * smpl) { auto * sctx = (llama_sampler_dist *) smpl->ctx; GGML_ASSERT(sctx->inp_uniform != nullptr); - std::uniform_real_distribution<float> dist(0.0f, 1.0f); - const float rnd = dist(sctx->rng); + // We sample in double precision and cast to float to match rnd numbers of + // llama_sampler_dist which uses double precision (sampling from + // std::uniform_real_distribution<double> and + // std::uniform_real_distribution<float> with same rng will produce + // different sequences). + std::uniform_real_distribution<double> dist(0.0f, 1.0f); + const float rnd = dist(sctx->rng); ggml_backend_tensor_set(sctx->inp_uniform, &rnd, 0, sizeof(float)); }