Jan Boon 2026-02-13 16:12:42 +03:00 committed by GitHub
commit efed63bc5c
7 changed files with 501 additions and 25 deletions

View File

@ -1577,6 +1577,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.sampling.ignore_eos = true;
}
).set_sparam());
add_opt(common_arg(
{"--blue-noise"},
"use blue noise RNG for sampling instead of white noise",
[](common_params & params) {
params.sampling.blue_noise = true;
}
).set_sparam());
add_opt(common_arg(
{"--rng-type"}, "{mt19937,lowbias32}",
"RNG type for sampling (default: mt19937)",
[](common_params & params, const std::string & value) {
if (value == "mt19937") {
params.sampling.rng_type = LLAMA_RNG_TYPE_MT19937;
} else if (value == "lowbias32") {
params.sampling.rng_type = LLAMA_RNG_TYPE_LOWBIAS32;
} else {
throw std::invalid_argument("invalid value");
}
}
).set_sparam());
add_opt(common_arg(
{"--temp"}, "N",
string_format("temperature (default: %.2f)", (double)params.sampling.temp),

View File

@ -209,6 +209,8 @@ struct common_params_sampling {
bool ignore_eos = false;
bool no_perf = false; // disable performance metrics
bool timing_per_token = false;
bool blue_noise = false; // use blue noise RNG instead of white noise for dist sampler
enum llama_rng_type rng_type = LLAMA_RNG_TYPE_MT19937; // RNG type for dist sampler
uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers

View File

@ -167,11 +167,14 @@ std::string common_params_sampling::print() const {
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
"\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n"
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f, adaptive_target = %.3f, adaptive_decay = %.3f",
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f, adaptive_target = %.3f, adaptive_decay = %.3f\n"
"\tblue_noise = %s, rng_type = %s",
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp,
mirostat, mirostat_eta, mirostat_tau, adaptive_target, adaptive_decay);
mirostat, mirostat_eta, mirostat_tau, adaptive_target, adaptive_decay,
blue_noise ? "true" : "false",
rng_type == LLAMA_RNG_TYPE_LOWBIAS32 ? "lowbias32" : "mt19937");
return std::string(result);
}
@ -313,7 +316,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
samplers.push_back(llama_sampler_init_adaptive_p(params.adaptive_target, params.adaptive_decay, params.seed));
} else {
// default: sample from distribution
samplers.push_back(llama_sampler_init_dist(params.seed));
samplers.push_back(llama_sampler_init_dist_rng(params.seed, params.blue_noise, params.rng_type));
}
} else if (params.mirostat == 1) {
samplers.push_back(llama_sampler_init_temp(params.temp));

View File

@ -188,6 +188,11 @@ extern "C" {
LLAMA_API const char * llama_flash_attn_type_name(enum llama_flash_attn_type flash_attn_type);
enum llama_rng_type {
LLAMA_RNG_TYPE_MT19937 = 0,
LLAMA_RNG_TYPE_LOWBIAS32 = 1,
};
enum llama_split_mode {
LLAMA_SPLIT_MODE_NONE = 0, // single GPU
LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
@ -1295,7 +1300,8 @@ extern "C" {
LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
/// seed == LLAMA_DEFAULT_SEED to use a random seed.
LLAMA_API struct llama_sampler * llama_sampler_init_dist(uint32_t seed);
LLAMA_API struct llama_sampler * llama_sampler_init_dist (uint32_t seed);
LLAMA_API struct llama_sampler * llama_sampler_init_dist_rng(uint32_t seed, bool blue_noise, enum llama_rng_type rng_type);
/// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
/// Setting k <= 0 makes this a noop
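The new llama_sampler_init_dist_rng sits next to the existing llama_sampler_init_dist in the public header. A minimal client-side sketch of how it slots into a sampler chain (illustrative only; the chain helpers and the top-k/temp samplers are pre-existing llama.h API, only the dist_rng call is new here):

    #include "llama.h"

    static struct llama_sampler * make_blue_noise_chain(uint32_t seed) {
        struct llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
        llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));
        llama_sampler_chain_add(chain, llama_sampler_init_temp(0.8f));
        // final sampler: dist with blue noise over the stateless lowbias32 counter RNG
        llama_sampler_chain_add(chain, llama_sampler_init_dist_rng(seed, /*blue_noise=*/true, LLAMA_RNG_TYPE_LOWBIAS32));
        return chain; // caller releases it with llama_sampler_free()
    }

Passing blue_noise = false with LLAMA_RNG_TYPE_MT19937 reproduces the behaviour of the old llama_sampler_init_dist(seed), which the diff below keeps as a thin wrapper.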

View File

@ -214,7 +214,8 @@ static void llama_token_data_array_partial_sort_inplace(llama_token_data_array *
cur_p->sorted = true;
}
static int llama_sample_dist(llama_token_data_array * cur_p, std::mt19937 & rng) {
template<typename RNG>
static int llama_sample_dist(llama_token_data_array * cur_p, RNG & rng) {
// iterator for the probabilities
#ifdef __GNUC__
#pragma GCC diagnostic push
@ -333,6 +334,318 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
cur_p->size = k;
}
// abstract RNG interface for the dist sampler
struct llama_dist_rng {
virtual ~llama_dist_rng() = default;
// whether the RNG requires the candidates to be sorted (by probability) to preserve its noise properties
// this also implies that the RNG output must be consumed in sequential order
virtual bool requires_sorted() = 0;
virtual uint32_t next32() = 0; // uniform 32 bits
virtual uint64_t next64() = 0; // uniform 64 bits
virtual double nextf() = 0; // uniform double in [0, 1)
virtual void reseed(uint32_t s) = 0;
virtual void reset() = 0; // reset to post-seed state
virtual std::unique_ptr<llama_dist_rng> clone() const = 0;
};
// generative error diffusion for sequential blue noise
// pseudo-random number generator with ~6 dB/octave blue noise
// this generator produces a uniform distribution
// important: blue noise properties cannot be preserved when
// the generator is used for multiple purposes simultaneously
// nor when multiple next calls are used to construct a larger value
// nor when integer outputs are used with the modulo operator
struct blue_noise_rng {
uint8_t bit_depth = 0;
std::unique_ptr<llama_dist_rng> rng;
// binary tree of 1-bit 50% duty cycle error diffusion dithering blue noise generators
std::vector<std::array<int8_t, 2>> states; // {err0, err1} per tree node
blue_noise_rng() = default;
blue_noise_rng(uint8_t bit_depth, std::unique_ptr<llama_dist_rng> rng) {
init(bit_depth, std::move(rng));
}
// custom copy (clone the underlying RNG)
blue_noise_rng(const blue_noise_rng & other)
: bit_depth(other.bit_depth)
, rng(other.rng ? other.rng->clone() : nullptr)
, states(other.states) {}
blue_noise_rng & operator=(const blue_noise_rng & other) {
if (this != &other) {
bit_depth = other.bit_depth;
rng = other.rng ? other.rng->clone() : nullptr;
states = other.states;
}
return *this;
}
blue_noise_rng(blue_noise_rng &&) = default;
blue_noise_rng & operator=(blue_noise_rng &&) = default;
void init(uint8_t depth, std::unique_ptr<llama_dist_rng> source) {
bit_depth = std::clamp<uint8_t>(depth, 1, 16);
rng = std::move(source);
const int n = (1 << bit_depth) - 1;
states.resize(n); // at 16-bit depth, this uses 128KB of state
reset_states();
}
void reseed(uint32_t s) {
rng->reseed(s);
reset_states();
}
void reset_states() {
const int n = (int)states.size();
// 5 reachable states, weighted 3:3:2:1:1
// weights determined empirically
static const int8_t tbl[10][2] = {
{ 0, 0}, { 0, 0}, { 0, 0},
{-1, 0}, {-1, 0}, {-1, 0},
{ 0, -1}, { 0, -1},
{-2, 0},
{-1, -1},
};
for (int i = 0; i < n; i++) {
uint32_t h = (uint32_t)(((uint64_t)rng->next32() * 10) >> 32);
states[i] = {tbl[h][0], tbl[h][1]}; // random initial state
}
}
uint16_t advance(uint32_t h) {
// traverse binary tree, one error diffusion ditherer per population split
// thresholding output at any value still produces blue noise
uint32_t acc = 0;
for (int level = 0; level < bit_depth; level++) {
auto & s = states[(1 << level) - 1 + acc]; // heap-style index
int out = (s[0] >= 0) ? 1 : 0;
int8_t qe = s[0] + (int8_t)(out ? -1 : 1); // inverse autocorrelation
s[0] = s[1]; // step forward
s[1] = 0;
// error diffusion dithering using binary weight perturbation
s[(h >> (31 - level)) & 1 ? 0 : 1] += qe; // forward to t+1 or defer to t+2
acc = acc * 2 + out;
}
return (uint16_t)acc;
}
uint16_t next() {
uint32_t h = rng->next32();
return advance(h);
}
// blue noise in the upper bit_depth bits, white noise in the lower bits
// do not use with modulo operator, as it would just produce white noise
uint32_t next32() {
uint32_t h = rng->next32();
uint32_t val = advance(h);
return (val << (32 - bit_depth)) | (h & ((1u << (32 - bit_depth)) - 1));
}
// blue noise in the upper bits, white noise in the lower bits
uint64_t next64() {
uint64_t r = rng->next64();
uint32_t val = advance((uint32_t)(r >> 32));
return ((uint64_t)val << (64 - bit_depth)) | (r & ((UINT64_C(1) << (64 - bit_depth)) - 1));
}
// uniform double in [0, 1) with blue noise temporal autocorrelation
double nextf() {
uint64_t combined = next64();
return (combined >> 11) * 0x1.0p-53;
}
};
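A usage sketch of the caveats in the header comment above (hypothetical, assumes the types defined in this file are in scope): draw once per decision, thresholding the uniform output is fine, but splitting one decision across several calls or reducing an integer output with % is not.

    blue_noise_rng bn(16, std::make_unique<llama_dist_rng_lowbias32>(1234));
    for (int step = 0; step < 8; ++step) {
        const double u = bn.nextf();        // one draw per sampling decision
        const bool fire = u < 0.25;         // thresholding preserves the blue-noise spectrum
        // const int bad = bn.next32() % 5; // don't: modulo keeps only the white low bits
        (void) fire;
    }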
// adapter to satisfy UniformRandomBitGenerator for std::discrete_distribution
// note: not guaranteed to preserve blue noise properties
// this is only used in a disabled branch of llama_sampler_dist_apply, added for compatibility
struct llama_dist_urbg {
using result_type = uint32_t;
llama_dist_rng & rng;
static constexpr result_type min() { return 0; }
static constexpr result_type max() { return UINT32_MAX; }
result_type operator()() { return rng.next32(); }
};
// wrapper: uses the existing llama_sample_dist when the RNG does not require sorted input,
// otherwise walks the CDF directly over the sorted candidates
// currently only used in a disabled branch of llama_sampler_dist_apply, kept for compatibility and potential use by other samplers
// the normalized flag skips recomputing the probability sum when the probs already sum to 1
static int llama_sample_dist_rng(llama_token_data_array * cur_p, llama_dist_rng & rng, bool normalized = false) {
if (!rng.requires_sorted()) {
llama_dist_urbg urbg{rng};
return llama_sample_dist(cur_p, urbg);
}
if (!cur_p->sorted) {
llama_token_data_array_partial_sort_inplace(cur_p, cur_p->size);
}
const double rnd = rng.nextf();
double sum_run = 0.0;
if (normalized) {
for (size_t i = 0; i < cur_p->size; ++i) {
sum_run += cur_p->data[i].p;
if (sum_run >= rnd) {
return i;
}
}
} else {
double sum_cum = 0.0;
for (size_t i = 0; i < cur_p->size; ++i) {
sum_cum += cur_p->data[i].p;
}
const double sum_tgt = sum_cum * rnd;
for (size_t i = 0; i < cur_p->size; ++i) {
sum_run += cur_p->data[i].p;
if (sum_run >= sum_tgt) {
return i;
}
}
}
return (int)(cur_p->size - 1);
}
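Worked example of the CDF walk above (illustrative): with sorted probabilities {0.5, 0.3, 0.2} and normalized = true, a draw of rnd = 0.62 advances the running sum 0.5 → 0.8 and stops at index 1; with unnormalized weights {5, 3, 2} the same draw first forms sum_tgt = 10 * 0.62 = 6.2, and the walk 5 → 8 again lands on index 1.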
struct llama_dist_rng_lowbias32 : llama_dist_rng {
uint32_t hashed_seed = 0;
uint32_t position = 0;
llama_dist_rng_lowbias32(uint32_t seed) : hashed_seed(hash(seed)), position(0) {}
bool requires_sorted() override { return false; }
static uint32_t hash(uint32_t x) { // lowbias32
// coefficients from https://github.com/skeeto/hash-prospector/issues/19
x ^= x >> 16; x *= 0x21f0aaad;
x ^= x >> 15; x *= 0x735a2d97;
x ^= x >> 15;
return x;
}
uint32_t next() {
uint32_t val = hash(position ^ hashed_seed);
position++;
return val;
}
uint32_t next32() override {
return next();
}
uint64_t next64() override {
uint64_t lo = hash(position ^ ~hashed_seed); // secondary sequence using opposing seed
uint64_t hi = next();
return (hi << 32) | lo;
}
double nextf() override {
uint64_t combined = next64();
return (combined >> 11) * 0x1.0p-53;
}
void reseed(uint32_t s) override {
hashed_seed = hash(s);
position = 0;
}
void reset() override {
position = 0;
}
std::unique_ptr<llama_dist_rng> clone() const override {
return std::make_unique<llama_dist_rng_lowbias32>(*this);
}
};
struct llama_dist_rng_mt19937 : llama_dist_rng {
uint32_t seed;
std::mt19937 rng;
llama_dist_rng_mt19937(uint32_t seed) : seed(seed), rng(seed) {}
bool requires_sorted() override { return false; }
uint32_t next32() override {
return rng();
}
uint64_t next64() override {
uint64_t hi = (uint64_t)rng() << 32;
uint64_t lo = (uint64_t)rng();
return hi | lo;
}
double nextf() override {
std::uniform_real_distribution<double> dist(0.0, 1.0);
return dist(rng);
}
void reseed(uint32_t s) override {
seed = s;
rng.seed(s);
}
void reset() override {
rng.seed(seed);
}
std::unique_ptr<llama_dist_rng> clone() const override {
return std::make_unique<llama_dist_rng_mt19937>(*this);
}
};
struct llama_dist_rng_blue : llama_dist_rng {
blue_noise_rng bn_rng;
llama_dist_rng_blue(std::unique_ptr<llama_dist_rng> source)
: bn_rng(16, std::move(source)) {}
bool requires_sorted() override { return true; }
uint32_t next32() override {
return bn_rng.next32();
}
uint64_t next64() override {
return bn_rng.next64();
}
double nextf() override {
return bn_rng.nextf();
}
void reseed(uint32_t s) override {
bn_rng.reseed(s);
}
void reset() override {
bn_rng.rng->reset();
bn_rng.reset_states();
}
std::unique_ptr<llama_dist_rng> clone() const override {
return std::make_unique<llama_dist_rng_blue>(*this);
}
};
static uint32_t get_rng_seed(uint32_t seed) {
if (seed == LLAMA_DEFAULT_SEED) {
// use system clock if std::random_device is not a true RNG
@ -1023,7 +1336,7 @@ struct llama_sampler_dist : public llama_sampler_backend {
const uint32_t seed;
uint32_t seed_cur;
std::mt19937 rng;
std::unique_ptr<llama_dist_rng> rng;
ggml_tensor * inp_uniform;
};
@ -1049,6 +1362,11 @@ static void llama_sampler_dist_apply(struct llama_sampler * smpl, llama_token_da
return;
}
// sort if required by the RNG (e.g., blue noise needs sorted input for proper temporal properties)
if (ctx->rng->requires_sorted() && !cur_p->sorted) {
llama_token_data_array_partial_sort_inplace(cur_p, cur_p->size);
}
// max logit for numerical stability
float max_l = cur_p->data[0].logit;
if (!cur_p->sorted) {
@ -1069,8 +1387,7 @@ static void llama_sampler_dist_apply(struct llama_sampler * smpl, llama_token_da
// sample from the obtained probabilities and normalize the probs in a single pass
// this is ~3x faster on Mac with full gpt-oss vocab than the version below
//
std::uniform_real_distribution<double> dist(0.0f, 1.0f);
const double rnd = dist(ctx->rng);
const double rnd = ctx->rng->nextf();
double sum_run = 0.0f;
const double sum_tgt = sum_cum*rnd;
@ -1101,28 +1418,29 @@ static void llama_sampler_dist_apply(struct llama_sampler * smpl, llama_token_da
cur_p->data[i].p /= sum_cum;
}
cur_p->selected = llama_sample_dist(cur_p, ctx->rng);
cur_p->selected = llama_sample_dist_rng(cur_p, *ctx->rng, true);
#endif
}
static void llama_sampler_dist_reset(struct llama_sampler * smpl) {
auto * ctx = (llama_sampler_dist *) smpl->ctx;
ctx->seed_cur = get_rng_seed(ctx->seed);
ctx->rng.seed(ctx->seed_cur);
ctx->rng->reseed(ctx->seed_cur);
}
static struct llama_sampler * llama_sampler_dist_clone(const struct llama_sampler * smpl) {
const auto * ctx = (const llama_sampler_dist *) smpl->ctx;
auto * result = llama_sampler_init_dist(ctx->seed);
auto * ctx = (llama_sampler_dist *) smpl->ctx;
// copy the state
{
auto * result_ctx = (llama_sampler_dist *) result->ctx;
result_ctx->rng = ctx->rng;
}
return result;
return llama_sampler_init(
/* .iface = */ smpl->iface,
/* .ctx = */ new llama_sampler_dist {
{ctx->get_name()},
/* .seed = */ ctx->seed,
/* .seed_cur = */ ctx->seed_cur,
/* .rng = */ ctx->rng->clone(),
/* .inp_uniform = */ nullptr,
}
);
}
static void llama_sampler_dist_free(struct llama_sampler * smpl) {
@ -1154,6 +1472,30 @@ static void llama_sampler_dist_backend_apply(
ggml_set_name (sctx->inp_uniform, "uniform");
ggml_set_input(sctx->inp_uniform);
// If the RNG requires sorted input (e.g., blue noise), sort logits first
// so the CDF walk operates in probability-rank space, not arbitrary vocab order.
if (sctx->rng->requires_sorted()) {
auto ggml_sort = [ctx](struct ggml_tensor * a, struct ggml_tensor * b) {
GGML_ASSERT(ggml_nrows(a) == 1);
struct ggml_tensor * a_reshaped = ggml_reshape_2d(ctx, a, 1, a->ne[0]);
struct ggml_tensor * a_sorted = ggml_get_rows(ctx, a_reshaped, b);
return ggml_reshape_1d(ctx, a_sorted, a->ne[0]);
};
struct ggml_tensor * sorted_idx = ggml_argsort(ctx, data->logits, GGML_SORT_ORDER_DESC);
ggml_set_name(sorted_idx, "dist_sorted_idx");
data->logits = ggml_sort(data->logits, sorted_idx);
ggml_set_name(data->logits, "dist_sorted_logits");
if (data->candidates) {
data->candidates = ggml_sort(data->candidates, sorted_idx);
} else {
data->candidates = sorted_idx;
}
ggml_set_name(data->candidates, "dist_sorted_candidates");
}
struct ggml_tensor * probs = ggml_soft_max(ctx, data->logits);
ggml_set_name(probs, "dist_probs");
@ -1208,8 +1550,8 @@ static void llama_sampler_dist_backend_set_input(struct llama_sampler * smpl) {
// std::uniform_real_distribution<double> and
// std::uniform_real_distribution<float> with same rng will produce
// different sequences).
std::uniform_real_distribution<double> dist(0.0f, 1.0f);
const float rnd = dist(sctx->rng);
// nextf returns a uniform double in [0, 1), keeping the double precision noted above
const float rnd = (float)sctx->rng->nextf();
ggml_backend_tensor_set(sctx->inp_uniform, &rnd, 0, sizeof(float));
}
@ -1227,20 +1569,36 @@ static struct llama_sampler_i llama_sampler_dist_i = {
/* .backend_set_input = */ llama_sampler_dist_backend_set_input,
};
struct llama_sampler * llama_sampler_init_dist(uint32_t seed) {
static std::unique_ptr<llama_dist_rng> make_dist_rng(uint32_t seed, enum llama_rng_type rng_type) {
switch (rng_type) {
case LLAMA_RNG_TYPE_LOWBIAS32: return std::make_unique<llama_dist_rng_lowbias32>(seed);
case LLAMA_RNG_TYPE_MT19937:
default: return std::make_unique<llama_dist_rng_mt19937>(seed);
}
}
struct llama_sampler * llama_sampler_init_dist_rng(uint32_t seed, bool blue_noise, enum llama_rng_type rng_type) {
auto seed_cur = get_rng_seed(seed);
auto rng = make_dist_rng(seed_cur, rng_type);
if (blue_noise) {
rng = std::make_unique<llama_dist_rng_blue>(std::move(rng));
}
return llama_sampler_init(
/* .iface = */ &llama_sampler_dist_i,
/* .ctx = */ new llama_sampler_dist {
("dist"),
{"dist"},
/* .seed = */ seed,
/* .seed_cur = */ seed_cur,
/* .rng = */ std::mt19937(seed_cur),
/* .rng = */ std::move(rng),
/* .inp_uniform = */ nullptr,
}
);
}
struct llama_sampler * llama_sampler_init_dist(uint32_t seed) {
return llama_sampler_init_dist_rng(seed, false, LLAMA_RNG_TYPE_MT19937);
}
// top-k
struct llama_sampler_top_k : public llama_sampler_backend {

View File

@ -192,6 +192,73 @@ static void test_top_n_sigma(const std::vector<float> & probs, const std::vector
tester.check();
}
static void test_dist_rng(uint32_t seed, bool blue_noise, enum llama_rng_type rng_type,
const std::vector<llama_token> & expected, const char * desc) {
const int n_vocab = 16;
const int n_samples = 32;
// fixed non-uniform distribution: token i has logit log(i+1)
std::vector<llama_token_data> data(n_vocab);
for (int i = 0; i < n_vocab; i++) {
data[i] = {i, logf((float)(i + 1)), 0.0f};
}
auto * sampler = llama_sampler_init_dist_rng(seed, blue_noise, rng_type);
std::vector<llama_token> tokens(n_samples);
for (int i = 0; i < n_samples; i++) {
std::vector<llama_token_data> cur(data);
llama_token_data_array cur_p = {cur.data(), cur.size(), -1, false};
llama_sampler_apply(sampler, &cur_p);
GGML_ASSERT(cur_p.selected >= 0 && cur_p.selected < (llama_token)n_vocab);
tokens[i] = cur_p.data[cur_p.selected].id;
}
if (expected.empty()) {
// print sequence for capture
printf("test_dist_rng %s: {", desc);
for (int i = 0; i < n_samples; i++) {
printf("%s%d", i ? ", " : "", tokens[i]);
}
printf("}\n");
} else {
// verify against known sequence
GGML_ASSERT((int)expected.size() == n_samples);
bool match = true;
for (int i = 0; i < n_samples; i++) {
if (tokens[i] != expected[i]) {
match = false;
break;
}
}
if (!match) {
printf("test_dist_rng %s: MISMATCH\n got: {", desc);
for (int i = 0; i < n_samples; i++) {
printf("%s%d", i ? ", " : "", tokens[i]);
}
printf("}\n expected: {");
for (int i = 0; i < n_samples; i++) {
printf("%s%d", i ? ", " : "", expected[i]);
}
printf("}\n");
GGML_ASSERT(false);
}
// also verify reset reproduces same sequence
llama_sampler_reset(sampler);
for (int i = 0; i < n_samples; i++) {
std::vector<llama_token_data> cur(data);
llama_token_data_array cur_p = {cur.data(), cur.size(), -1, false};
llama_sampler_apply(sampler, &cur_p);
GGML_ASSERT(cur_p.data[cur_p.selected].id == tokens[i]);
}
printf("test_dist_rng %-30s OK\n", desc);
}
llama_sampler_free(sampler);
}
static void test_sampler_queue(const size_t n_vocab, const std::string & samplers_sequence, const int top_k, const float top_p, const float min_p
) {
sampler_tester tester(n_vocab);
@ -392,6 +459,13 @@ int main(void) {
test_sampler_queue(10000, "mkp", 100, 0.8f, 0.1f);
test_sampler_queue(10000, "mpk", 100, 0.8f, 0.1f);
test_dist_rng(42, false, LLAMA_RNG_TYPE_LOWBIAS32,
{5, 12, 8, 10, 12, 11, 10, 8, 8, 10, 11, 9, 7, 6, 11, 13, 14, 15, 13, 4, 12, 14, 13, 13, 14, 12, 5, 15, 4, 13, 15, 12},
"lowbias32");
test_dist_rng(42, true, LLAMA_RNG_TYPE_LOWBIAS32,
{10, 5, 12, 8, 15, 13, 3, 10, 13, 12, 2, 15, 8, 14, 5, 11, 7, 9, 15, 11, 8, 2, 12, 14, 7, 9, 13, 10, 14, 5, 12, 15},
"lowbias32 + blue noise");
printf("OK\n");
test_perf();
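The capture path in test_dist_rng (the expected.empty() branch) can be used to record sequences for additional RNG combinations before pinning them, e.g. a hypothetical call not present in this diff:

    // prints the 32 sampled tokens instead of asserting, for copy-paste into the test
    test_dist_rng(42, true, LLAMA_RNG_TYPE_MT19937, {}, "mt19937 + blue noise");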

View File

@ -66,6 +66,8 @@ json task_params::to_json(bool only_metrics) const {
{"n_keep", n_keep},
{"n_discard", n_discard},
{"ignore_eos", sampling.ignore_eos},
{"blue_noise", sampling.blue_noise},
{"rng_type", sampling.rng_type == LLAMA_RNG_TYPE_LOWBIAS32 ? "lowbias32" : "mt19937"},
{"stream", stream},
{"n_probs", sampling.n_probs},
{"min_keep", sampling.min_keep},
@ -124,6 +126,8 @@ json task_params::to_json(bool only_metrics) const {
{"n_keep", n_keep},
{"n_discard", n_discard},
{"ignore_eos", sampling.ignore_eos},
{"blue_noise", sampling.blue_noise},
{"rng_type", sampling.rng_type == LLAMA_RNG_TYPE_LOWBIAS32 ? "lowbias32" : "mt19937"},
{"stream", stream},
{"logit_bias", format_logit_bias(sampling.logit_bias)},
{"n_probs", sampling.n_probs},
@ -463,6 +467,15 @@ task_params server_task::params_from_json_cmpl(
}
}
params.sampling.blue_noise = json_value(data, "blue_noise", params_base.sampling.blue_noise);
{
const auto rng_source = json_value(data, "rng_type", std::string(""));
if (rng_source == "lowbias32") {
params.sampling.rng_type = LLAMA_RNG_TYPE_LOWBIAS32;
} else if (rng_source == "mt19937") {
params.sampling.rng_type = LLAMA_RNG_TYPE_MT19937;
}
}
params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos);
if (params.sampling.ignore_eos) {
params.sampling.logit_bias.insert(
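Request-side sketch (illustrative): since params_from_json_cmpl reads these keys straight from the request body, a per-request opt-in could look like the following payload, where the surrounding fields are the usual completion-request keys and only blue_noise / rng_type are introduced by this change:

    { "prompt": "Once upon a time", "temperature": 0.8, "blue_noise": true, "rng_type": "lowbias32" }

As the parsing above shows, only "lowbias32" and "mt19937" are recognized for rng_type; any other value leaves the server-side default untouched.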