llama : initial blue noise test implementation

This commit is contained in:
Jan Boon 2026-02-04 19:12:50 +00:00
parent b2ee2fbc0a
commit f271576d81
6 changed files with 214 additions and 3 deletions

View File

@ -1577,6 +1577,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.sampling.ignore_eos = true;
}
).set_sparam());
add_opt(common_arg(
{"--blue-noise"},
"use blue noise RNG for sampling instead of white noise",
[](common_params & params) {
params.sampling.blue_noise = true;
}
).set_sparam());
add_opt(common_arg(
{"--temp"}, "N",
string_format("temperature (default: %.2f)", (double)params.sampling.temp),

View File

@ -209,6 +209,7 @@ struct common_params_sampling {
bool ignore_eos = false;
bool no_perf = false; // disable performance metrics
bool timing_per_token = false;
bool blue_noise = false; // use blue noise RNG instead of white noise for dist sampler
uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers

View File

@ -313,7 +313,11 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
samplers.push_back(llama_sampler_init_adaptive_p(params.adaptive_target, params.adaptive_decay, params.seed));
} else {
// default: sample from distribution
samplers.push_back(llama_sampler_init_dist(params.seed));
if (params.blue_noise) {
samplers.push_back(llama_sampler_init_dist_blue_noise(params.seed));
} else {
samplers.push_back(llama_sampler_init_dist(params.seed));
}
}
} else if (params.mirostat == 1) {
samplers.push_back(llama_sampler_init_temp(params.temp));

View File

@ -1295,7 +1295,8 @@ extern "C" {
LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
/// seed == LLAMA_DEFAULT_SEED to use a random seed.
LLAMA_API struct llama_sampler * llama_sampler_init_dist(uint32_t seed);
LLAMA_API struct llama_sampler * llama_sampler_init_dist (uint32_t seed);
LLAMA_API struct llama_sampler * llama_sampler_init_dist_blue_noise(uint32_t seed);
/// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
/// Setting k <= 0 makes this a noop

View File

@ -369,7 +369,7 @@ struct blue_noise_rng {
const int n = (int)states.size();
position = 0;
// 5 reachable states with stationary distribution 3:3:2:1:1 (out of 10)
// 5 reachable states with distribution 3:3:2:1:1
static const int8_t tbl[10][2] = {
{ 0, 0}, { 0, 0}, { 0, 0},
{-1, 0}, {-1, 0}, {-1, 0},
@ -1340,6 +1340,197 @@ struct llama_sampler * llama_sampler_init_dist(uint32_t seed) {
);
}
// dist (blue noise)
// context for the "dist-blue-noise" sampler: samples from the token
// distribution using a blue noise RNG instead of a white noise RNG
struct llama_sampler_dist_blue_noise : public llama_sampler_backend {
const uint32_t seed; // seed requested by the user (may be LLAMA_DEFAULT_SEED)
uint32_t seed_cur;   // seed actually in use (resolved via get_rng_seed)
blue_noise_rng bn_rng;       // blue noise random number generator state
ggml_tensor * inp_uniform;   // graph input tensor holding the uniform random value (backend path)
};
// returns the sampler's display name ("dist-blue-noise")
static const char * llama_sampler_dist_blue_noise_name(const struct llama_sampler * smpl) {
    auto * ctx = (llama_sampler_dist_blue_noise *) smpl->ctx;

    return ctx->get_name();
}
// CPU path: softmax the logits and draw one token using the blue noise RNG.
// Fills cur_p->data[i].p with normalized probabilities and sets cur_p->selected.
static void llama_sampler_dist_blue_noise_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
    auto * ctx = (llama_sampler_dist_blue_noise *) smpl->ctx;

    // edge cases
    if (cur_p->size == 0) {
        cur_p->selected = -1;
        return;
    }

    cur_p->selected = 0;

    if (cur_p->size == 1) {
        cur_p->data[0].p = 1.0f;
        return;
    }

    // max logit for numerical stability
    // if the candidates are sorted by logit, data[0] already holds the max
    float max_l = cur_p->data[0].logit;
    if (!cur_p->sorted) {
        for (size_t i = 1; i < cur_p->size; ++i) {
            max_l = std::max(max_l, cur_p->data[i].logit);
        }
    }

    // apply softmax to obtain the (unnormalized) probabilities
    double sum_cum = 0.0;
    for (size_t i = 0; i < cur_p->size; ++i) {
        const float p = expf(cur_p->data[i].logit - max_l);
        cur_p->data[i].p = p;
        sum_cum += p;
    }

    // sample using blue noise RNG: select the first token whose running
    // probability sum reaches the target sum_cum*rnd
    const double rnd = ctx->bn_rng.nextf();

    double sum_run = 0.0;
    const double sum_tgt = sum_cum*rnd;
    bool found = false;
    for (size_t i = 0; i < cur_p->size; ++i) {
        if (!found) {
            sum_run += cur_p->data[i].p;
            if (sum_run >= sum_tgt) {
                cur_p->selected = i;
                found = true;
            }
        }
        // normalize probs
        cur_p->data[i].p /= sum_cum;
    }

    // floating point rounding can leave sum_run slightly below sum_tgt when
    // rnd is close to 1.0 - this is a valid path, so fall back to the last
    // token instead of asserting
    if (!found) {
        cur_p->selected = cur_p->size - 1;
    }
}
// re-derive the current seed and re-initialize the blue noise generator
static void llama_sampler_dist_blue_noise_reset(struct llama_sampler * smpl) {
    auto * sctx = (llama_sampler_dist_blue_noise *) smpl->ctx;

    sctx->seed_cur = get_rng_seed(sctx->seed);
    sctx->bn_rng.init(16, sctx->seed_cur);
}
// create an independent copy of the sampler, including the RNG state
static struct llama_sampler * llama_sampler_dist_blue_noise_clone(const struct llama_sampler * smpl) {
    const auto * src = (const llama_sampler_dist_blue_noise *) smpl->ctx;

    struct llama_sampler * result = llama_sampler_init_dist_blue_noise(src->seed);

    // copy the state
    auto * dst = (llama_sampler_dist_blue_noise *) result->ctx;

    dst->seed_cur = src->seed_cur;
    dst->bn_rng   = src->bn_rng;

    return result;
}
// free the sampler context (the llama_sampler wrapper itself is freed by the caller)
static void llama_sampler_dist_blue_noise_free(struct llama_sampler * smpl) {
delete (llama_sampler_dist_blue_noise *) smpl->ctx;
}
// check whether the backend buffer type can run the GPU sampling graph and
// record the result in the context; returns true if the backend path is usable
static bool llama_sampler_dist_blue_noise_backend_init(
        struct llama_sampler * smpl,
        ggml_backend_buffer_type_t buft) {
    auto * sctx = (llama_sampler_dist_blue_noise *) smpl->ctx;

    const bool supported = llama_sampler_backend_support(smpl, buft);

    sctx->init(supported);

    return supported;
}
// backend (GPU) path: build the sampling graph - softmax the logits, take the
// cumulative sum, and select the first index whose cumulative probability
// reaches the uniform random input produced by the blue noise RNG
static void llama_sampler_dist_blue_noise_backend_apply(
        struct llama_sampler * smpl,
        struct ggml_context * ctx,
        struct ggml_cgraph * gf,
        struct llama_sampler_data * data) {
    GGML_UNUSED(gf);

    auto * sctx = (llama_sampler_dist_blue_noise *) smpl->ctx;

    // scalar input, set each token by llama_sampler_dist_blue_noise_backend_set_input
    sctx->inp_uniform = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    ggml_set_name (sctx->inp_uniform, "uniform");
    ggml_set_input(sctx->inp_uniform);

    struct ggml_tensor * probs = ggml_soft_max(ctx, data->logits);
    ggml_set_name(probs, "dist_probs");

    struct ggml_tensor * cumsum = ggml_cumsum(ctx, probs);
    ggml_set_name(cumsum, "dist_cumsum");

    struct ggml_tensor * diff = ggml_sub(ctx, cumsum, sctx->inp_uniform);
    // note: previously mislabeled "dist_cumsum", clashing with the tensor above
    ggml_set_name(diff, "dist_diff");

    // step() masks the entries at/after the crossing point; summing the mask
    // counts them, so mask->ne[0] - sum gives the index of the first crossing
    struct ggml_tensor * mask = ggml_step(ctx, diff);
    ggml_set_name(mask, "dist_mask");

    struct ggml_tensor * idxf = ggml_sum(ctx, mask);
    ggml_set_name(idxf, "dist_index_f32");

    struct ggml_tensor * idx = ggml_cast(ctx, ggml_scale_bias(ctx, idxf, -1.0f, mask->ne[0]), GGML_TYPE_I32);
    ggml_set_name(idx, "dist_index_i32");

    struct ggml_tensor * sampled_token = idx;

    // if a previous sampler reduced the candidate set, map the index back to
    // the original token id
    if (data->candidates != nullptr) {
        struct ggml_tensor * candidates = ggml_reshape_2d(ctx, data->candidates, 1, ggml_nelements(data->candidates));
        sampled_token = ggml_get_rows(ctx, candidates, idx);
        ggml_set_name(sampled_token, "dist_sampled_token");
    }

    data->sampled = sampled_token;
    data->probs   = probs;
}
// advance the blue noise sequence by one value and upload it to the
// "uniform" graph input before each sampling step
static void llama_sampler_dist_blue_noise_backend_set_input(struct llama_sampler * smpl) {
    auto * sctx = (llama_sampler_dist_blue_noise *) smpl->ctx;

    GGML_ASSERT(sctx->inp_uniform != nullptr);

    const float rnd = (float) sctx->bn_rng.nextf();

    ggml_backend_tensor_set(sctx->inp_uniform, &rnd, 0, sizeof(float));
}
// vtable for the blue noise distribution sampler; accept hooks are unused
// because this sampler keeps no per-token history
static struct llama_sampler_i llama_sampler_dist_blue_noise_i = {
/* .name = */ llama_sampler_dist_blue_noise_name,
/* .accept = */ nullptr,
/* .apply = */ llama_sampler_dist_blue_noise_apply,
/* .reset = */ llama_sampler_dist_blue_noise_reset,
/* .clone = */ llama_sampler_dist_blue_noise_clone,
/* .free = */ llama_sampler_dist_blue_noise_free,
/* .backend_init = */ llama_sampler_dist_blue_noise_backend_init,
/* .backend_accept = */ nullptr,
/* .backend_apply = */ llama_sampler_dist_blue_noise_backend_apply,
/* .backend_set_input = */ llama_sampler_dist_blue_noise_backend_set_input,
};
// create a blue noise distribution sampler
// seed == LLAMA_DEFAULT_SEED to use a random seed
struct llama_sampler * llama_sampler_init_dist_blue_noise(uint32_t seed) {
auto seed_cur = get_rng_seed(seed);
return llama_sampler_init(
/* .iface = */ &llama_sampler_dist_blue_noise_i,
/* .ctx = */ new llama_sampler_dist_blue_noise {
/* .name = */ ("dist-blue-noise"),
/* .seed = */ seed,
/* .seed_cur = */ seed_cur,
/* .bn_rng = */ blue_noise_rng(16, seed_cur),
/* .inp_uniform = */ nullptr,
}
);
}
// top-k
struct llama_sampler_top_k : public llama_sampler_backend {
@ -3928,6 +4119,10 @@ uint32_t llama_sampler_get_seed(const struct llama_sampler * smpl) {
return ((const llama_sampler_dist *) smpl->ctx)->seed_cur;
}
if (smpl->iface == &llama_sampler_dist_blue_noise_i) {
return ((const llama_sampler_dist_blue_noise *) smpl->ctx)->seed_cur;
}
if (smpl->iface == &llama_sampler_mirostat_i) {
return ((const llama_sampler_mirostat *) smpl->ctx)->seed_cur;
}

View File

@ -66,6 +66,7 @@ json task_params::to_json(bool only_metrics) const {
{"n_keep", n_keep},
{"n_discard", n_discard},
{"ignore_eos", sampling.ignore_eos},
{"blue_noise", sampling.blue_noise},
{"stream", stream},
{"n_probs", sampling.n_probs},
{"min_keep", sampling.min_keep},
@ -125,6 +126,7 @@ json task_params::to_json(bool only_metrics) const {
{"n_keep", n_keep},
{"n_discard", n_discard},
{"ignore_eos", sampling.ignore_eos},
{"blue_noise", sampling.blue_noise},
{"stream", stream},
{"logit_bias", format_logit_bias(sampling.logit_bias)},
{"n_probs", sampling.n_probs},
@ -467,6 +469,7 @@ task_params server_task::params_from_json_cmpl(
}
}
params.sampling.blue_noise = json_value(data, "blue_noise", params_base.sampling.blue_noise);
params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos);
if (params.sampling.ignore_eos) {
params.sampling.logit_bias.insert(