initial commit for branch

2025-12-10 22:13:58 -06:00 · 2025-12-10 22:13:58 -06:00 · 774cf23ee5
parent 34ce48d97a
commit 774cf23ee5
2 changed files with 143 additions and 0 deletions
--- a/include/llama.h
+++ b/include/llama.h
@ -1289,6 +1289,15 @@ extern "C" {
                          const char ** seq_breakers,
                              size_t    num_breakers);

+    /// @details power law sampler, reshapes probability distribution to target specific probability ranges
+    /// ref: https://github.com/MrJackSpade/llama.cpp
+    /// ref: [PR]
+    LLAMA_API struct llama_sampler * llama_sampler_init_power_law(
+                                float    target,       // target probability (0.0 to 1.0)
+                                float    target_range, // adaptive target range (±range from target)
+                                int32_t  queue_size,   // rolling history window size for adaptation
+                                uint32_t seed);        // RNG seed
+
    LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
                             int32_t   n_vocab,
                             int32_t   n_logit_bias,
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@ -2313,6 +2313,140 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa
    return result;
 }

+// power-law
+// ref: https://github.com/MrJackSpade/llama.cpp/tree/master
+// ref: [PR]
+
+struct llama_sampler_power_law {
+    const float    target;
+    const float    target_range;
+    const int32_t  queue_size;
+    const uint32_t seed;
+
+    std::mt19937       rng;
+    ring_buffer<float> history;
+};
+
+static const char * llama_sampler_power_law_name(const struct llama_sampler * /*smpl*/) {
+    return "power-law";
+}
+
+static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
+    auto * ctx = (llama_sampler_power_law *) smpl->ctx;
+
+    // these don't need to be modified or exposed to the user
+    const float peak_logit_value   = 3.0f;
+    const float tail_heaviness     = 3.0f;
+
+    const float min_target = ctx->target - ctx->target_range;
+    const float max_target = ctx->target + ctx->target_range;
+
+    // compute probabilities to get the "original" values
+    llama_sampler_softmax_impl(cur_p, false);
+
+    // store original probabilities (needed for history update)
+    std::vector<float> original_probs;
+    original_probs.reserve(cur_p->size);
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        original_probs.push_back(cur_p->data[i].p);
+    }
+
+    // calculate adaptive target
+    float computed_target = ctx->target;
+    if (ctx->history.size() > 0) {
+        float sum_excluding_oldest = 0.0f;
+        size_t sz = ctx->history.size();
+
+        // sum all except the oldest element
+        for (size_t i = 0; i < sz - 1; ++i) {
+            sum_excluding_oldest += ctx->history.rat(i);
+        }
+
+        float next_value = (ctx->target * ctx->queue_size) - sum_excluding_oldest;
+        computed_target = std::max(min_target, std::min(next_value, max_target));
+    }
+
+    // find closest token (for degenerate width ~ 0 case)
+    float min_distance      = FLT_MAX;
+    int   closest_token_idx = -1;
+
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        float distance = std::abs(cur_p->data[i].p - computed_target);
+        if (distance < min_distance) {
+            min_distance      = distance;
+            closest_token_idx = (int) i;
+        }
+    }
+
+    // apply power law transformation
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        float p = cur_p->data[i].p;
+
+        float distance            = std::abs(p - computed_target);
+        float normalized_distance = distance / 0.2f;
+        cur_p->data[i].logit      = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness));
+    }
+
+    llama_sampler_softmax_impl(cur_p, false);
+
+    // sample from distribution
+    const int idx = llama_sample_dist(cur_p, ctx->rng);
+
+    // set sampled token
+    cur_p->selected = idx;
+
+    // update history with ORIGINAL probability
+    ctx->history.push_back(original_probs[idx]);
+}
+
+static void llama_sampler_power_law_reset(struct llama_sampler * smpl) {
+    auto * ctx    = (llama_sampler_power_law *) smpl->ctx;
+    ctx->history  = ring_buffer<float>(ctx->queue_size);
+}
+
+static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) {
+    const auto * ctx  = (const llama_sampler_power_law *) smpl->ctx;
+    auto * result     = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->queue_size, ctx->seed);
+    auto * result_ctx = (llama_sampler_power_law *) result->ctx;
+
+    result_ctx->history  = ctx->history;
+
+    return result;
+}
+
+static void llama_sampler_power_law_free(struct llama_sampler * smpl) {
+    delete (llama_sampler_power_law *) smpl->ctx;
+}
+
+static struct llama_sampler_i llama_sampler_power_law_i = {
+    /* .name   = */ llama_sampler_power_law_name,
+    /* .accept = */ nullptr,
+    /* .apply  = */ llama_sampler_power_law_apply,
+    /* .reset  = */ llama_sampler_power_law_reset,
+    /* .clone  = */ llama_sampler_power_law_clone,
+    /* .free   = */ llama_sampler_power_law_free,
+};
+
+struct llama_sampler * llama_sampler_init_power_law(
+    float    target,
+    float    target_range,
+    int32_t  queue_size,
+    uint32_t seed
+) {
+    auto seed_cur = get_rng_seed(seed);
+    return llama_sampler_init(
+        /* .iface = */ &llama_sampler_power_law_i,
+        /* .ctx   = */ new llama_sampler_power_law {
+            /* .target       = */ target,
+            /* .target_range = */ target_range,
+            /* .queue_size   = */ queue_size,
+            /* .seed         = */ seed_cur,
+            /* .rng          = */ std::mt19937(seed_cur),
+            /* .history      = */ ring_buffer<float>(queue_size),
+        }
+    );
+}
+
 // logit-bias

 struct llama_sampler_logit_bias {